diff --git "a/evals/core_9mcqa/task-010-openbookqa:mc-predictions.jsonl" "b/evals/core_9mcqa/task-010-openbookqa:mc-predictions.jsonl" new file mode 100644--- /dev/null +++ "b/evals/core_9mcqa/task-010-openbookqa:mc-predictions.jsonl" @@ -0,0 +1,500 @@ +{"doc_id": 0, "native_id": "8-343", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.369396448135376, "incorrect_loss_raw": 1.4047423601150513, "correct_loss_per_char": 0.684698224067688, "incorrect_loss_per_char": 0.7023711800575256, "correct_loss_per_token": 1.369396448135376, "incorrect_loss_per_token": 1.4047423601150513, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3016093969345093, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": true, "logits_per_token": -1.3016093969345093, "logits_per_char": -0.6508046984672546, "num_chars": 2}, {"sum_logits": -1.369396448135376, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.369396448135376, "logits_per_char": -0.684698224067688, "num_chars": 2}, {"sum_logits": -1.5556128025054932, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.5556128025054932, "logits_per_char": -0.7778064012527466, "num_chars": 2}, {"sum_logits": -1.3570048809051514, "num_tokens": 1, "num_tokens_all": 293, "is_greedy": false, "logits_per_token": -1.3570048809051514, "logits_per_char": -0.6785024404525757, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1, "native_id": "1129", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1169270277023315, "incorrect_loss_raw": 1.5095955928166707, "correct_loss_per_char": 0.5584635138511658, "incorrect_loss_per_char": 0.7547977964083353, "correct_loss_per_token": 1.1169270277023315, "incorrect_loss_per_token": 1.5095955928166707, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1169270277023315, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": true, "logits_per_token": -1.1169270277023315, "logits_per_char": -0.5584635138511658, "num_chars": 2}, {"sum_logits": -1.546855092048645, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.546855092048645, "logits_per_char": -0.7734275460243225, "num_chars": 2}, {"sum_logits": -1.392316222190857, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.392316222190857, "logits_per_char": -0.6961581110954285, "num_chars": 2}, {"sum_logits": -1.5896154642105103, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.5896154642105103, "logits_per_char": -0.7948077321052551, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 2, "native_id": "880", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4375560283660889, "incorrect_loss_raw": 1.410355567932129, "correct_loss_per_char": 0.7187780141830444, "incorrect_loss_per_char": 0.7051777839660645, "correct_loss_per_token": 1.4375560283660889, "incorrect_loss_per_token": 1.410355567932129, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1264058351516724, "num_tokens": 1, "num_tokens_all": 245, "is_greedy": true, "logits_per_token": -1.1264058351516724, "logits_per_char": -0.5632029175758362, "num_chars": 2}, {"sum_logits": -1.3599711656570435, "num_tokens": 1, "num_tokens_all": 245, "is_greedy": false, "logits_per_token": -1.3599711656570435, "logits_per_char": -0.6799855828285217, "num_chars": 2}, {"sum_logits": -1.4375560283660889, "num_tokens": 1, "num_tokens_all": 245, "is_greedy": false, "logits_per_token": -1.4375560283660889, "logits_per_char": -0.7187780141830444, "num_chars": 2}, {"sum_logits": -1.744689702987671, "num_tokens": 1, "num_tokens_all": 245, "is_greedy": false, "logits_per_token": -1.744689702987671, "logits_per_char": -0.8723448514938354, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 3, "native_id": "7-999", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.459143877029419, "incorrect_loss_raw": 1.3711395263671875, "correct_loss_per_char": 0.7295719385147095, "incorrect_loss_per_char": 0.6855697631835938, "correct_loss_per_token": 1.459143877029419, "incorrect_loss_per_token": 1.3711395263671875, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2909541130065918, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.2909541130065918, "logits_per_char": -0.6454770565032959, "num_chars": 2}, {"sum_logits": -1.39195716381073, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.39195716381073, "logits_per_char": -0.695978581905365, "num_chars": 2}, {"sum_logits": -1.459143877029419, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.459143877029419, "logits_per_char": -0.7295719385147095, "num_chars": 2}, {"sum_logits": -1.4305073022842407, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.4305073022842407, "logits_per_char": -0.7152536511421204, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 4, "native_id": "8-464", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3564645051956177, "incorrect_loss_raw": 1.419977863629659, "correct_loss_per_char": 0.6782322525978088, "incorrect_loss_per_char": 0.7099889318148295, "correct_loss_per_token": 1.3564645051956177, "incorrect_loss_per_token": 1.419977863629659, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1969047784805298, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": true, "logits_per_token": -1.1969047784805298, "logits_per_char": -0.5984523892402649, "num_chars": 2}, {"sum_logits": -1.458781361579895, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.458781361579895, "logits_per_char": -0.7293906807899475, "num_chars": 2}, {"sum_logits": -1.3564645051956177, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.3564645051956177, "logits_per_char": -0.6782322525978088, "num_chars": 2}, {"sum_logits": -1.6042474508285522, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.6042474508285522, "logits_per_char": -0.8021237254142761, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 5, "native_id": "9-794", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3621066808700562, "incorrect_loss_raw": 1.4027103980382283, "correct_loss_per_char": 0.6810533404350281, "incorrect_loss_per_char": 0.7013551990191141, "correct_loss_per_token": 1.3621066808700562, "incorrect_loss_per_token": 1.4027103980382283, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3634936809539795, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.3634936809539795, "logits_per_char": -0.6817468404769897, "num_chars": 2}, {"sum_logits": -1.3912254571914673, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.3912254571914673, "logits_per_char": -0.6956127285957336, "num_chars": 2}, {"sum_logits": -1.3621066808700562, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": true, "logits_per_token": -1.3621066808700562, "logits_per_char": -0.6810533404350281, "num_chars": 2}, {"sum_logits": -1.4534120559692383, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.4534120559692383, "logits_per_char": -0.7267060279846191, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 6, "native_id": "9-1163", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3630999326705933, "incorrect_loss_raw": 1.4090337753295898, "correct_loss_per_char": 0.6815499663352966, "incorrect_loss_per_char": 0.7045168876647949, "correct_loss_per_token": 1.3630999326705933, "incorrect_loss_per_token": 1.4090337753295898, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.312984585762024, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": true, "logits_per_token": -1.312984585762024, "logits_per_char": -0.656492292881012, "num_chars": 2}, {"sum_logits": -1.4260488748550415, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.4260488748550415, "logits_per_char": -0.7130244374275208, "num_chars": 2}, {"sum_logits": -1.3630999326705933, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.3630999326705933, "logits_per_char": -0.6815499663352966, "num_chars": 2}, {"sum_logits": -1.488067865371704, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.488067865371704, "logits_per_char": -0.744033932685852, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 7, "native_id": "9-322", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5087645053863525, "incorrect_loss_raw": 1.3606393734614055, "correct_loss_per_char": 0.7543822526931763, "incorrect_loss_per_char": 0.6803196867307028, "correct_loss_per_token": 1.5087645053863525, "incorrect_loss_per_token": 1.3606393734614055, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2683360576629639, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": true, "logits_per_token": -1.2683360576629639, "logits_per_char": -0.6341680288314819, "num_chars": 2}, {"sum_logits": -1.5087645053863525, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.5087645053863525, "logits_per_char": -0.7543822526931763, "num_chars": 2}, {"sum_logits": -1.35897958278656, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.35897958278656, "logits_per_char": -0.67948979139328, "num_chars": 2}, {"sum_logits": -1.4546024799346924, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.4546024799346924, "logits_per_char": -0.7273012399673462, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 8, "native_id": "7-1140", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5398869514465332, "incorrect_loss_raw": 1.3543836275736492, "correct_loss_per_char": 0.7699434757232666, "incorrect_loss_per_char": 0.6771918137868246, "correct_loss_per_token": 1.5398869514465332, "incorrect_loss_per_token": 1.3543836275736492, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2607237100601196, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": true, "logits_per_token": -1.2607237100601196, "logits_per_char": -0.6303618550300598, "num_chars": 2}, {"sum_logits": -1.404016137123108, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.404016137123108, "logits_per_char": -0.702008068561554, "num_chars": 2}, {"sum_logits": -1.3984110355377197, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.3984110355377197, "logits_per_char": -0.6992055177688599, "num_chars": 2}, {"sum_logits": -1.5398869514465332, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.5398869514465332, "logits_per_char": -0.7699434757232666, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 9, "native_id": "7-903", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3102985620498657, "incorrect_loss_raw": 1.4322414795557659, "correct_loss_per_char": 0.6551492810249329, "incorrect_loss_per_char": 0.7161207397778829, "correct_loss_per_token": 1.3102985620498657, "incorrect_loss_per_token": 1.4322414795557659, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2392423152923584, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": true, "logits_per_token": -1.2392423152923584, "logits_per_char": -0.6196211576461792, "num_chars": 2}, {"sum_logits": -1.3102985620498657, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.3102985620498657, "logits_per_char": -0.6551492810249329, "num_chars": 2}, {"sum_logits": -1.4388762712478638, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.4388762712478638, "logits_per_char": -0.7194381356239319, "num_chars": 2}, {"sum_logits": -1.6186058521270752, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.6186058521270752, "logits_per_char": -0.8093029260635376, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 10, "native_id": "7-511", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4264492988586426, "incorrect_loss_raw": 1.406666358311971, "correct_loss_per_char": 0.7132246494293213, "incorrect_loss_per_char": 0.7033331791559855, "correct_loss_per_token": 1.4264492988586426, "incorrect_loss_per_token": 1.406666358311971, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.109633445739746, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": true, "logits_per_token": -1.109633445739746, "logits_per_char": -0.554816722869873, "num_chars": 2}, {"sum_logits": -1.5308561325073242, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.5308561325073242, "logits_per_char": -0.7654280662536621, "num_chars": 2}, {"sum_logits": -1.4264492988586426, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.4264492988586426, "logits_per_char": -0.7132246494293213, "num_chars": 2}, {"sum_logits": -1.5795094966888428, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.5795094966888428, "logits_per_char": -0.7897547483444214, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 11, "native_id": "9-937", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3981189727783203, "incorrect_loss_raw": 1.3987945715586345, "correct_loss_per_char": 0.6990594863891602, "incorrect_loss_per_char": 0.6993972857793173, "correct_loss_per_token": 1.3981189727783203, "incorrect_loss_per_token": 1.3987945715586345, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.241837501525879, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": true, "logits_per_token": -1.241837501525879, "logits_per_char": -0.6209187507629395, "num_chars": 2}, {"sum_logits": -1.3981189727783203, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.3981189727783203, "logits_per_char": -0.6990594863891602, "num_chars": 2}, {"sum_logits": -1.4039833545684814, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.4039833545684814, "logits_per_char": -0.7019916772842407, "num_chars": 2}, {"sum_logits": -1.550562858581543, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.550562858581543, "logits_per_char": -0.7752814292907715, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 12, "native_id": "8-201", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4575952291488647, "incorrect_loss_raw": 1.3749999205271404, "correct_loss_per_char": 0.7287976145744324, "incorrect_loss_per_char": 0.6874999602635702, "correct_loss_per_token": 1.4575952291488647, "incorrect_loss_per_token": 1.3749999205271404, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3097851276397705, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": true, "logits_per_token": -1.3097851276397705, "logits_per_char": -0.6548925638198853, "num_chars": 2}, {"sum_logits": -1.3242698907852173, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.3242698907852173, "logits_per_char": -0.6621349453926086, "num_chars": 2}, {"sum_logits": -1.4575952291488647, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.4575952291488647, "logits_per_char": -0.7287976145744324, "num_chars": 2}, {"sum_logits": -1.490944743156433, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.490944743156433, "logits_per_char": -0.7454723715782166, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 13, "native_id": "1618", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2853113412857056, "incorrect_loss_raw": 1.4333475430806477, "correct_loss_per_char": 0.6426556706428528, "incorrect_loss_per_char": 0.7166737715403239, "correct_loss_per_token": 1.2853113412857056, "incorrect_loss_per_token": 1.4333475430806477, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2853113412857056, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": true, "logits_per_token": -1.2853113412857056, "logits_per_char": -0.6426556706428528, "num_chars": 2}, {"sum_logits": -1.4585598707199097, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.4585598707199097, "logits_per_char": -0.7292799353599548, "num_chars": 2}, {"sum_logits": -1.323984980583191, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.323984980583191, "logits_per_char": -0.6619924902915955, "num_chars": 2}, {"sum_logits": -1.5174977779388428, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.5174977779388428, "logits_per_char": -0.7587488889694214, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 14, "native_id": "758", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4466629028320312, "incorrect_loss_raw": 1.3761239051818848, "correct_loss_per_char": 0.7233314514160156, "incorrect_loss_per_char": 0.6880619525909424, "correct_loss_per_token": 1.4466629028320312, "incorrect_loss_per_token": 1.3761239051818848, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3498059511184692, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.3498059511184692, "logits_per_char": -0.6749029755592346, "num_chars": 2}, {"sum_logits": -1.4611272811889648, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.4611272811889648, "logits_per_char": -0.7305636405944824, "num_chars": 2}, {"sum_logits": -1.4466629028320312, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.4466629028320312, "logits_per_char": -0.7233314514160156, "num_chars": 2}, {"sum_logits": -1.3174384832382202, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": true, "logits_per_token": -1.3174384832382202, "logits_per_char": -0.6587192416191101, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 15, "native_id": "7-414", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5255708694458008, "incorrect_loss_raw": 1.3596142133076985, "correct_loss_per_char": 0.7627854347229004, "incorrect_loss_per_char": 0.6798071066538492, "correct_loss_per_token": 1.5255708694458008, "incorrect_loss_per_token": 1.3596142133076985, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.195932149887085, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": true, "logits_per_token": -1.195932149887085, "logits_per_char": -0.5979660749435425, "num_chars": 2}, {"sum_logits": -1.3820768594741821, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.3820768594741821, "logits_per_char": -0.6910384297370911, "num_chars": 2}, {"sum_logits": -1.5008336305618286, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.5008336305618286, "logits_per_char": -0.7504168152809143, "num_chars": 2}, {"sum_logits": -1.5255708694458008, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.5255708694458008, "logits_per_char": -0.7627854347229004, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 16, "native_id": "9-675", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3599705696105957, "incorrect_loss_raw": 1.4151846965154011, "correct_loss_per_char": 0.6799852848052979, "incorrect_loss_per_char": 0.7075923482577006, "correct_loss_per_token": 1.3599705696105957, "incorrect_loss_per_token": 1.4151846965154011, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4993418455123901, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.4993418455123901, "logits_per_char": -0.7496709227561951, "num_chars": 2}, {"sum_logits": -1.524011492729187, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.524011492729187, "logits_per_char": -0.7620057463645935, "num_chars": 2}, {"sum_logits": -1.3599705696105957, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.3599705696105957, "logits_per_char": -0.6799852848052979, "num_chars": 2}, {"sum_logits": -1.2222007513046265, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.2222007513046265, "logits_per_char": -0.6111003756523132, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 17, "native_id": "9-163", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4534704685211182, "incorrect_loss_raw": 1.3854471842447917, "correct_loss_per_char": 0.7267352342605591, "incorrect_loss_per_char": 0.6927235921223959, "correct_loss_per_token": 1.4534704685211182, "incorrect_loss_per_token": 1.3854471842447917, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.213841438293457, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": true, "logits_per_token": -1.213841438293457, "logits_per_char": -0.6069207191467285, "num_chars": 2}, {"sum_logits": -1.4902491569519043, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.4902491569519043, "logits_per_char": -0.7451245784759521, "num_chars": 2}, {"sum_logits": -1.4534704685211182, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.4534704685211182, "logits_per_char": -0.7267352342605591, "num_chars": 2}, {"sum_logits": -1.4522509574890137, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.4522509574890137, "logits_per_char": -0.7261254787445068, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 18, "native_id": "1032", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.255384922027588, "incorrect_loss_raw": 1.4447789986928303, "correct_loss_per_char": 0.627692461013794, "incorrect_loss_per_char": 0.7223894993464152, "correct_loss_per_token": 1.255384922027588, "incorrect_loss_per_token": 1.4447789986928303, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.255384922027588, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": true, "logits_per_token": -1.255384922027588, "logits_per_char": -0.627692461013794, "num_chars": 2}, {"sum_logits": -1.4477012157440186, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.4477012157440186, "logits_per_char": -0.7238506078720093, "num_chars": 2}, {"sum_logits": -1.373610019683838, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.373610019683838, "logits_per_char": -0.686805009841919, "num_chars": 2}, {"sum_logits": -1.5130257606506348, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.5130257606506348, "logits_per_char": -0.7565128803253174, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 19, "native_id": "889", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.449794888496399, "incorrect_loss_raw": 1.415250023206075, "correct_loss_per_char": 0.7248974442481995, "incorrect_loss_per_char": 0.7076250116030375, "correct_loss_per_token": 1.449794888496399, "incorrect_loss_per_token": 1.415250023206075, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0268014669418335, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": true, "logits_per_token": -1.0268014669418335, "logits_per_char": -0.5134007334709167, "num_chars": 2}, {"sum_logits": -1.449794888496399, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.449794888496399, "logits_per_char": -0.7248974442481995, "num_chars": 2}, {"sum_logits": -1.5181057453155518, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.5181057453155518, "logits_per_char": -0.7590528726577759, "num_chars": 2}, {"sum_logits": -1.7008428573608398, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.7008428573608398, "logits_per_char": -0.8504214286804199, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 20, "native_id": "1160", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2481825351715088, "incorrect_loss_raw": 1.4517722924550374, "correct_loss_per_char": 0.6240912675857544, "incorrect_loss_per_char": 0.7258861462275187, "correct_loss_per_token": 1.2481825351715088, "incorrect_loss_per_token": 1.4517722924550374, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2481825351715088, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": true, "logits_per_token": -1.2481825351715088, "logits_per_char": -0.6240912675857544, "num_chars": 2}, {"sum_logits": -1.3949307203292847, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.3949307203292847, "logits_per_char": -0.6974653601646423, "num_chars": 2}, {"sum_logits": -1.4412974119186401, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.4412974119186401, "logits_per_char": -0.7206487059593201, "num_chars": 2}, {"sum_logits": -1.5190887451171875, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.5190887451171875, "logits_per_char": -0.7595443725585938, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 21, "native_id": "9-298", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2506552934646606, "incorrect_loss_raw": 1.4450491269429524, "correct_loss_per_char": 0.6253276467323303, "incorrect_loss_per_char": 0.7225245634714762, "correct_loss_per_token": 1.2506552934646606, "incorrect_loss_per_token": 1.4450491269429524, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2506552934646606, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": true, "logits_per_token": -1.2506552934646606, "logits_per_char": -0.6253276467323303, "num_chars": 2}, {"sum_logits": -1.4218469858169556, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.4218469858169556, "logits_per_char": -0.7109234929084778, "num_chars": 2}, {"sum_logits": -1.425185203552246, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.425185203552246, "logits_per_char": -0.712592601776123, "num_chars": 2}, {"sum_logits": -1.4881151914596558, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.4881151914596558, "logits_per_char": -0.7440575957298279, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 22, "native_id": "1189", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1572825908660889, "incorrect_loss_raw": 1.4871177275975545, "correct_loss_per_char": 0.5786412954330444, "incorrect_loss_per_char": 0.7435588637987772, "correct_loss_per_token": 1.1572825908660889, "incorrect_loss_per_token": 1.4871177275975545, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1572825908660889, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": true, "logits_per_token": -1.1572825908660889, "logits_per_char": -0.5786412954330444, "num_chars": 2}, {"sum_logits": -1.5008536577224731, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.5008536577224731, "logits_per_char": -0.7504268288612366, "num_chars": 2}, {"sum_logits": -1.40999174118042, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.40999174118042, "logits_per_char": -0.70499587059021, "num_chars": 2}, {"sum_logits": -1.5505077838897705, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.5505077838897705, "logits_per_char": -0.7752538919448853, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 23, "native_id": "8-395", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4351016283035278, "incorrect_loss_raw": 1.4010519981384277, "correct_loss_per_char": 0.7175508141517639, "incorrect_loss_per_char": 0.7005259990692139, "correct_loss_per_token": 1.4351016283035278, "incorrect_loss_per_token": 1.4010519981384277, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.121633529663086, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": true, "logits_per_token": -1.121633529663086, "logits_per_char": -0.560816764831543, "num_chars": 2}, {"sum_logits": -1.4351016283035278, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "logits_per_token": -1.4351016283035278, "logits_per_char": -0.7175508141517639, "num_chars": 2}, {"sum_logits": -1.5526951551437378, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "logits_per_token": -1.5526951551437378, "logits_per_char": -0.7763475775718689, "num_chars": 2}, {"sum_logits": -1.5288273096084595, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "logits_per_token": -1.5288273096084595, "logits_per_char": -0.7644136548042297, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 24, "native_id": "7-238", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4890365600585938, "incorrect_loss_raw": 1.3652312755584717, "correct_loss_per_char": 0.7445182800292969, "incorrect_loss_per_char": 0.6826156377792358, "correct_loss_per_token": 1.4890365600585938, "incorrect_loss_per_token": 1.3652312755584717, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2344447374343872, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": true, "logits_per_token": -1.2344447374343872, "logits_per_char": -0.6172223687171936, "num_chars": 2}, {"sum_logits": -1.4890365600585938, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.4890365600585938, "logits_per_char": -0.7445182800292969, "num_chars": 2}, {"sum_logits": -1.4098970890045166, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.4098970890045166, "logits_per_char": -0.7049485445022583, "num_chars": 2}, {"sum_logits": -1.4513520002365112, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.4513520002365112, "logits_per_char": -0.7256760001182556, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 25, "native_id": "7-372", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3231053352355957, "incorrect_loss_raw": 1.4199784596761067, "correct_loss_per_char": 0.6615526676177979, "incorrect_loss_per_char": 0.7099892298380533, "correct_loss_per_token": 1.3231053352355957, "incorrect_loss_per_token": 1.4199784596761067, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3048381805419922, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": true, "logits_per_token": -1.3048381805419922, "logits_per_char": -0.6524190902709961, "num_chars": 2}, {"sum_logits": -1.3231053352355957, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.3231053352355957, "logits_per_char": -0.6615526676177979, "num_chars": 2}, {"sum_logits": -1.4640692472457886, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.4640692472457886, "logits_per_char": -0.7320346236228943, "num_chars": 2}, {"sum_logits": -1.4910279512405396, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.4910279512405396, "logits_per_char": -0.7455139756202698, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 26, "native_id": "8-35", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4324148893356323, "incorrect_loss_raw": 1.3815471331278484, "correct_loss_per_char": 0.7162074446678162, "incorrect_loss_per_char": 0.6907735665639242, "correct_loss_per_token": 1.4324148893356323, "incorrect_loss_per_token": 1.3815471331278484, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3689912557601929, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.3689912557601929, "logits_per_char": -0.6844956278800964, "num_chars": 2}, {"sum_logits": -1.4357242584228516, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.4357242584228516, "logits_per_char": -0.7178621292114258, "num_chars": 2}, {"sum_logits": -1.3399258852005005, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": true, "logits_per_token": -1.3399258852005005, "logits_per_char": -0.6699629426002502, "num_chars": 2}, {"sum_logits": -1.4324148893356323, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.4324148893356323, "logits_per_char": -0.7162074446678162, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 27, "native_id": "9-271", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2971017360687256, "incorrect_loss_raw": 1.4320640563964844, "correct_loss_per_char": 0.6485508680343628, "incorrect_loss_per_char": 0.7160320281982422, "correct_loss_per_token": 1.2971017360687256, "incorrect_loss_per_token": 1.4320640563964844, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2971017360687256, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": true, "logits_per_token": -1.2971017360687256, "logits_per_char": -0.6485508680343628, "num_chars": 2}, {"sum_logits": -1.457476258277893, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.457476258277893, "logits_per_char": -0.7287381291389465, "num_chars": 2}, {"sum_logits": -1.4532819986343384, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.4532819986343384, "logits_per_char": -0.7266409993171692, "num_chars": 2}, {"sum_logits": -1.3854339122772217, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.3854339122772217, "logits_per_char": -0.6927169561386108, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 28, "native_id": "9-409", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5129870176315308, "incorrect_loss_raw": 1.3620661894480388, "correct_loss_per_char": 0.7564935088157654, "incorrect_loss_per_char": 0.6810330947240194, "correct_loss_per_token": 1.5129870176315308, "incorrect_loss_per_token": 1.3620661894480388, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2263190746307373, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": true, "logits_per_token": -1.2263190746307373, "logits_per_char": -0.6131595373153687, "num_chars": 2}, {"sum_logits": -1.5000660419464111, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.5000660419464111, "logits_per_char": -0.7500330209732056, "num_chars": 2}, {"sum_logits": -1.3598134517669678, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.3598134517669678, "logits_per_char": -0.6799067258834839, "num_chars": 2}, {"sum_logits": -1.5129870176315308, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.5129870176315308, "logits_per_char": -0.7564935088157654, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 29, "native_id": "530", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4259462356567383, "incorrect_loss_raw": 1.3936411142349243, "correct_loss_per_char": 0.7129731178283691, "incorrect_loss_per_char": 0.6968205571174622, "correct_loss_per_token": 1.4259462356567383, "incorrect_loss_per_token": 1.3936411142349243, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2244104146957397, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": true, "logits_per_token": -1.2244104146957397, "logits_per_char": -0.6122052073478699, "num_chars": 2}, {"sum_logits": -1.4259462356567383, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.4259462356567383, "logits_per_char": -0.7129731178283691, "num_chars": 2}, {"sum_logits": -1.3495573997497559, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.3495573997497559, "logits_per_char": -0.6747786998748779, "num_chars": 2}, {"sum_logits": -1.6069555282592773, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.6069555282592773, "logits_per_char": -0.8034777641296387, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 30, "native_id": "1426", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4597913026809692, "incorrect_loss_raw": 1.377093990643819, "correct_loss_per_char": 0.7298956513404846, "incorrect_loss_per_char": 0.6885469953219095, "correct_loss_per_token": 1.4597913026809692, "incorrect_loss_per_token": 1.377093990643819, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2303520441055298, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": true, "logits_per_token": -1.2303520441055298, "logits_per_char": -0.6151760220527649, "num_chars": 2}, {"sum_logits": -1.3991377353668213, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.3991377353668213, "logits_per_char": -0.6995688676834106, "num_chars": 2}, {"sum_logits": -1.4597913026809692, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.4597913026809692, "logits_per_char": -0.7298956513404846, "num_chars": 2}, {"sum_logits": -1.5017921924591064, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.5017921924591064, "logits_per_char": -0.7508960962295532, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 31, "native_id": "8-466", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3122706413269043, "incorrect_loss_raw": 1.4263185262680054, "correct_loss_per_char": 0.6561353206634521, "incorrect_loss_per_char": 0.7131592631340027, "correct_loss_per_token": 1.3122706413269043, "incorrect_loss_per_token": 1.4263185262680054, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.353280782699585, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.353280782699585, "logits_per_char": -0.6766403913497925, "num_chars": 2}, {"sum_logits": -1.52373206615448, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.52373206615448, "logits_per_char": -0.76186603307724, "num_chars": 2}, {"sum_logits": -1.4019427299499512, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.4019427299499512, "logits_per_char": -0.7009713649749756, "num_chars": 2}, {"sum_logits": -1.3122706413269043, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": true, "logits_per_token": -1.3122706413269043, "logits_per_char": -0.6561353206634521, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 32, "native_id": "1577", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4069792032241821, "incorrect_loss_raw": 1.4065251747767131, "correct_loss_per_char": 0.7034896016120911, "incorrect_loss_per_char": 0.7032625873883566, "correct_loss_per_token": 1.4069792032241821, "incorrect_loss_per_token": 1.4065251747767131, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1547486782073975, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": true, "logits_per_token": -1.1547486782073975, "logits_per_char": -0.5773743391036987, "num_chars": 2}, {"sum_logits": -1.4069792032241821, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.4069792032241821, "logits_per_char": -0.7034896016120911, "num_chars": 2}, {"sum_logits": -1.526829719543457, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.526829719543457, "logits_per_char": -0.7634148597717285, "num_chars": 2}, {"sum_logits": -1.5379971265792847, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.5379971265792847, "logits_per_char": -0.7689985632896423, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 33, "native_id": "8-257", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2290019989013672, "incorrect_loss_raw": 1.4548142751057942, "correct_loss_per_char": 0.6145009994506836, "incorrect_loss_per_char": 0.7274071375528971, "correct_loss_per_token": 1.2290019989013672, "incorrect_loss_per_token": 1.4548142751057942, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2290019989013672, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": true, "logits_per_token": -1.2290019989013672, "logits_per_char": -0.6145009994506836, "num_chars": 2}, {"sum_logits": -1.448032021522522, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.448032021522522, "logits_per_char": -0.724016010761261, "num_chars": 2}, {"sum_logits": -1.4830456972122192, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.4830456972122192, "logits_per_char": -0.7415228486061096, "num_chars": 2}, {"sum_logits": -1.4333651065826416, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.4333651065826416, "logits_per_char": -0.7166825532913208, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 34, "native_id": "378", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6104027032852173, "incorrect_loss_raw": 1.3402112325032551, "correct_loss_per_char": 0.8052013516426086, "incorrect_loss_per_char": 0.6701056162516276, "correct_loss_per_token": 1.6104027032852173, "incorrect_loss_per_token": 1.3402112325032551, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4238379001617432, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.4238379001617432, "logits_per_char": -0.7119189500808716, "num_chars": 2}, {"sum_logits": -1.6104027032852173, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.6104027032852173, "logits_per_char": -0.8052013516426086, "num_chars": 2}, {"sum_logits": -1.462106466293335, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.462106466293335, "logits_per_char": -0.7310532331466675, "num_chars": 2}, {"sum_logits": -1.1346893310546875, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": true, "logits_per_token": -1.1346893310546875, "logits_per_char": -0.5673446655273438, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 35, "native_id": "8-41", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1744297742843628, "incorrect_loss_raw": 1.4866269826889038, "correct_loss_per_char": 0.5872148871421814, "incorrect_loss_per_char": 0.7433134913444519, "correct_loss_per_token": 1.1744297742843628, "incorrect_loss_per_token": 1.4866269826889038, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1744297742843628, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": true, "logits_per_token": -1.1744297742843628, "logits_per_char": -0.5872148871421814, "num_chars": 2}, {"sum_logits": -1.4151463508605957, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.4151463508605957, "logits_per_char": -0.7075731754302979, "num_chars": 2}, {"sum_logits": -1.5298676490783691, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.5298676490783691, "logits_per_char": -0.7649338245391846, "num_chars": 2}, {"sum_logits": -1.5148669481277466, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.5148669481277466, "logits_per_char": -0.7574334740638733, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 36, "native_id": "9-540", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2530856132507324, "incorrect_loss_raw": 1.4424906571706135, "correct_loss_per_char": 0.6265428066253662, "incorrect_loss_per_char": 0.7212453285853068, "correct_loss_per_token": 1.2530856132507324, "incorrect_loss_per_token": 1.4424906571706135, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2530856132507324, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": true, "logits_per_token": -1.2530856132507324, "logits_per_char": -0.6265428066253662, "num_chars": 2}, {"sum_logits": -1.4146140813827515, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.4146140813827515, "logits_per_char": -0.7073070406913757, "num_chars": 2}, {"sum_logits": -1.4845468997955322, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.4845468997955322, "logits_per_char": -0.7422734498977661, "num_chars": 2}, {"sum_logits": -1.4283109903335571, "num_tokens": 1, "num_tokens_all": 288, "is_greedy": false, "logits_per_token": -1.4283109903335571, "logits_per_char": -0.7141554951667786, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 37, "native_id": "266", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4561132192611694, "incorrect_loss_raw": 1.384933869043986, "correct_loss_per_char": 0.7280566096305847, "incorrect_loss_per_char": 0.692466934521993, "correct_loss_per_token": 1.4561132192611694, "incorrect_loss_per_token": 1.384933869043986, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1726335287094116, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": true, "logits_per_token": -1.1726335287094116, "logits_per_char": -0.5863167643547058, "num_chars": 2}, {"sum_logits": -1.4682351350784302, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.4682351350784302, "logits_per_char": -0.7341175675392151, "num_chars": 2}, {"sum_logits": -1.5139329433441162, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.5139329433441162, "logits_per_char": -0.7569664716720581, "num_chars": 2}, {"sum_logits": -1.4561132192611694, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.4561132192611694, "logits_per_char": -0.7280566096305847, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 38, "native_id": "1309", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8023775815963745, "incorrect_loss_raw": 1.3003702561060588, "correct_loss_per_char": 0.9011887907981873, "incorrect_loss_per_char": 0.6501851280530294, "correct_loss_per_token": 1.8023775815963745, "incorrect_loss_per_token": 1.3003702561060588, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.087194561958313, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": true, "logits_per_token": -1.087194561958313, "logits_per_char": -0.5435972809791565, "num_chars": 2}, {"sum_logits": -1.3202546834945679, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.3202546834945679, "logits_per_char": -0.6601273417472839, "num_chars": 2}, {"sum_logits": -1.4936615228652954, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.4936615228652954, "logits_per_char": -0.7468307614326477, "num_chars": 2}, {"sum_logits": -1.8023775815963745, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.8023775815963745, "logits_per_char": -0.9011887907981873, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 39, "native_id": "7-1197", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1619702577590942, "incorrect_loss_raw": 1.4841669797897339, "correct_loss_per_char": 0.5809851288795471, "incorrect_loss_per_char": 0.7420834898948669, "correct_loss_per_token": 1.1619702577590942, "incorrect_loss_per_token": 1.4841669797897339, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1619702577590942, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": true, "logits_per_token": -1.1619702577590942, "logits_per_char": -0.5809851288795471, "num_chars": 2}, {"sum_logits": -1.4744222164154053, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.4744222164154053, "logits_per_char": -0.7372111082077026, "num_chars": 2}, {"sum_logits": -1.4267369508743286, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.4267369508743286, "logits_per_char": -0.7133684754371643, "num_chars": 2}, {"sum_logits": -1.5513417720794678, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.5513417720794678, "logits_per_char": -0.7756708860397339, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 40, "native_id": "7-891", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5634214878082275, "incorrect_loss_raw": 1.3650763829549153, "correct_loss_per_char": 0.7817107439041138, "incorrect_loss_per_char": 0.6825381914774576, "correct_loss_per_token": 1.5634214878082275, "incorrect_loss_per_token": 1.3650763829549153, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0692331790924072, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": true, "logits_per_token": -1.0692331790924072, "logits_per_char": -0.5346165895462036, "num_chars": 2}, {"sum_logits": -1.5757358074188232, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.5757358074188232, "logits_per_char": -0.7878679037094116, "num_chars": 2}, {"sum_logits": -1.4502601623535156, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.4502601623535156, "logits_per_char": -0.7251300811767578, "num_chars": 2}, {"sum_logits": -1.5634214878082275, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.5634214878082275, "logits_per_char": -0.7817107439041138, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 41, "native_id": "1180", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.590935230255127, "incorrect_loss_raw": 1.3632731040318806, "correct_loss_per_char": 0.7954676151275635, "incorrect_loss_per_char": 0.6816365520159403, "correct_loss_per_token": 1.590935230255127, "incorrect_loss_per_token": 1.3632731040318806, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1002674102783203, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": true, "logits_per_token": -1.1002674102783203, "logits_per_char": -0.5501337051391602, "num_chars": 2}, {"sum_logits": -1.6168570518493652, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.6168570518493652, "logits_per_char": -0.8084285259246826, "num_chars": 2}, {"sum_logits": -1.3726948499679565, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.3726948499679565, "logits_per_char": -0.6863474249839783, "num_chars": 2}, {"sum_logits": -1.590935230255127, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.590935230255127, "logits_per_char": -0.7954676151275635, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 42, "native_id": "1204", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2911944389343262, "incorrect_loss_raw": 1.4328492085138957, "correct_loss_per_char": 0.6455972194671631, "incorrect_loss_per_char": 0.7164246042569479, "correct_loss_per_token": 1.2911944389343262, "incorrect_loss_per_token": 1.4328492085138957, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2911944389343262, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": true, "logits_per_token": -1.2911944389343262, "logits_per_char": -0.6455972194671631, "num_chars": 2}, {"sum_logits": -1.5701026916503906, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.5701026916503906, "logits_per_char": -0.7850513458251953, "num_chars": 2}, {"sum_logits": -1.3808839321136475, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.3808839321136475, "logits_per_char": -0.6904419660568237, "num_chars": 2}, {"sum_logits": -1.347561001777649, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.347561001777649, "logits_per_char": -0.6737805008888245, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 43, "native_id": "7-52", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6365020275115967, "incorrect_loss_raw": 1.3268283208211262, "correct_loss_per_char": 0.8182510137557983, "incorrect_loss_per_char": 0.6634141604105631, "correct_loss_per_token": 1.6365020275115967, "incorrect_loss_per_token": 1.3268283208211262, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2155433893203735, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": true, "logits_per_token": -1.2155433893203735, "logits_per_char": -0.6077716946601868, "num_chars": 2}, {"sum_logits": -1.4296197891235352, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.4296197891235352, "logits_per_char": -0.7148098945617676, "num_chars": 2}, {"sum_logits": -1.3353217840194702, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.3353217840194702, "logits_per_char": -0.6676608920097351, "num_chars": 2}, {"sum_logits": -1.6365020275115967, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.6365020275115967, "logits_per_char": -0.8182510137557983, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 44, "native_id": "1759", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.246341586112976, "incorrect_loss_raw": 1.448771317799886, "correct_loss_per_char": 0.623170793056488, "incorrect_loss_per_char": 0.724385658899943, "correct_loss_per_token": 1.246341586112976, "incorrect_loss_per_token": 1.448771317799886, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.246341586112976, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": true, "logits_per_token": -1.246341586112976, "logits_per_char": -0.623170793056488, "num_chars": 2}, {"sum_logits": -1.464762568473816, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.464762568473816, "logits_per_char": -0.732381284236908, "num_chars": 2}, {"sum_logits": -1.4879103899002075, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.4879103899002075, "logits_per_char": -0.7439551949501038, "num_chars": 2}, {"sum_logits": -1.3936409950256348, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.3936409950256348, "logits_per_char": -0.6968204975128174, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 45, "native_id": "9-655", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2438687086105347, "incorrect_loss_raw": 1.4480139414469402, "correct_loss_per_char": 0.6219343543052673, "incorrect_loss_per_char": 0.7240069707234701, "correct_loss_per_token": 1.2438687086105347, "incorrect_loss_per_token": 1.4480139414469402, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2438687086105347, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.2438687086105347, "logits_per_char": -0.6219343543052673, "num_chars": 2}, {"sum_logits": -1.4859389066696167, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4859389066696167, "logits_per_char": -0.7429694533348083, "num_chars": 2}, {"sum_logits": -1.4445374011993408, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4445374011993408, "logits_per_char": -0.7222687005996704, "num_chars": 2}, {"sum_logits": -1.4135655164718628, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4135655164718628, "logits_per_char": -0.7067827582359314, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 46, "native_id": "132", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1606671810150146, "incorrect_loss_raw": 1.490432580312093, "correct_loss_per_char": 0.5803335905075073, "incorrect_loss_per_char": 0.7452162901560465, "correct_loss_per_token": 1.1606671810150146, "incorrect_loss_per_token": 1.490432580312093, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1606671810150146, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": true, "logits_per_token": -1.1606671810150146, "logits_per_char": -0.5803335905075073, "num_chars": 2}, {"sum_logits": -1.6594127416610718, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.6594127416610718, "logits_per_char": -0.8297063708305359, "num_chars": 2}, {"sum_logits": -1.325363039970398, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.325363039970398, "logits_per_char": -0.662681519985199, "num_chars": 2}, {"sum_logits": -1.4865219593048096, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.4865219593048096, "logits_per_char": -0.7432609796524048, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 47, "native_id": "8-79", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3436715602874756, "incorrect_loss_raw": 1.4081144730250041, "correct_loss_per_char": 0.6718357801437378, "incorrect_loss_per_char": 0.7040572365125021, "correct_loss_per_token": 1.3436715602874756, "incorrect_loss_per_token": 1.4081144730250041, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3436715602874756, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": true, "logits_per_token": -1.3436715602874756, "logits_per_char": -0.6718357801437378, "num_chars": 2}, {"sum_logits": -1.3825043439865112, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.3825043439865112, "logits_per_char": -0.6912521719932556, "num_chars": 2}, {"sum_logits": -1.4452353715896606, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.4452353715896606, "logits_per_char": -0.7226176857948303, "num_chars": 2}, {"sum_logits": -1.3966037034988403, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.3966037034988403, "logits_per_char": -0.6983018517494202, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 48, "native_id": "1835", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4517364501953125, "incorrect_loss_raw": 1.3903220891952515, "correct_loss_per_char": 0.7258682250976562, "incorrect_loss_per_char": 0.6951610445976257, "correct_loss_per_token": 1.4517364501953125, "incorrect_loss_per_token": 1.3903220891952515, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1336748600006104, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": true, "logits_per_token": -1.1336748600006104, "logits_per_char": -0.5668374300003052, "num_chars": 2}, {"sum_logits": -1.4517364501953125, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.4517364501953125, "logits_per_char": -0.7258682250976562, "num_chars": 2}, {"sum_logits": -1.488485336303711, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.488485336303711, "logits_per_char": -0.7442426681518555, "num_chars": 2}, {"sum_logits": -1.548806071281433, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.548806071281433, "logits_per_char": -0.7744030356407166, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 49, "native_id": "9-149", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.374218463897705, "incorrect_loss_raw": 1.4208096265792847, "correct_loss_per_char": 0.6871092319488525, "incorrect_loss_per_char": 0.7104048132896423, "correct_loss_per_token": 1.374218463897705, "incorrect_loss_per_token": 1.4208096265792847, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1401327848434448, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": true, "logits_per_token": -1.1401327848434448, "logits_per_char": -0.5700663924217224, "num_chars": 2}, {"sum_logits": -1.452409029006958, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.452409029006958, "logits_per_char": -0.726204514503479, "num_chars": 2}, {"sum_logits": -1.374218463897705, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.374218463897705, "logits_per_char": -0.6871092319488525, "num_chars": 2}, {"sum_logits": -1.6698870658874512, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.6698870658874512, "logits_per_char": -0.8349435329437256, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 50, "native_id": "695", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0389294624328613, "incorrect_loss_raw": 1.5520121256510417, "correct_loss_per_char": 0.5194647312164307, "incorrect_loss_per_char": 0.7760060628255209, "correct_loss_per_token": 1.0389294624328613, "incorrect_loss_per_token": 1.5520121256510417, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0389294624328613, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": true, "logits_per_token": -1.0389294624328613, "logits_per_char": -0.5194647312164307, "num_chars": 2}, {"sum_logits": -1.5685876607894897, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.5685876607894897, "logits_per_char": -0.7842938303947449, "num_chars": 2}, {"sum_logits": -1.397796869277954, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.397796869277954, "logits_per_char": -0.698898434638977, "num_chars": 2}, {"sum_logits": -1.6896518468856812, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.6896518468856812, "logits_per_char": -0.8448259234428406, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 51, "native_id": "8-179", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4495320320129395, "incorrect_loss_raw": 1.378138820330302, "correct_loss_per_char": 0.7247660160064697, "incorrect_loss_per_char": 0.689069410165151, "correct_loss_per_token": 1.4495320320129395, "incorrect_loss_per_token": 1.378138820330302, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3124022483825684, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": true, "logits_per_token": -1.3124022483825684, "logits_per_char": -0.6562011241912842, "num_chars": 2}, {"sum_logits": -1.419935703277588, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.419935703277588, "logits_per_char": -0.709967851638794, "num_chars": 2}, {"sum_logits": -1.4020785093307495, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.4020785093307495, "logits_per_char": -0.7010392546653748, "num_chars": 2}, {"sum_logits": -1.4495320320129395, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.4495320320129395, "logits_per_char": -0.7247660160064697, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 52, "native_id": "7-50", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2566721439361572, "incorrect_loss_raw": 1.4409899314244587, "correct_loss_per_char": 0.6283360719680786, "incorrect_loss_per_char": 0.7204949657122294, "correct_loss_per_token": 1.2566721439361572, "incorrect_loss_per_token": 1.4409899314244587, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2566721439361572, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": true, "logits_per_token": -1.2566721439361572, "logits_per_char": -0.6283360719680786, "num_chars": 2}, {"sum_logits": -1.4562766551971436, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.4562766551971436, "logits_per_char": -0.7281383275985718, "num_chars": 2}, {"sum_logits": -1.4549649953842163, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.4549649953842163, "logits_per_char": -0.7274824976921082, "num_chars": 2}, {"sum_logits": -1.4117281436920166, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.4117281436920166, "logits_per_char": -0.7058640718460083, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 53, "native_id": "508", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4356601238250732, "incorrect_loss_raw": 1.3940261999766033, "correct_loss_per_char": 0.7178300619125366, "incorrect_loss_per_char": 0.6970130999883016, "correct_loss_per_token": 1.4356601238250732, "incorrect_loss_per_token": 1.3940261999766033, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1435725688934326, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": true, "logits_per_token": -1.1435725688934326, "logits_per_char": -0.5717862844467163, "num_chars": 2}, {"sum_logits": -1.5069074630737305, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.5069074630737305, "logits_per_char": -0.7534537315368652, "num_chars": 2}, {"sum_logits": -1.5315985679626465, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.5315985679626465, "logits_per_char": -0.7657992839813232, "num_chars": 2}, {"sum_logits": -1.4356601238250732, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.4356601238250732, "logits_per_char": -0.7178300619125366, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 54, "native_id": "1674", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.59505033493042, "incorrect_loss_raw": 1.3792804678281148, "correct_loss_per_char": 0.79752516746521, "incorrect_loss_per_char": 0.6896402339140574, "correct_loss_per_token": 1.59505033493042, "incorrect_loss_per_token": 1.3792804678281148, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9790613055229187, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": true, "logits_per_token": -0.9790613055229187, "logits_per_char": -0.48953065276145935, "num_chars": 2}, {"sum_logits": -1.485982894897461, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.485982894897461, "logits_per_char": -0.7429914474487305, "num_chars": 2}, {"sum_logits": -1.59505033493042, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.59505033493042, "logits_per_char": -0.79752516746521, "num_chars": 2}, {"sum_logits": -1.6727972030639648, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.6727972030639648, "logits_per_char": -0.8363986015319824, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 55, "native_id": "163", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2154124975204468, "incorrect_loss_raw": 1.4593661626180012, "correct_loss_per_char": 0.6077062487602234, "incorrect_loss_per_char": 0.7296830813090006, "correct_loss_per_token": 1.2154124975204468, "incorrect_loss_per_token": 1.4593661626180012, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2154124975204468, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": true, "logits_per_token": -1.2154124975204468, "logits_per_char": -0.6077062487602234, "num_chars": 2}, {"sum_logits": -1.5059713125228882, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.5059713125228882, "logits_per_char": -0.7529856562614441, "num_chars": 2}, {"sum_logits": -1.3970491886138916, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.3970491886138916, "logits_per_char": -0.6985245943069458, "num_chars": 2}, {"sum_logits": -1.4750779867172241, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.4750779867172241, "logits_per_char": -0.7375389933586121, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 56, "native_id": "7-49", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4514116048812866, "incorrect_loss_raw": 1.399190107981364, "correct_loss_per_char": 0.7257058024406433, "incorrect_loss_per_char": 0.699595053990682, "correct_loss_per_token": 1.4514116048812866, "incorrect_loss_per_token": 1.399190107981364, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1357383728027344, "num_tokens": 1, "num_tokens_all": 246, "is_greedy": true, "logits_per_token": -1.1357383728027344, "logits_per_char": -0.5678691864013672, "num_chars": 2}, {"sum_logits": -1.3945691585540771, "num_tokens": 1, "num_tokens_all": 246, "is_greedy": false, "logits_per_token": -1.3945691585540771, "logits_per_char": -0.6972845792770386, "num_chars": 2}, {"sum_logits": -1.4514116048812866, "num_tokens": 1, "num_tokens_all": 246, "is_greedy": false, "logits_per_token": -1.4514116048812866, "logits_per_char": -0.7257058024406433, "num_chars": 2}, {"sum_logits": -1.6672627925872803, "num_tokens": 1, "num_tokens_all": 246, "is_greedy": false, "logits_per_token": -1.6672627925872803, "logits_per_char": -0.8336313962936401, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 57, "native_id": "8-393", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3099521398544312, "incorrect_loss_raw": 1.423459529876709, "correct_loss_per_char": 0.6549760699272156, "incorrect_loss_per_char": 0.7117297649383545, "correct_loss_per_token": 1.3099521398544312, "incorrect_loss_per_token": 1.423459529876709, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.370261549949646, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.370261549949646, "logits_per_char": -0.685130774974823, "num_chars": 2}, {"sum_logits": -1.342061161994934, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.342061161994934, "logits_per_char": -0.671030580997467, "num_chars": 2}, {"sum_logits": -1.5580558776855469, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": false, "logits_per_token": -1.5580558776855469, "logits_per_char": -0.7790279388427734, "num_chars": 2}, {"sum_logits": -1.3099521398544312, "num_tokens": 1, "num_tokens_all": 327, "is_greedy": true, "logits_per_token": -1.3099521398544312, "logits_per_char": -0.6549760699272156, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 58, "native_id": "788", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.576416015625, "incorrect_loss_raw": 1.3448006709416707, "correct_loss_per_char": 0.7882080078125, "incorrect_loss_per_char": 0.6724003354708353, "correct_loss_per_token": 1.576416015625, "incorrect_loss_per_token": 1.3448006709416707, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.215348482131958, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": true, "logits_per_token": -1.215348482131958, "logits_per_char": -0.607674241065979, "num_chars": 2}, {"sum_logits": -1.576416015625, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.576416015625, "logits_per_char": -0.7882080078125, "num_chars": 2}, {"sum_logits": -1.3760826587677002, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.3760826587677002, "logits_per_char": -0.6880413293838501, "num_chars": 2}, {"sum_logits": -1.442970871925354, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.442970871925354, "logits_per_char": -0.721485435962677, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 59, "native_id": "9-29", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2946845293045044, "incorrect_loss_raw": 1.429991602897644, "correct_loss_per_char": 0.6473422646522522, "incorrect_loss_per_char": 0.714995801448822, "correct_loss_per_token": 1.2946845293045044, "incorrect_loss_per_token": 1.429991602897644, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2946845293045044, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": true, "logits_per_token": -1.2946845293045044, "logits_per_char": -0.6473422646522522, "num_chars": 2}, {"sum_logits": -1.4176201820373535, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.4176201820373535, "logits_per_char": -0.7088100910186768, "num_chars": 2}, {"sum_logits": -1.539622187614441, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.539622187614441, "logits_per_char": -0.7698110938072205, "num_chars": 2}, {"sum_logits": -1.3327324390411377, "num_tokens": 1, "num_tokens_all": 297, "is_greedy": false, "logits_per_token": -1.3327324390411377, "logits_per_char": -0.6663662195205688, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 60, "native_id": "9-368", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4003291130065918, "incorrect_loss_raw": 1.4026013612747192, "correct_loss_per_char": 0.7001645565032959, "incorrect_loss_per_char": 0.7013006806373596, "correct_loss_per_token": 1.4003291130065918, "incorrect_loss_per_token": 1.4026013612747192, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2124226093292236, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": true, "logits_per_token": -1.2124226093292236, "logits_per_char": -0.6062113046646118, "num_chars": 2}, {"sum_logits": -1.3859471082687378, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.3859471082687378, "logits_per_char": -0.6929735541343689, "num_chars": 2}, {"sum_logits": -1.4003291130065918, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.4003291130065918, "logits_per_char": -0.7001645565032959, "num_chars": 2}, {"sum_logits": -1.6094343662261963, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.6094343662261963, "logits_per_char": -0.8047171831130981, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 61, "native_id": "7-671", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4981954097747803, "incorrect_loss_raw": 1.3745189507802327, "correct_loss_per_char": 0.7490977048873901, "incorrect_loss_per_char": 0.6872594753901163, "correct_loss_per_token": 1.4981954097747803, "incorrect_loss_per_token": 1.3745189507802327, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1339647769927979, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": true, "logits_per_token": -1.1339647769927979, "logits_per_char": -0.5669823884963989, "num_chars": 2}, {"sum_logits": -1.4840327501296997, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "logits_per_token": -1.4840327501296997, "logits_per_char": -0.7420163750648499, "num_chars": 2}, {"sum_logits": -1.4981954097747803, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "logits_per_token": -1.4981954097747803, "logits_per_char": -0.7490977048873901, "num_chars": 2}, {"sum_logits": -1.5055593252182007, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "logits_per_token": -1.5055593252182007, "logits_per_char": -0.7527796626091003, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 62, "native_id": "1272", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1747610569000244, "incorrect_loss_raw": 1.478629509607951, "correct_loss_per_char": 0.5873805284500122, "incorrect_loss_per_char": 0.7393147548039755, "correct_loss_per_token": 1.1747610569000244, "incorrect_loss_per_token": 1.478629509607951, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1747610569000244, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": true, "logits_per_token": -1.1747610569000244, "logits_per_char": -0.5873805284500122, "num_chars": 2}, {"sum_logits": -1.4293242692947388, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.4293242692947388, "logits_per_char": -0.7146621346473694, "num_chars": 2}, {"sum_logits": -1.5299217700958252, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.5299217700958252, "logits_per_char": -0.7649608850479126, "num_chars": 2}, {"sum_logits": -1.4766424894332886, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.4766424894332886, "logits_per_char": -0.7383212447166443, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 63, "native_id": "648", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4914859533309937, "incorrect_loss_raw": 1.37088938554128, "correct_loss_per_char": 0.7457429766654968, "incorrect_loss_per_char": 0.68544469277064, "correct_loss_per_token": 1.4914859533309937, "incorrect_loss_per_token": 1.37088938554128, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2121810913085938, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": true, "logits_per_token": -1.2121810913085938, "logits_per_char": -0.6060905456542969, "num_chars": 2}, {"sum_logits": -1.4914859533309937, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.4914859533309937, "logits_per_char": -0.7457429766654968, "num_chars": 2}, {"sum_logits": -1.52776300907135, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.52776300907135, "logits_per_char": -0.763881504535675, "num_chars": 2}, {"sum_logits": -1.3727240562438965, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.3727240562438965, "logits_per_char": -0.6863620281219482, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 64, "native_id": "9-1180", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6110773086547852, "incorrect_loss_raw": 1.334460695584615, "correct_loss_per_char": 0.8055386543273926, "incorrect_loss_per_char": 0.6672303477923075, "correct_loss_per_token": 1.6110773086547852, "incorrect_loss_per_token": 1.334460695584615, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2026535272598267, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": true, "logits_per_token": -1.2026535272598267, "logits_per_char": -0.6013267636299133, "num_chars": 2}, {"sum_logits": -1.6110773086547852, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": false, "logits_per_token": -1.6110773086547852, "logits_per_char": -0.8055386543273926, "num_chars": 2}, {"sum_logits": -1.4529410600662231, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": false, "logits_per_token": -1.4529410600662231, "logits_per_char": -0.7264705300331116, "num_chars": 2}, {"sum_logits": -1.3477874994277954, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": false, "logits_per_token": -1.3477874994277954, "logits_per_char": -0.6738937497138977, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 65, "native_id": "9-227", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4196854829788208, "incorrect_loss_raw": 1.391738732655843, "correct_loss_per_char": 0.7098427414894104, "incorrect_loss_per_char": 0.6958693663279215, "correct_loss_per_token": 1.4196854829788208, "incorrect_loss_per_token": 1.391738732655843, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2398796081542969, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": true, "logits_per_token": -1.2398796081542969, "logits_per_char": -0.6199398040771484, "num_chars": 2}, {"sum_logits": -1.3920774459838867, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.3920774459838867, "logits_per_char": -0.6960387229919434, "num_chars": 2}, {"sum_logits": -1.4196854829788208, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.4196854829788208, "logits_per_char": -0.7098427414894104, "num_chars": 2}, {"sum_logits": -1.5432591438293457, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.5432591438293457, "logits_per_char": -0.7716295719146729, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 66, "native_id": "1582", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0900558233261108, "incorrect_loss_raw": 1.5210315783818562, "correct_loss_per_char": 0.5450279116630554, "incorrect_loss_per_char": 0.7605157891909281, "correct_loss_per_token": 1.0900558233261108, "incorrect_loss_per_token": 1.5210315783818562, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0900558233261108, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": true, "logits_per_token": -1.0900558233261108, "logits_per_char": -0.5450279116630554, "num_chars": 2}, {"sum_logits": -1.5314675569534302, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": false, "logits_per_token": -1.5314675569534302, "logits_per_char": -0.7657337784767151, "num_chars": 2}, {"sum_logits": -1.451002597808838, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": false, "logits_per_token": -1.451002597808838, "logits_per_char": -0.725501298904419, "num_chars": 2}, {"sum_logits": -1.5806245803833008, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": false, "logits_per_token": -1.5806245803833008, "logits_per_char": -0.7903122901916504, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 67, "native_id": "8-125", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.347767949104309, "incorrect_loss_raw": 1.4059575001398723, "correct_loss_per_char": 0.6738839745521545, "incorrect_loss_per_char": 0.7029787500699362, "correct_loss_per_token": 1.347767949104309, "incorrect_loss_per_token": 1.4059575001398723, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.347767949104309, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": true, "logits_per_token": -1.347767949104309, "logits_per_char": -0.6738839745521545, "num_chars": 2}, {"sum_logits": -1.4027483463287354, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.4027483463287354, "logits_per_char": -0.7013741731643677, "num_chars": 2}, {"sum_logits": -1.4038039445877075, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.4038039445877075, "logits_per_char": -0.7019019722938538, "num_chars": 2}, {"sum_logits": -1.4113202095031738, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.4113202095031738, "logits_per_char": -0.7056601047515869, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 68, "native_id": "1923", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2052066326141357, "incorrect_loss_raw": 1.4693067073822021, "correct_loss_per_char": 0.6026033163070679, "incorrect_loss_per_char": 0.7346533536911011, "correct_loss_per_token": 1.2052066326141357, "incorrect_loss_per_token": 1.4693067073822021, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2052066326141357, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": true, "logits_per_token": -1.2052066326141357, "logits_per_char": -0.6026033163070679, "num_chars": 2}, {"sum_logits": -1.4451361894607544, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.4451361894607544, "logits_per_char": -0.7225680947303772, "num_chars": 2}, {"sum_logits": -1.349051594734192, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.349051594734192, "logits_per_char": -0.674525797367096, "num_chars": 2}, {"sum_logits": -1.6137323379516602, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.6137323379516602, "logits_per_char": -0.8068661689758301, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 69, "native_id": "9-229", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5318487882614136, "incorrect_loss_raw": 1.3666763703028362, "correct_loss_per_char": 0.7659243941307068, "incorrect_loss_per_char": 0.6833381851514181, "correct_loss_per_token": 1.5318487882614136, "incorrect_loss_per_token": 1.3666763703028362, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1173175573349, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": true, "logits_per_token": -1.1173175573349, "logits_per_char": -0.55865877866745, "num_chars": 2}, {"sum_logits": -1.4532469511032104, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.4532469511032104, "logits_per_char": -0.7266234755516052, "num_chars": 2}, {"sum_logits": -1.529464602470398, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.529464602470398, "logits_per_char": -0.764732301235199, "num_chars": 2}, {"sum_logits": -1.5318487882614136, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.5318487882614136, "logits_per_char": -0.7659243941307068, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 70, "native_id": "1702", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5375926494598389, "incorrect_loss_raw": 1.354914625485738, "correct_loss_per_char": 0.7687963247299194, "incorrect_loss_per_char": 0.677457312742869, "correct_loss_per_token": 1.5375926494598389, "incorrect_loss_per_token": 1.354914625485738, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1994521617889404, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": true, "logits_per_token": -1.1994521617889404, "logits_per_char": -0.5997260808944702, "num_chars": 2}, {"sum_logits": -1.5375926494598389, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.5375926494598389, "logits_per_char": -0.7687963247299194, "num_chars": 2}, {"sum_logits": -1.4473069906234741, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.4473069906234741, "logits_per_char": -0.7236534953117371, "num_chars": 2}, {"sum_logits": -1.4179847240447998, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.4179847240447998, "logits_per_char": -0.7089923620223999, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 71, "native_id": "8-260", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3867011070251465, "incorrect_loss_raw": 1.394998590151469, "correct_loss_per_char": 0.6933505535125732, "incorrect_loss_per_char": 0.6974992950757345, "correct_loss_per_token": 1.3867011070251465, "incorrect_loss_per_token": 1.394998590151469, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4060909748077393, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.4060909748077393, "logits_per_char": -0.7030454874038696, "num_chars": 2}, {"sum_logits": -1.4394171237945557, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.4394171237945557, "logits_per_char": -0.7197085618972778, "num_chars": 2}, {"sum_logits": -1.3867011070251465, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.3867011070251465, "logits_per_char": -0.6933505535125732, "num_chars": 2}, {"sum_logits": -1.3394876718521118, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": true, "logits_per_token": -1.3394876718521118, "logits_per_char": -0.6697438359260559, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 72, "native_id": "9-491", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4447120428085327, "incorrect_loss_raw": 1.3816739718119304, "correct_loss_per_char": 0.7223560214042664, "incorrect_loss_per_char": 0.6908369859059652, "correct_loss_per_token": 1.4447120428085327, "incorrect_loss_per_token": 1.3816739718119304, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2197993993759155, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": true, "logits_per_token": -1.2197993993759155, "logits_per_char": -0.6098996996879578, "num_chars": 2}, {"sum_logits": -1.4808987379074097, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.4808987379074097, "logits_per_char": -0.7404493689537048, "num_chars": 2}, {"sum_logits": -1.4447120428085327, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.4447120428085327, "logits_per_char": -0.7223560214042664, "num_chars": 2}, {"sum_logits": -1.4443237781524658, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.4443237781524658, "logits_per_char": -0.7221618890762329, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 73, "native_id": "75", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3155170679092407, "incorrect_loss_raw": 1.4388847351074219, "correct_loss_per_char": 0.6577585339546204, "incorrect_loss_per_char": 0.7194423675537109, "correct_loss_per_token": 1.3155170679092407, "incorrect_loss_per_token": 1.4388847351074219, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3155170679092407, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.3155170679092407, "logits_per_char": -0.6577585339546204, "num_chars": 2}, {"sum_logits": -1.7277859449386597, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.7277859449386597, "logits_per_char": -0.8638929724693298, "num_chars": 2}, {"sum_logits": -1.3054827451705933, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.3054827451705933, "logits_per_char": -0.6527413725852966, "num_chars": 2}, {"sum_logits": -1.2833855152130127, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": true, "logits_per_token": -1.2833855152130127, "logits_per_char": -0.6416927576065063, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 74, "native_id": "1215", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5117583274841309, "incorrect_loss_raw": 1.3568655649820964, "correct_loss_per_char": 0.7558791637420654, "incorrect_loss_per_char": 0.6784327824910482, "correct_loss_per_token": 1.5117583274841309, "incorrect_loss_per_token": 1.3568655649820964, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.290136694908142, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": true, "logits_per_token": -1.290136694908142, "logits_per_char": -0.645068347454071, "num_chars": 2}, {"sum_logits": -1.4282745122909546, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.4282745122909546, "logits_per_char": -0.7141372561454773, "num_chars": 2}, {"sum_logits": -1.5117583274841309, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.5117583274841309, "logits_per_char": -0.7558791637420654, "num_chars": 2}, {"sum_logits": -1.3521854877471924, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.3521854877471924, "logits_per_char": -0.6760927438735962, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 75, "native_id": "8-93", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4193413257598877, "incorrect_loss_raw": 1.3925323883692424, "correct_loss_per_char": 0.7096706628799438, "incorrect_loss_per_char": 0.6962661941846212, "correct_loss_per_token": 1.4193413257598877, "incorrect_loss_per_token": 1.3925323883692424, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2200502157211304, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": true, "logits_per_token": -1.2200502157211304, "logits_per_char": -0.6100251078605652, "num_chars": 2}, {"sum_logits": -1.4744224548339844, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.4744224548339844, "logits_per_char": -0.7372112274169922, "num_chars": 2}, {"sum_logits": -1.4193413257598877, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.4193413257598877, "logits_per_char": -0.7096706628799438, "num_chars": 2}, {"sum_logits": -1.4831244945526123, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.4831244945526123, "logits_per_char": -0.7415622472763062, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 76, "native_id": "7-988", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3199352025985718, "incorrect_loss_raw": 1.418503999710083, "correct_loss_per_char": 0.6599676012992859, "incorrect_loss_per_char": 0.7092519998550415, "correct_loss_per_token": 1.3199352025985718, "incorrect_loss_per_token": 1.418503999710083, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3199352025985718, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": true, "logits_per_token": -1.3199352025985718, "logits_per_char": -0.6599676012992859, "num_chars": 2}, {"sum_logits": -1.337058424949646, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.337058424949646, "logits_per_char": -0.668529212474823, "num_chars": 2}, {"sum_logits": -1.4236714839935303, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.4236714839935303, "logits_per_char": -0.7118357419967651, "num_chars": 2}, {"sum_logits": -1.4947820901870728, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.4947820901870728, "logits_per_char": -0.7473910450935364, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 77, "native_id": "9-1139", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3554041385650635, "incorrect_loss_raw": 1.4107461373011272, "correct_loss_per_char": 0.6777020692825317, "incorrect_loss_per_char": 0.7053730686505636, "correct_loss_per_token": 1.3554041385650635, "incorrect_loss_per_token": 1.4107461373011272, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2913682460784912, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": true, "logits_per_token": -1.2913682460784912, "logits_per_char": -0.6456841230392456, "num_chars": 2}, {"sum_logits": -1.4407544136047363, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.4407544136047363, "logits_per_char": -0.7203772068023682, "num_chars": 2}, {"sum_logits": -1.3554041385650635, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.3554041385650635, "logits_per_char": -0.6777020692825317, "num_chars": 2}, {"sum_logits": -1.5001157522201538, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.5001157522201538, "logits_per_char": -0.7500578761100769, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 78, "native_id": "1545", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4784682989120483, "incorrect_loss_raw": 1.3678054412206013, "correct_loss_per_char": 0.7392341494560242, "incorrect_loss_per_char": 0.6839027206103007, "correct_loss_per_token": 1.4784682989120483, "incorrect_loss_per_token": 1.3678054412206013, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3307374715805054, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": true, "logits_per_token": -1.3307374715805054, "logits_per_char": -0.6653687357902527, "num_chars": 2}, {"sum_logits": -1.4784682989120483, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.4784682989120483, "logits_per_char": -0.7392341494560242, "num_chars": 2}, {"sum_logits": -1.3331202268600464, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.3331202268600464, "logits_per_char": -0.6665601134300232, "num_chars": 2}, {"sum_logits": -1.4395586252212524, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.4395586252212524, "logits_per_char": -0.7197793126106262, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 79, "native_id": "7-664", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3622673749923706, "incorrect_loss_raw": 1.4054286082585652, "correct_loss_per_char": 0.6811336874961853, "incorrect_loss_per_char": 0.7027143041292826, "correct_loss_per_token": 1.3622673749923706, "incorrect_loss_per_token": 1.4054286082585652, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3290376663208008, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": true, "logits_per_token": -1.3290376663208008, "logits_per_char": -0.6645188331604004, "num_chars": 2}, {"sum_logits": -1.5196126699447632, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.5196126699447632, "logits_per_char": -0.7598063349723816, "num_chars": 2}, {"sum_logits": -1.3622673749923706, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.3622673749923706, "logits_per_char": -0.6811336874961853, "num_chars": 2}, {"sum_logits": -1.3676354885101318, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.3676354885101318, "logits_per_char": -0.6838177442550659, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 80, "native_id": "8-53", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3735697269439697, "incorrect_loss_raw": 1.413358251253764, "correct_loss_per_char": 0.6867848634719849, "incorrect_loss_per_char": 0.706679125626882, "correct_loss_per_token": 1.3735697269439697, "incorrect_loss_per_token": 1.413358251253764, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2784243822097778, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": true, "logits_per_token": -1.2784243822097778, "logits_per_char": -0.6392121911048889, "num_chars": 2}, {"sum_logits": -1.6016229391098022, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.6016229391098022, "logits_per_char": -0.8008114695549011, "num_chars": 2}, {"sum_logits": -1.3735697269439697, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.3735697269439697, "logits_per_char": -0.6867848634719849, "num_chars": 2}, {"sum_logits": -1.3600274324417114, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.3600274324417114, "logits_per_char": -0.6800137162208557, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 81, "native_id": "7-1044", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3099606037139893, "incorrect_loss_raw": 1.4481208324432373, "correct_loss_per_char": 0.6549803018569946, "incorrect_loss_per_char": 0.7240604162216187, "correct_loss_per_token": 1.3099606037139893, "incorrect_loss_per_token": 1.4481208324432373, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3099606037139893, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.3099606037139893, "logits_per_char": -0.6549803018569946, "num_chars": 2}, {"sum_logits": -1.711014747619629, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.711014747619629, "logits_per_char": -0.8555073738098145, "num_chars": 2}, {"sum_logits": -1.3924415111541748, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.3924415111541748, "logits_per_char": -0.6962207555770874, "num_chars": 2}, {"sum_logits": -1.2409062385559082, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": true, "logits_per_token": -1.2409062385559082, "logits_per_char": -0.6204531192779541, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 82, "native_id": "7-1122", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2143380641937256, "incorrect_loss_raw": 1.4616434971491497, "correct_loss_per_char": 0.6071690320968628, "incorrect_loss_per_char": 0.7308217485745748, "correct_loss_per_token": 1.2143380641937256, "incorrect_loss_per_token": 1.4616434971491497, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2143380641937256, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": true, "logits_per_token": -1.2143380641937256, "logits_per_char": -0.6071690320968628, "num_chars": 2}, {"sum_logits": -1.4912445545196533, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.4912445545196533, "logits_per_char": -0.7456222772598267, "num_chars": 2}, {"sum_logits": -1.3768905401229858, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.3768905401229858, "logits_per_char": -0.6884452700614929, "num_chars": 2}, {"sum_logits": -1.5167953968048096, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.5167953968048096, "logits_per_char": -0.7583976984024048, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 83, "native_id": "9-79", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4450477361679077, "incorrect_loss_raw": 1.3768571217854817, "correct_loss_per_char": 0.7225238680839539, "incorrect_loss_per_char": 0.6884285608927408, "correct_loss_per_token": 1.4450477361679077, "incorrect_loss_per_token": 1.3768571217854817, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4030019044876099, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": false, "logits_per_token": -1.4030019044876099, "logits_per_char": -0.7015009522438049, "num_chars": 2}, {"sum_logits": -1.4450477361679077, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": false, "logits_per_token": -1.4450477361679077, "logits_per_char": -0.7225238680839539, "num_chars": 2}, {"sum_logits": -1.4087916612625122, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": false, "logits_per_token": -1.4087916612625122, "logits_per_char": -0.7043958306312561, "num_chars": 2}, {"sum_logits": -1.3187777996063232, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": true, "logits_per_token": -1.3187777996063232, "logits_per_char": -0.6593888998031616, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 84, "native_id": "7-157", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4480798244476318, "incorrect_loss_raw": 1.3760964473088582, "correct_loss_per_char": 0.7240399122238159, "incorrect_loss_per_char": 0.6880482236544291, "correct_loss_per_token": 1.4480798244476318, "incorrect_loss_per_token": 1.3760964473088582, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.332950472831726, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": true, "logits_per_token": -1.332950472831726, "logits_per_char": -0.666475236415863, "num_chars": 2}, {"sum_logits": -1.3953454494476318, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.3953454494476318, "logits_per_char": -0.6976727247238159, "num_chars": 2}, {"sum_logits": -1.3999934196472168, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.3999934196472168, "logits_per_char": -0.6999967098236084, "num_chars": 2}, {"sum_logits": -1.4480798244476318, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.4480798244476318, "logits_per_char": -0.7240399122238159, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 85, "native_id": "9-1164", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2866278886795044, "incorrect_loss_raw": 1.4374353090922039, "correct_loss_per_char": 0.6433139443397522, "incorrect_loss_per_char": 0.7187176545461019, "correct_loss_per_token": 1.2866278886795044, "incorrect_loss_per_token": 1.4374353090922039, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2675533294677734, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": true, "logits_per_token": -1.2675533294677734, "logits_per_char": -0.6337766647338867, "num_chars": 2}, {"sum_logits": -1.2866278886795044, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.2866278886795044, "logits_per_char": -0.6433139443397522, "num_chars": 2}, {"sum_logits": -1.4889143705368042, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.4889143705368042, "logits_per_char": -0.7444571852684021, "num_chars": 2}, {"sum_logits": -1.5558382272720337, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.5558382272720337, "logits_per_char": -0.7779191136360168, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 86, "native_id": "8-63", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4741512537002563, "incorrect_loss_raw": 1.3716684182484944, "correct_loss_per_char": 0.7370756268501282, "incorrect_loss_per_char": 0.6858342091242472, "correct_loss_per_token": 1.4741512537002563, "incorrect_loss_per_token": 1.3716684182484944, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2435827255249023, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": true, "logits_per_token": -1.2435827255249023, "logits_per_char": -0.6217913627624512, "num_chars": 2}, {"sum_logits": -1.4177989959716797, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.4177989959716797, "logits_per_char": -0.7088994979858398, "num_chars": 2}, {"sum_logits": -1.4536235332489014, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.4536235332489014, "logits_per_char": -0.7268117666244507, "num_chars": 2}, {"sum_logits": -1.4741512537002563, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.4741512537002563, "logits_per_char": -0.7370756268501282, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 87, "native_id": "8-308", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3479681015014648, "incorrect_loss_raw": 1.4169304768244426, "correct_loss_per_char": 0.6739840507507324, "incorrect_loss_per_char": 0.7084652384122213, "correct_loss_per_token": 1.3479681015014648, "incorrect_loss_per_token": 1.4169304768244426, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.226014256477356, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": true, "logits_per_token": -1.226014256477356, "logits_per_char": -0.613007128238678, "num_chars": 2}, {"sum_logits": -1.5288543701171875, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": false, "logits_per_token": -1.5288543701171875, "logits_per_char": -0.7644271850585938, "num_chars": 2}, {"sum_logits": -1.4959228038787842, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": false, "logits_per_token": -1.4959228038787842, "logits_per_char": -0.7479614019393921, "num_chars": 2}, {"sum_logits": -1.3479681015014648, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": false, "logits_per_token": -1.3479681015014648, "logits_per_char": -0.6739840507507324, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 88, "native_id": "326", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4997323751449585, "incorrect_loss_raw": 1.3790652751922607, "correct_loss_per_char": 0.7498661875724792, "incorrect_loss_per_char": 0.6895326375961304, "correct_loss_per_token": 1.4997323751449585, "incorrect_loss_per_token": 1.3790652751922607, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1500072479248047, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": true, "logits_per_token": -1.1500072479248047, "logits_per_char": -0.5750036239624023, "num_chars": 2}, {"sum_logits": -1.5984370708465576, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.5984370708465576, "logits_per_char": -0.7992185354232788, "num_chars": 2}, {"sum_logits": -1.4997323751449585, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.4997323751449585, "logits_per_char": -0.7498661875724792, "num_chars": 2}, {"sum_logits": -1.38875150680542, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.38875150680542, "logits_per_char": -0.69437575340271, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 89, "native_id": "1184", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1793673038482666, "incorrect_loss_raw": 1.4767555793126423, "correct_loss_per_char": 0.5896836519241333, "incorrect_loss_per_char": 0.7383777896563212, "correct_loss_per_token": 1.1793673038482666, "incorrect_loss_per_token": 1.4767555793126423, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1793673038482666, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": true, "logits_per_token": -1.1793673038482666, "logits_per_char": -0.5896836519241333, "num_chars": 2}, {"sum_logits": -1.4078272581100464, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.4078272581100464, "logits_per_char": -0.7039136290550232, "num_chars": 2}, {"sum_logits": -1.4567644596099854, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.4567644596099854, "logits_per_char": -0.7283822298049927, "num_chars": 2}, {"sum_logits": -1.5656750202178955, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.5656750202178955, "logits_per_char": -0.7828375101089478, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 90, "native_id": "359", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.401350975036621, "incorrect_loss_raw": 1.39678156375885, "correct_loss_per_char": 0.7006754875183105, "incorrect_loss_per_char": 0.698390781879425, "correct_loss_per_token": 1.401350975036621, "incorrect_loss_per_token": 1.39678156375885, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.401350975036621, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": false, "logits_per_token": -1.401350975036621, "logits_per_char": -0.7006754875183105, "num_chars": 2}, {"sum_logits": -1.4212734699249268, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": false, "logits_per_token": -1.4212734699249268, "logits_per_char": -0.7106367349624634, "num_chars": 2}, {"sum_logits": -1.456136703491211, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": false, "logits_per_token": -1.456136703491211, "logits_per_char": -0.7280683517456055, "num_chars": 2}, {"sum_logits": -1.3129345178604126, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": true, "logits_per_token": -1.3129345178604126, "logits_per_char": -0.6564672589302063, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 91, "native_id": "9-350", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3491010665893555, "incorrect_loss_raw": 1.415557066599528, "correct_loss_per_char": 0.6745505332946777, "incorrect_loss_per_char": 0.707778533299764, "correct_loss_per_token": 1.3491010665893555, "incorrect_loss_per_token": 1.415557066599528, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2668359279632568, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": true, "logits_per_token": -1.2668359279632568, "logits_per_char": -0.6334179639816284, "num_chars": 2}, {"sum_logits": -1.460405945777893, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.460405945777893, "logits_per_char": -0.7302029728889465, "num_chars": 2}, {"sum_logits": -1.519429326057434, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.519429326057434, "logits_per_char": -0.759714663028717, "num_chars": 2}, {"sum_logits": -1.3491010665893555, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.3491010665893555, "logits_per_char": -0.6745505332946777, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 92, "native_id": "7-140", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4449195861816406, "incorrect_loss_raw": 1.3948134183883667, "correct_loss_per_char": 0.7224597930908203, "incorrect_loss_per_char": 0.6974067091941833, "correct_loss_per_token": 1.4449195861816406, "incorrect_loss_per_token": 1.3948134183883667, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.141016960144043, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": true, "logits_per_token": -1.141016960144043, "logits_per_char": -0.5705084800720215, "num_chars": 2}, {"sum_logits": -1.4446200132369995, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.4446200132369995, "logits_per_char": -0.7223100066184998, "num_chars": 2}, {"sum_logits": -1.4449195861816406, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.4449195861816406, "logits_per_char": -0.7224597930908203, "num_chars": 2}, {"sum_logits": -1.5988032817840576, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.5988032817840576, "logits_per_char": -0.7994016408920288, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 93, "native_id": "591", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.247588872909546, "incorrect_loss_raw": 1.449742277463277, "correct_loss_per_char": 0.623794436454773, "incorrect_loss_per_char": 0.7248711387316386, "correct_loss_per_token": 1.247588872909546, "incorrect_loss_per_token": 1.449742277463277, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.247588872909546, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": true, "logits_per_token": -1.247588872909546, "logits_per_char": -0.623794436454773, "num_chars": 2}, {"sum_logits": -1.51602041721344, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.51602041721344, "logits_per_char": -0.75801020860672, "num_chars": 2}, {"sum_logits": -1.5372916460037231, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.5372916460037231, "logits_per_char": -0.7686458230018616, "num_chars": 2}, {"sum_logits": -1.2959147691726685, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.2959147691726685, "logits_per_char": -0.6479573845863342, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 94, "native_id": "7-391", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.437900185585022, "incorrect_loss_raw": 1.4050386349360149, "correct_loss_per_char": 0.718950092792511, "incorrect_loss_per_char": 0.7025193174680074, "correct_loss_per_token": 1.437900185585022, "incorrect_loss_per_token": 1.4050386349360149, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1469084024429321, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": true, "logits_per_token": -1.1469084024429321, "logits_per_char": -0.5734542012214661, "num_chars": 2}, {"sum_logits": -1.3991838693618774, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.3991838693618774, "logits_per_char": -0.6995919346809387, "num_chars": 2}, {"sum_logits": -1.437900185585022, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.437900185585022, "logits_per_char": -0.718950092792511, "num_chars": 2}, {"sum_logits": -1.6690236330032349, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.6690236330032349, "logits_per_char": -0.8345118165016174, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 95, "native_id": "1672", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.463104009628296, "incorrect_loss_raw": 1.3793492317199707, "correct_loss_per_char": 0.731552004814148, "incorrect_loss_per_char": 0.6896746158599854, "correct_loss_per_token": 1.463104009628296, "incorrect_loss_per_token": 1.3793492317199707, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1891692876815796, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": true, "logits_per_token": -1.1891692876815796, "logits_per_char": -0.5945846438407898, "num_chars": 2}, {"sum_logits": -1.4515575170516968, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.4515575170516968, "logits_per_char": -0.7257787585258484, "num_chars": 2}, {"sum_logits": -1.463104009628296, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.463104009628296, "logits_per_char": -0.731552004814148, "num_chars": 2}, {"sum_logits": -1.4973208904266357, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.4973208904266357, "logits_per_char": -0.7486604452133179, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 96, "native_id": "9-464", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4239858388900757, "incorrect_loss_raw": 1.4087038437525432, "correct_loss_per_char": 0.7119929194450378, "incorrect_loss_per_char": 0.7043519218762716, "correct_loss_per_token": 1.4239858388900757, "incorrect_loss_per_token": 1.4087038437525432, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.101104974746704, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": true, "logits_per_token": -1.101104974746704, "logits_per_char": -0.550552487373352, "num_chars": 2}, {"sum_logits": -1.5232181549072266, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.5232181549072266, "logits_per_char": -0.7616090774536133, "num_chars": 2}, {"sum_logits": -1.4239858388900757, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.4239858388900757, "logits_per_char": -0.7119929194450378, "num_chars": 2}, {"sum_logits": -1.6017884016036987, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.6017884016036987, "logits_per_char": -0.8008942008018494, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 97, "native_id": "9-983", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4046635627746582, "incorrect_loss_raw": 1.39807923634847, "correct_loss_per_char": 0.7023317813873291, "incorrect_loss_per_char": 0.699039618174235, "correct_loss_per_token": 1.4046635627746582, "incorrect_loss_per_token": 1.39807923634847, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2033171653747559, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": true, "logits_per_token": -1.2033171653747559, "logits_per_char": -0.6016585826873779, "num_chars": 2}, {"sum_logits": -1.4046635627746582, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.4046635627746582, "logits_per_char": -0.7023317813873291, "num_chars": 2}, {"sum_logits": -1.4442312717437744, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.4442312717437744, "logits_per_char": -0.7221156358718872, "num_chars": 2}, {"sum_logits": -1.5466892719268799, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.5466892719268799, "logits_per_char": -0.7733446359634399, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 98, "native_id": "9-179", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1617391109466553, "incorrect_loss_raw": 1.4851365089416504, "correct_loss_per_char": 0.5808695554733276, "incorrect_loss_per_char": 0.7425682544708252, "correct_loss_per_token": 1.1617391109466553, "incorrect_loss_per_token": 1.4851365089416504, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1617391109466553, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": true, "logits_per_token": -1.1617391109466553, "logits_per_char": -0.5808695554733276, "num_chars": 2}, {"sum_logits": -1.5439132452011108, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.5439132452011108, "logits_per_char": -0.7719566226005554, "num_chars": 2}, {"sum_logits": -1.3962006568908691, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.3962006568908691, "logits_per_char": -0.6981003284454346, "num_chars": 2}, {"sum_logits": -1.5152956247329712, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.5152956247329712, "logits_per_char": -0.7576478123664856, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 99, "native_id": "7-942", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4812488555908203, "incorrect_loss_raw": 1.4118676980336506, "correct_loss_per_char": 0.7406244277954102, "incorrect_loss_per_char": 0.7059338490168253, "correct_loss_per_token": 1.4812488555908203, "incorrect_loss_per_token": 1.4118676980336506, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0075907707214355, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": true, "logits_per_token": -1.0075907707214355, "logits_per_char": -0.5037953853607178, "num_chars": 2}, {"sum_logits": -1.5064154863357544, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.5064154863357544, "logits_per_char": -0.7532077431678772, "num_chars": 2}, {"sum_logits": -1.4812488555908203, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.4812488555908203, "logits_per_char": -0.7406244277954102, "num_chars": 2}, {"sum_logits": -1.7215968370437622, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.7215968370437622, "logits_per_char": -0.8607984185218811, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 100, "native_id": "7-100", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4649507999420166, "incorrect_loss_raw": 1.3817295630772908, "correct_loss_per_char": 0.7324753999710083, "incorrect_loss_per_char": 0.6908647815386454, "correct_loss_per_token": 1.4649507999420166, "incorrect_loss_per_token": 1.3817295630772908, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1712011098861694, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": true, "logits_per_token": -1.1712011098861694, "logits_per_char": -0.5856005549430847, "num_chars": 2}, {"sum_logits": -1.4649507999420166, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.4649507999420166, "logits_per_char": -0.7324753999710083, "num_chars": 2}, {"sum_logits": -1.4380592107772827, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.4380592107772827, "logits_per_char": -0.7190296053886414, "num_chars": 2}, {"sum_logits": -1.5359283685684204, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.5359283685684204, "logits_per_char": -0.7679641842842102, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 101, "native_id": "9-30", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4436010122299194, "incorrect_loss_raw": 1.3874166011810303, "correct_loss_per_char": 0.7218005061149597, "incorrect_loss_per_char": 0.6937083005905151, "correct_loss_per_token": 1.4436010122299194, "incorrect_loss_per_token": 1.3874166011810303, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1829272508621216, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": true, "logits_per_token": -1.1829272508621216, "logits_per_char": -0.5914636254310608, "num_chars": 2}, {"sum_logits": -1.4436010122299194, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.4436010122299194, "logits_per_char": -0.7218005061149597, "num_chars": 2}, {"sum_logits": -1.4730297327041626, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.4730297327041626, "logits_per_char": -0.7365148663520813, "num_chars": 2}, {"sum_logits": -1.5062928199768066, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.5062928199768066, "logits_per_char": -0.7531464099884033, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 102, "native_id": "1709", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3448246717453003, "incorrect_loss_raw": 1.4108582337697346, "correct_loss_per_char": 0.6724123358726501, "incorrect_loss_per_char": 0.7054291168848673, "correct_loss_per_token": 1.3448246717453003, "incorrect_loss_per_token": 1.4108582337697346, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3448246717453003, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": false, "logits_per_token": -1.3448246717453003, "logits_per_char": -0.6724123358726501, "num_chars": 2}, {"sum_logits": -1.3289347887039185, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": true, "logits_per_token": -1.3289347887039185, "logits_per_char": -0.6644673943519592, "num_chars": 2}, {"sum_logits": -1.4488197565078735, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": false, "logits_per_token": -1.4488197565078735, "logits_per_char": -0.7244098782539368, "num_chars": 2}, {"sum_logits": -1.454820156097412, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": false, "logits_per_token": -1.454820156097412, "logits_per_char": -0.727410078048706, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 103, "native_id": "8-491", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4163135290145874, "incorrect_loss_raw": 1.3904997110366821, "correct_loss_per_char": 0.7081567645072937, "incorrect_loss_per_char": 0.6952498555183411, "correct_loss_per_token": 1.4163135290145874, "incorrect_loss_per_token": 1.3904997110366821, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.246759295463562, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.246759295463562, "logits_per_char": -0.623379647731781, "num_chars": 2}, {"sum_logits": -1.3739784955978394, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.3739784955978394, "logits_per_char": -0.6869892477989197, "num_chars": 2}, {"sum_logits": -1.550761342048645, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.550761342048645, "logits_per_char": -0.7753806710243225, "num_chars": 2}, {"sum_logits": -1.4163135290145874, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.4163135290145874, "logits_per_char": -0.7081567645072937, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 104, "native_id": "44", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4722743034362793, "incorrect_loss_raw": 1.36738121509552, "correct_loss_per_char": 0.7361371517181396, "incorrect_loss_per_char": 0.68369060754776, "correct_loss_per_token": 1.4722743034362793, "incorrect_loss_per_token": 1.36738121509552, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.318464756011963, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": true, "logits_per_token": -1.318464756011963, "logits_per_char": -0.6592323780059814, "num_chars": 2}, {"sum_logits": -1.4722743034362793, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.4722743034362793, "logits_per_char": -0.7361371517181396, "num_chars": 2}, {"sum_logits": -1.4113925695419312, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.4113925695419312, "logits_per_char": -0.7056962847709656, "num_chars": 2}, {"sum_logits": -1.372286319732666, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.372286319732666, "logits_per_char": -0.686143159866333, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 105, "native_id": "1023", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5488723516464233, "incorrect_loss_raw": 1.3497564395268757, "correct_loss_per_char": 0.7744361758232117, "incorrect_loss_per_char": 0.6748782197634379, "correct_loss_per_token": 1.5488723516464233, "incorrect_loss_per_token": 1.3497564395268757, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2179762125015259, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": true, "logits_per_token": -1.2179762125015259, "logits_per_char": -0.6089881062507629, "num_chars": 2}, {"sum_logits": -1.4068416357040405, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.4068416357040405, "logits_per_char": -0.7034208178520203, "num_chars": 2}, {"sum_logits": -1.424451470375061, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.424451470375061, "logits_per_char": -0.7122257351875305, "num_chars": 2}, {"sum_logits": -1.5488723516464233, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.5488723516464233, "logits_per_char": -0.7744361758232117, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 106, "native_id": "1911", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4919650554656982, "incorrect_loss_raw": 1.3621107737223308, "correct_loss_per_char": 0.7459825277328491, "incorrect_loss_per_char": 0.6810553868611654, "correct_loss_per_token": 1.4919650554656982, "incorrect_loss_per_token": 1.3621107737223308, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3581715822219849, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.3581715822219849, "logits_per_char": -0.6790857911109924, "num_chars": 2}, {"sum_logits": -1.4919650554656982, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.4919650554656982, "logits_per_char": -0.7459825277328491, "num_chars": 2}, {"sum_logits": -1.445726990699768, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.445726990699768, "logits_per_char": -0.722863495349884, "num_chars": 2}, {"sum_logits": -1.2824337482452393, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": true, "logits_per_token": -1.2824337482452393, "logits_per_char": -0.6412168741226196, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 107, "native_id": "429", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2850760221481323, "incorrect_loss_raw": 1.4342902104059856, "correct_loss_per_char": 0.6425380110740662, "incorrect_loss_per_char": 0.7171451052029928, "correct_loss_per_token": 1.2850760221481323, "incorrect_loss_per_token": 1.4342902104059856, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2850760221481323, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": true, "logits_per_token": -1.2850760221481323, "logits_per_char": -0.6425380110740662, "num_chars": 2}, {"sum_logits": -1.429760456085205, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.429760456085205, "logits_per_char": -0.7148802280426025, "num_chars": 2}, {"sum_logits": -1.417097568511963, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.417097568511963, "logits_per_char": -0.7085487842559814, "num_chars": 2}, {"sum_logits": -1.4560126066207886, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.4560126066207886, "logits_per_char": -0.7280063033103943, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 108, "native_id": "8-49", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2499433755874634, "incorrect_loss_raw": 1.444490671157837, "correct_loss_per_char": 0.6249716877937317, "incorrect_loss_per_char": 0.7222453355789185, "correct_loss_per_token": 1.2499433755874634, "incorrect_loss_per_token": 1.444490671157837, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3740489482879639, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.3740489482879639, "logits_per_char": -0.6870244741439819, "num_chars": 2}, {"sum_logits": -1.4691784381866455, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.4691784381866455, "logits_per_char": -0.7345892190933228, "num_chars": 2}, {"sum_logits": -1.4902446269989014, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": false, "logits_per_token": -1.4902446269989014, "logits_per_char": -0.7451223134994507, "num_chars": 2}, {"sum_logits": -1.2499433755874634, "num_tokens": 1, "num_tokens_all": 303, "is_greedy": true, "logits_per_token": -1.2499433755874634, "logits_per_char": -0.6249716877937317, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 109, "native_id": "520", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.378218650817871, "incorrect_loss_raw": 1.4094150463740032, "correct_loss_per_char": 0.6891093254089355, "incorrect_loss_per_char": 0.7047075231870016, "correct_loss_per_token": 1.378218650817871, "incorrect_loss_per_token": 1.4094150463740032, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2345131635665894, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": true, "logits_per_token": -1.2345131635665894, "logits_per_char": -0.6172565817832947, "num_chars": 2}, {"sum_logits": -1.5323823690414429, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.5323823690414429, "logits_per_char": -0.7661911845207214, "num_chars": 2}, {"sum_logits": -1.378218650817871, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.378218650817871, "logits_per_char": -0.6891093254089355, "num_chars": 2}, {"sum_logits": -1.461349606513977, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.461349606513977, "logits_per_char": -0.7306748032569885, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 110, "native_id": "7-1128", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3407965898513794, "incorrect_loss_raw": 1.4101504882176716, "correct_loss_per_char": 0.6703982949256897, "incorrect_loss_per_char": 0.7050752441088358, "correct_loss_per_token": 1.3407965898513794, "incorrect_loss_per_token": 1.4101504882176716, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.425121545791626, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.425121545791626, "logits_per_char": -0.712560772895813, "num_chars": 2}, {"sum_logits": -1.4274364709854126, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.4274364709854126, "logits_per_char": -0.7137182354927063, "num_chars": 2}, {"sum_logits": -1.3407965898513794, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": true, "logits_per_token": -1.3407965898513794, "logits_per_char": -0.6703982949256897, "num_chars": 2}, {"sum_logits": -1.3778934478759766, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.3778934478759766, "logits_per_char": -0.6889467239379883, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 111, "native_id": "7-394", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5158510208129883, "incorrect_loss_raw": 1.3555939594904582, "correct_loss_per_char": 0.7579255104064941, "incorrect_loss_per_char": 0.6777969797452291, "correct_loss_per_token": 1.5158510208129883, "incorrect_loss_per_token": 1.3555939594904582, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.272383689880371, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.272383689880371, "logits_per_char": -0.6361918449401855, "num_chars": 2}, {"sum_logits": -1.5158510208129883, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5158510208129883, "logits_per_char": -0.7579255104064941, "num_chars": 2}, {"sum_logits": -1.4126896858215332, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.4126896858215332, "logits_per_char": -0.7063448429107666, "num_chars": 2}, {"sum_logits": -1.3817085027694702, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.3817085027694702, "logits_per_char": -0.6908542513847351, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 112, "native_id": "9-1166", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3182446956634521, "incorrect_loss_raw": 1.4464388291041057, "correct_loss_per_char": 0.6591223478317261, "incorrect_loss_per_char": 0.7232194145520529, "correct_loss_per_token": 1.3182446956634521, "incorrect_loss_per_token": 1.4464388291041057, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.167847752571106, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": true, "logits_per_token": -1.167847752571106, "logits_per_char": -0.583923876285553, "num_chars": 2}, {"sum_logits": -1.3182446956634521, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.3182446956634521, "logits_per_char": -0.6591223478317261, "num_chars": 2}, {"sum_logits": -1.4300825595855713, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.4300825595855713, "logits_per_char": -0.7150412797927856, "num_chars": 2}, {"sum_logits": -1.7413861751556396, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.7413861751556396, "logits_per_char": -0.8706930875778198, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 113, "native_id": "7-884", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4659724235534668, "incorrect_loss_raw": 1.3904369274775188, "correct_loss_per_char": 0.7329862117767334, "incorrect_loss_per_char": 0.6952184637387594, "correct_loss_per_token": 1.4659724235534668, "incorrect_loss_per_token": 1.3904369274775188, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1005322933197021, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": true, "logits_per_token": -1.1005322933197021, "logits_per_char": -0.5502661466598511, "num_chars": 2}, {"sum_logits": -1.5333284139633179, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": false, "logits_per_token": -1.5333284139633179, "logits_per_char": -0.7666642069816589, "num_chars": 2}, {"sum_logits": -1.4659724235534668, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": false, "logits_per_token": -1.4659724235534668, "logits_per_char": -0.7329862117767334, "num_chars": 2}, {"sum_logits": -1.5374500751495361, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": false, "logits_per_token": -1.5374500751495361, "logits_per_char": -0.7687250375747681, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 114, "native_id": "9-501", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.173777461051941, "incorrect_loss_raw": 1.4824609359105427, "correct_loss_per_char": 0.5868887305259705, "incorrect_loss_per_char": 0.7412304679552714, "correct_loss_per_token": 1.173777461051941, "incorrect_loss_per_token": 1.4824609359105427, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.173777461051941, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": true, "logits_per_token": -1.173777461051941, "logits_per_char": -0.5868887305259705, "num_chars": 2}, {"sum_logits": -1.4966567754745483, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.4966567754745483, "logits_per_char": -0.7483283877372742, "num_chars": 2}, {"sum_logits": -1.5112595558166504, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.5112595558166504, "logits_per_char": -0.7556297779083252, "num_chars": 2}, {"sum_logits": -1.4394664764404297, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.4394664764404297, "logits_per_char": -0.7197332382202148, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 115, "native_id": "9-757", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4262341260910034, "incorrect_loss_raw": 1.3837382793426514, "correct_loss_per_char": 0.7131170630455017, "incorrect_loss_per_char": 0.6918691396713257, "correct_loss_per_token": 1.4262341260910034, "incorrect_loss_per_token": 1.3837382793426514, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3160287141799927, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": true, "logits_per_token": -1.3160287141799927, "logits_per_char": -0.6580143570899963, "num_chars": 2}, {"sum_logits": -1.412092685699463, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.412092685699463, "logits_per_char": -0.7060463428497314, "num_chars": 2}, {"sum_logits": -1.4262341260910034, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.4262341260910034, "logits_per_char": -0.7131170630455017, "num_chars": 2}, {"sum_logits": -1.4230934381484985, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.4230934381484985, "logits_per_char": -0.7115467190742493, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 116, "native_id": "7-725", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5268299579620361, "incorrect_loss_raw": 1.3588144381841023, "correct_loss_per_char": 0.7634149789810181, "incorrect_loss_per_char": 0.6794072190920512, "correct_loss_per_token": 1.5268299579620361, "incorrect_loss_per_token": 1.3588144381841023, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2069138288497925, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": true, "logits_per_token": -1.2069138288497925, "logits_per_char": -0.6034569144248962, "num_chars": 2}, {"sum_logits": -1.4748930931091309, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.4748930931091309, "logits_per_char": -0.7374465465545654, "num_chars": 2}, {"sum_logits": -1.3946363925933838, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.3946363925933838, "logits_per_char": -0.6973181962966919, "num_chars": 2}, {"sum_logits": -1.5268299579620361, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.5268299579620361, "logits_per_char": -0.7634149789810181, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 117, "native_id": "1300", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4371272325515747, "incorrect_loss_raw": 1.3797205686569214, "correct_loss_per_char": 0.7185636162757874, "incorrect_loss_per_char": 0.6898602843284607, "correct_loss_per_token": 1.4371272325515747, "incorrect_loss_per_token": 1.3797205686569214, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3702563047409058, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.3702563047409058, "logits_per_char": -0.6851281523704529, "num_chars": 2}, {"sum_logits": -1.330590009689331, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": true, "logits_per_token": -1.330590009689331, "logits_per_char": -0.6652950048446655, "num_chars": 2}, {"sum_logits": -1.4383153915405273, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.4383153915405273, "logits_per_char": -0.7191576957702637, "num_chars": 2}, {"sum_logits": -1.4371272325515747, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.4371272325515747, "logits_per_char": -0.7185636162757874, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 118, "native_id": "9-230", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3096271753311157, "incorrect_loss_raw": 1.4264397223790486, "correct_loss_per_char": 0.6548135876655579, "incorrect_loss_per_char": 0.7132198611895243, "correct_loss_per_token": 1.3096271753311157, "incorrect_loss_per_token": 1.4264397223790486, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3096271753311157, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": true, "logits_per_token": -1.3096271753311157, "logits_per_char": -0.6548135876655579, "num_chars": 2}, {"sum_logits": -1.5331627130508423, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.5331627130508423, "logits_per_char": -0.7665813565254211, "num_chars": 2}, {"sum_logits": -1.3738993406295776, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.3738993406295776, "logits_per_char": -0.6869496703147888, "num_chars": 2}, {"sum_logits": -1.372257113456726, "num_tokens": 1, "num_tokens_all": 291, "is_greedy": false, "logits_per_token": -1.372257113456726, "logits_per_char": -0.686128556728363, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 119, "native_id": "9-988", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5175813436508179, "incorrect_loss_raw": 1.374492605527242, "correct_loss_per_char": 0.7587906718254089, "incorrect_loss_per_char": 0.687246302763621, "correct_loss_per_token": 1.5175813436508179, "incorrect_loss_per_token": 1.374492605527242, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1013927459716797, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": true, "logits_per_token": -1.1013927459716797, "logits_per_char": -0.5506963729858398, "num_chars": 2}, {"sum_logits": -1.5175813436508179, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.5175813436508179, "logits_per_char": -0.7587906718254089, "num_chars": 2}, {"sum_logits": -1.545466661453247, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.545466661453247, "logits_per_char": -0.7727333307266235, "num_chars": 2}, {"sum_logits": -1.4766184091567993, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.4766184091567993, "logits_per_char": -0.7383092045783997, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 120, "native_id": "9-393", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2743686437606812, "incorrect_loss_raw": 1.440450628598531, "correct_loss_per_char": 0.6371843218803406, "incorrect_loss_per_char": 0.7202253142992655, "correct_loss_per_token": 1.2743686437606812, "incorrect_loss_per_token": 1.440450628598531, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2743686437606812, "num_tokens": 1, "num_tokens_all": 246, "is_greedy": true, "logits_per_token": -1.2743686437606812, "logits_per_char": -0.6371843218803406, "num_chars": 2}, {"sum_logits": -1.4476473331451416, "num_tokens": 1, "num_tokens_all": 246, "is_greedy": false, "logits_per_token": -1.4476473331451416, "logits_per_char": -0.7238236665725708, "num_chars": 2}, {"sum_logits": -1.319045901298523, "num_tokens": 1, "num_tokens_all": 246, "is_greedy": false, "logits_per_token": -1.319045901298523, "logits_per_char": -0.6595229506492615, "num_chars": 2}, {"sum_logits": -1.5546586513519287, "num_tokens": 1, "num_tokens_all": 246, "is_greedy": false, "logits_per_token": -1.5546586513519287, "logits_per_char": -0.7773293256759644, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 121, "native_id": "7-823", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5167816877365112, "incorrect_loss_raw": 1.3589963515599568, "correct_loss_per_char": 0.7583908438682556, "incorrect_loss_per_char": 0.6794981757799784, "correct_loss_per_token": 1.5167816877365112, "incorrect_loss_per_token": 1.3589963515599568, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2200907468795776, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": true, "logits_per_token": -1.2200907468795776, "logits_per_char": -0.6100453734397888, "num_chars": 2}, {"sum_logits": -1.430492877960205, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.430492877960205, "logits_per_char": -0.7152464389801025, "num_chars": 2}, {"sum_logits": -1.426405429840088, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.426405429840088, "logits_per_char": -0.713202714920044, "num_chars": 2}, {"sum_logits": -1.5167816877365112, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.5167816877365112, "logits_per_char": -0.7583908438682556, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 122, "native_id": "9-24", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.202352523803711, "incorrect_loss_raw": 1.46491273244222, "correct_loss_per_char": 0.6011762619018555, "incorrect_loss_per_char": 0.73245636622111, "correct_loss_per_token": 1.202352523803711, "incorrect_loss_per_token": 1.46491273244222, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.202352523803711, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": true, "logits_per_token": -1.202352523803711, "logits_per_char": -0.6011762619018555, "num_chars": 2}, {"sum_logits": -1.4385477304458618, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.4385477304458618, "logits_per_char": -0.7192738652229309, "num_chars": 2}, {"sum_logits": -1.4629830121994019, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.4629830121994019, "logits_per_char": -0.7314915060997009, "num_chars": 2}, {"sum_logits": -1.4932074546813965, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.4932074546813965, "logits_per_char": -0.7466037273406982, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 123, "native_id": "570", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.442431926727295, "incorrect_loss_raw": 1.3934112787246704, "correct_loss_per_char": 0.7212159633636475, "incorrect_loss_per_char": 0.6967056393623352, "correct_loss_per_token": 1.442431926727295, "incorrect_loss_per_token": 1.3934112787246704, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1375610828399658, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": true, "logits_per_token": -1.1375610828399658, "logits_per_char": -0.5687805414199829, "num_chars": 2}, {"sum_logits": -1.514426827430725, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.514426827430725, "logits_per_char": -0.7572134137153625, "num_chars": 2}, {"sum_logits": -1.5282459259033203, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.5282459259033203, "logits_per_char": -0.7641229629516602, "num_chars": 2}, {"sum_logits": -1.442431926727295, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.442431926727295, "logits_per_char": -0.7212159633636475, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 124, "native_id": "9-124", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4318283796310425, "incorrect_loss_raw": 1.3912876844406128, "correct_loss_per_char": 0.7159141898155212, "incorrect_loss_per_char": 0.6956438422203064, "correct_loss_per_token": 1.4318283796310425, "incorrect_loss_per_token": 1.3912876844406128, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2357157468795776, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": true, "logits_per_token": -1.2357157468795776, "logits_per_char": -0.6178578734397888, "num_chars": 2}, {"sum_logits": -1.4318283796310425, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.4318283796310425, "logits_per_char": -0.7159141898155212, "num_chars": 2}, {"sum_logits": -1.3789373636245728, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.3789373636245728, "logits_per_char": -0.6894686818122864, "num_chars": 2}, {"sum_logits": -1.559209942817688, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.559209942817688, "logits_per_char": -0.779604971408844, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 125, "native_id": "9-199", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.473307490348816, "incorrect_loss_raw": 1.3749701976776123, "correct_loss_per_char": 0.736653745174408, "incorrect_loss_per_char": 0.6874850988388062, "correct_loss_per_token": 1.473307490348816, "incorrect_loss_per_token": 1.3749701976776123, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.473307490348816, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.473307490348816, "logits_per_char": -0.736653745174408, "num_chars": 2}, {"sum_logits": -1.518869161605835, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.518869161605835, "logits_per_char": -0.7594345808029175, "num_chars": 2}, {"sum_logits": -1.2616815567016602, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": true, "logits_per_token": -1.2616815567016602, "logits_per_char": -0.6308407783508301, "num_chars": 2}, {"sum_logits": -1.3443598747253418, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.3443598747253418, "logits_per_char": -0.6721799373626709, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 126, "native_id": "767", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1920968294143677, "incorrect_loss_raw": 1.4764726161956787, "correct_loss_per_char": 0.5960484147071838, "incorrect_loss_per_char": 0.7382363080978394, "correct_loss_per_token": 1.1920968294143677, "incorrect_loss_per_token": 1.4764726161956787, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1920968294143677, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": true, "logits_per_token": -1.1920968294143677, "logits_per_char": -0.5960484147071838, "num_chars": 2}, {"sum_logits": -1.555396318435669, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.555396318435669, "logits_per_char": -0.7776981592178345, "num_chars": 2}, {"sum_logits": -1.3121811151504517, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.3121811151504517, "logits_per_char": -0.6560905575752258, "num_chars": 2}, {"sum_logits": -1.5618404150009155, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.5618404150009155, "logits_per_char": -0.7809202075004578, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 127, "native_id": "28", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2492603063583374, "incorrect_loss_raw": 1.4658072392145793, "correct_loss_per_char": 0.6246301531791687, "incorrect_loss_per_char": 0.7329036196072897, "correct_loss_per_token": 1.2492603063583374, "incorrect_loss_per_token": 1.4658072392145793, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.280007243156433, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.280007243156433, "logits_per_char": -0.6400036215782166, "num_chars": 2}, {"sum_logits": -1.7604343891143799, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.7604343891143799, "logits_per_char": -0.8802171945571899, "num_chars": 2}, {"sum_logits": -1.3569800853729248, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.3569800853729248, "logits_per_char": -0.6784900426864624, "num_chars": 2}, {"sum_logits": -1.2492603063583374, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": true, "logits_per_token": -1.2492603063583374, "logits_per_char": -0.6246301531791687, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 128, "native_id": "9-1134", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4457095861434937, "incorrect_loss_raw": 1.393107016881307, "correct_loss_per_char": 0.7228547930717468, "incorrect_loss_per_char": 0.6965535084406534, "correct_loss_per_token": 1.4457095861434937, "incorrect_loss_per_token": 1.393107016881307, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1846750974655151, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": true, "logits_per_token": -1.1846750974655151, "logits_per_char": -0.5923375487327576, "num_chars": 2}, {"sum_logits": -1.4457095861434937, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.4457095861434937, "logits_per_char": -0.7228547930717468, "num_chars": 2}, {"sum_logits": -1.3988219499588013, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.3988219499588013, "logits_per_char": -0.6994109749794006, "num_chars": 2}, {"sum_logits": -1.5958240032196045, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.5958240032196045, "logits_per_char": -0.7979120016098022, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 129, "native_id": "9-1030", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.440747857093811, "incorrect_loss_raw": 1.3833809693654378, "correct_loss_per_char": 0.7203739285469055, "incorrect_loss_per_char": 0.6916904846827189, "correct_loss_per_token": 1.440747857093811, "incorrect_loss_per_token": 1.3833809693654378, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2311921119689941, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": true, "logits_per_token": -1.2311921119689941, "logits_per_char": -0.6155960559844971, "num_chars": 2}, {"sum_logits": -1.4390960931777954, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.4390960931777954, "logits_per_char": -0.7195480465888977, "num_chars": 2}, {"sum_logits": -1.440747857093811, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.440747857093811, "logits_per_char": -0.7203739285469055, "num_chars": 2}, {"sum_logits": -1.479854702949524, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.479854702949524, "logits_per_char": -0.739927351474762, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 130, "native_id": "9-18", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3624098300933838, "incorrect_loss_raw": 1.4050463438034058, "correct_loss_per_char": 0.6812049150466919, "incorrect_loss_per_char": 0.7025231719017029, "correct_loss_per_token": 1.3624098300933838, "incorrect_loss_per_token": 1.4050463438034058, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3153349161148071, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": true, "logits_per_token": -1.3153349161148071, "logits_per_char": -0.6576674580574036, "num_chars": 2}, {"sum_logits": -1.3624098300933838, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.3624098300933838, "logits_per_char": -0.6812049150466919, "num_chars": 2}, {"sum_logits": -1.5127863883972168, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.5127863883972168, "logits_per_char": -0.7563931941986084, "num_chars": 2}, {"sum_logits": -1.3870177268981934, "num_tokens": 1, "num_tokens_all": 289, "is_greedy": false, "logits_per_token": -1.3870177268981934, "logits_per_char": -0.6935088634490967, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 131, "native_id": "8-378", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4090567827224731, "incorrect_loss_raw": 1.3889238834381104, "correct_loss_per_char": 0.7045283913612366, "incorrect_loss_per_char": 0.6944619417190552, "correct_loss_per_token": 1.4090567827224731, "incorrect_loss_per_token": 1.3889238834381104, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2906030416488647, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": true, "logits_per_token": -1.2906030416488647, "logits_per_char": -0.6453015208244324, "num_chars": 2}, {"sum_logits": -1.350080132484436, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.350080132484436, "logits_per_char": -0.675040066242218, "num_chars": 2}, {"sum_logits": -1.5260884761810303, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.5260884761810303, "logits_per_char": -0.7630442380905151, "num_chars": 2}, {"sum_logits": -1.4090567827224731, "num_tokens": 1, "num_tokens_all": 302, "is_greedy": false, "logits_per_token": -1.4090567827224731, "logits_per_char": -0.7045283913612366, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 132, "native_id": "7-677", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.425162672996521, "incorrect_loss_raw": 1.3843305110931396, "correct_loss_per_char": 0.7125813364982605, "incorrect_loss_per_char": 0.6921652555465698, "correct_loss_per_token": 1.425162672996521, "incorrect_loss_per_token": 1.3843305110931396, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2575169801712036, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": true, "logits_per_token": -1.2575169801712036, "logits_per_char": -0.6287584900856018, "num_chars": 2}, {"sum_logits": -1.4958773851394653, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.4958773851394653, "logits_per_char": -0.7479386925697327, "num_chars": 2}, {"sum_logits": -1.425162672996521, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.425162672996521, "logits_per_char": -0.7125813364982605, "num_chars": 2}, {"sum_logits": -1.39959716796875, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.39959716796875, "logits_per_char": -0.699798583984375, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 133, "native_id": "9-786", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.095961093902588, "incorrect_loss_raw": 1.5159800450007122, "correct_loss_per_char": 0.547980546951294, "incorrect_loss_per_char": 0.7579900225003561, "correct_loss_per_token": 1.095961093902588, "incorrect_loss_per_token": 1.5159800450007122, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.095961093902588, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": true, "logits_per_token": -1.095961093902588, "logits_per_char": -0.547980546951294, "num_chars": 2}, {"sum_logits": -1.4876376390457153, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.4876376390457153, "logits_per_char": -0.7438188195228577, "num_chars": 2}, {"sum_logits": -1.4714818000793457, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.4714818000793457, "logits_per_char": -0.7357409000396729, "num_chars": 2}, {"sum_logits": -1.5888206958770752, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.5888206958770752, "logits_per_char": -0.7944103479385376, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 134, "native_id": "9-463", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5048967599868774, "incorrect_loss_raw": 1.3635305563608806, "correct_loss_per_char": 0.7524483799934387, "incorrect_loss_per_char": 0.6817652781804403, "correct_loss_per_token": 1.5048967599868774, "incorrect_loss_per_token": 1.3635305563608806, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2604608535766602, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": true, "logits_per_token": -1.2604608535766602, "logits_per_char": -0.6302304267883301, "num_chars": 2}, {"sum_logits": -1.4688812494277954, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.4688812494277954, "logits_per_char": -0.7344406247138977, "num_chars": 2}, {"sum_logits": -1.361249566078186, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.361249566078186, "logits_per_char": -0.680624783039093, "num_chars": 2}, {"sum_logits": -1.5048967599868774, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.5048967599868774, "logits_per_char": -0.7524483799934387, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 135, "native_id": "7-71", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1294691562652588, "incorrect_loss_raw": 1.4969839255015056, "correct_loss_per_char": 0.5647345781326294, "incorrect_loss_per_char": 0.7484919627507528, "correct_loss_per_token": 1.1294691562652588, "incorrect_loss_per_token": 1.4969839255015056, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1294691562652588, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": true, "logits_per_token": -1.1294691562652588, "logits_per_char": -0.5647345781326294, "num_chars": 2}, {"sum_logits": -1.4880985021591187, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.4880985021591187, "logits_per_char": -0.7440492510795593, "num_chars": 2}, {"sum_logits": -1.4707167148590088, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.4707167148590088, "logits_per_char": -0.7353583574295044, "num_chars": 2}, {"sum_logits": -1.5321365594863892, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.5321365594863892, "logits_per_char": -0.7660682797431946, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 136, "native_id": "9-1053", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4237014055252075, "incorrect_loss_raw": 1.3892339865366619, "correct_loss_per_char": 0.7118507027626038, "incorrect_loss_per_char": 0.6946169932683309, "correct_loss_per_token": 1.4237014055252075, "incorrect_loss_per_token": 1.3892339865366619, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.425182819366455, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.425182819366455, "logits_per_char": -0.7125914096832275, "num_chars": 2}, {"sum_logits": -1.4073128700256348, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.4073128700256348, "logits_per_char": -0.7036564350128174, "num_chars": 2}, {"sum_logits": -1.3352062702178955, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": true, "logits_per_token": -1.3352062702178955, "logits_per_char": -0.6676031351089478, "num_chars": 2}, {"sum_logits": -1.4237014055252075, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.4237014055252075, "logits_per_char": -0.7118507027626038, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 137, "native_id": "9-437", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4356194734573364, "incorrect_loss_raw": 1.3993022839228313, "correct_loss_per_char": 0.7178097367286682, "incorrect_loss_per_char": 0.6996511419614156, "correct_loss_per_token": 1.4356194734573364, "incorrect_loss_per_token": 1.3993022839228313, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1320505142211914, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": true, "logits_per_token": -1.1320505142211914, "logits_per_char": -0.5660252571105957, "num_chars": 2}, {"sum_logits": -1.4356194734573364, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.4356194734573364, "logits_per_char": -0.7178097367286682, "num_chars": 2}, {"sum_logits": -1.5163073539733887, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.5163073539733887, "logits_per_char": -0.7581536769866943, "num_chars": 2}, {"sum_logits": -1.5495489835739136, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.5495489835739136, "logits_per_char": -0.7747744917869568, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 138, "native_id": "1787", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4195775985717773, "incorrect_loss_raw": 1.3920604387919109, "correct_loss_per_char": 0.7097887992858887, "incorrect_loss_per_char": 0.6960302193959554, "correct_loss_per_token": 1.4195775985717773, "incorrect_loss_per_token": 1.3920604387919109, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1994731426239014, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": true, "logits_per_token": -1.1994731426239014, "logits_per_char": -0.5997365713119507, "num_chars": 2}, {"sum_logits": -1.4195775985717773, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.4195775985717773, "logits_per_char": -0.7097887992858887, "num_chars": 2}, {"sum_logits": -1.449539303779602, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.449539303779602, "logits_per_char": -0.724769651889801, "num_chars": 2}, {"sum_logits": -1.527168869972229, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.527168869972229, "logits_per_char": -0.7635844349861145, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 139, "native_id": "7-107", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2729260921478271, "incorrect_loss_raw": 1.4535682996114094, "correct_loss_per_char": 0.6364630460739136, "incorrect_loss_per_char": 0.7267841498057047, "correct_loss_per_token": 1.2729260921478271, "incorrect_loss_per_token": 1.4535682996114094, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2729260921478271, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": true, "logits_per_token": -1.2729260921478271, "logits_per_char": -0.6364630460739136, "num_chars": 2}, {"sum_logits": -1.6426997184753418, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.6426997184753418, "logits_per_char": -0.8213498592376709, "num_chars": 2}, {"sum_logits": -1.3591256141662598, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.3591256141662598, "logits_per_char": -0.6795628070831299, "num_chars": 2}, {"sum_logits": -1.358879566192627, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.358879566192627, "logits_per_char": -0.6794397830963135, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 140, "native_id": "769", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3747062683105469, "incorrect_loss_raw": 1.4246691862742107, "correct_loss_per_char": 0.6873531341552734, "incorrect_loss_per_char": 0.7123345931371053, "correct_loss_per_token": 1.3747062683105469, "incorrect_loss_per_token": 1.4246691862742107, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1215217113494873, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": true, "logits_per_token": -1.1215217113494873, "logits_per_char": -0.5607608556747437, "num_chars": 2}, {"sum_logits": -1.505691647529602, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.505691647529602, "logits_per_char": -0.752845823764801, "num_chars": 2}, {"sum_logits": -1.3747062683105469, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.3747062683105469, "logits_per_char": -0.6873531341552734, "num_chars": 2}, {"sum_logits": -1.6467941999435425, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.6467941999435425, "logits_per_char": -0.8233970999717712, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 141, "native_id": "9-73", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.396752953529358, "incorrect_loss_raw": 1.3953103224436443, "correct_loss_per_char": 0.698376476764679, "incorrect_loss_per_char": 0.6976551612218221, "correct_loss_per_token": 1.396752953529358, "incorrect_loss_per_token": 1.3953103224436443, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.396752953529358, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.396752953529358, "logits_per_char": -0.698376476764679, "num_chars": 2}, {"sum_logits": -1.4592705965042114, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.4592705965042114, "logits_per_char": -0.7296352982521057, "num_chars": 2}, {"sum_logits": -1.3414576053619385, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": true, "logits_per_token": -1.3414576053619385, "logits_per_char": -0.6707288026809692, "num_chars": 2}, {"sum_logits": -1.3852027654647827, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.3852027654647827, "logits_per_char": -0.6926013827323914, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 142, "native_id": "9-1194", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5814305543899536, "incorrect_loss_raw": 1.3474949200948079, "correct_loss_per_char": 0.7907152771949768, "incorrect_loss_per_char": 0.6737474600474039, "correct_loss_per_token": 1.5814305543899536, "incorrect_loss_per_token": 1.3474949200948079, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.189256191253662, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": true, "logits_per_token": -1.189256191253662, "logits_per_char": -0.594628095626831, "num_chars": 2}, {"sum_logits": -1.463030219078064, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.463030219078064, "logits_per_char": -0.731515109539032, "num_chars": 2}, {"sum_logits": -1.3901983499526978, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.3901983499526978, "logits_per_char": -0.6950991749763489, "num_chars": 2}, {"sum_logits": -1.5814305543899536, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.5814305543899536, "logits_per_char": -0.7907152771949768, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 143, "native_id": "9-416", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4281033277511597, "incorrect_loss_raw": 1.386202335357666, "correct_loss_per_char": 0.7140516638755798, "incorrect_loss_per_char": 0.693101167678833, "correct_loss_per_token": 1.4281033277511597, "incorrect_loss_per_token": 1.386202335357666, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.273808479309082, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": true, "logits_per_token": -1.273808479309082, "logits_per_char": -0.636904239654541, "num_chars": 2}, {"sum_logits": -1.4264932870864868, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.4264932870864868, "logits_per_char": -0.7132466435432434, "num_chars": 2}, {"sum_logits": -1.4583052396774292, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.4583052396774292, "logits_per_char": -0.7291526198387146, "num_chars": 2}, {"sum_logits": -1.4281033277511597, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.4281033277511597, "logits_per_char": -0.7140516638755798, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 144, "native_id": "470", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2405309677124023, "incorrect_loss_raw": 1.4499495029449463, "correct_loss_per_char": 0.6202654838562012, "incorrect_loss_per_char": 0.7249747514724731, "correct_loss_per_token": 1.2405309677124023, "incorrect_loss_per_token": 1.4499495029449463, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2405309677124023, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": true, "logits_per_token": -1.2405309677124023, "logits_per_char": -0.6202654838562012, "num_chars": 2}, {"sum_logits": -1.5609887838363647, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.5609887838363647, "logits_per_char": -0.7804943919181824, "num_chars": 2}, {"sum_logits": -1.3695805072784424, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.3695805072784424, "logits_per_char": -0.6847902536392212, "num_chars": 2}, {"sum_logits": -1.4192792177200317, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.4192792177200317, "logits_per_char": -0.7096396088600159, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 145, "native_id": "1297", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2953548431396484, "incorrect_loss_raw": 1.4362325270970662, "correct_loss_per_char": 0.6476774215698242, "incorrect_loss_per_char": 0.7181162635485331, "correct_loss_per_token": 1.2953548431396484, "incorrect_loss_per_token": 1.4362325270970662, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.374648094177246, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.374648094177246, "logits_per_char": -0.687324047088623, "num_chars": 2}, {"sum_logits": -1.6166515350341797, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.6166515350341797, "logits_per_char": -0.8083257675170898, "num_chars": 2}, {"sum_logits": -1.317397952079773, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.317397952079773, "logits_per_char": -0.6586989760398865, "num_chars": 2}, {"sum_logits": -1.2953548431396484, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": true, "logits_per_token": -1.2953548431396484, "logits_per_char": -0.6476774215698242, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 146, "native_id": "8-346", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3650175333023071, "incorrect_loss_raw": 1.416130542755127, "correct_loss_per_char": 0.6825087666511536, "incorrect_loss_per_char": 0.7080652713775635, "correct_loss_per_token": 1.3650175333023071, "incorrect_loss_per_token": 1.416130542755127, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3650175333023071, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.3650175333023071, "logits_per_char": -0.6825087666511536, "num_chars": 2}, {"sum_logits": -1.5411287546157837, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.5411287546157837, "logits_per_char": -0.7705643773078918, "num_chars": 2}, {"sum_logits": -1.4655276536941528, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.4655276536941528, "logits_per_char": -0.7327638268470764, "num_chars": 2}, {"sum_logits": -1.2417352199554443, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": true, "logits_per_token": -1.2417352199554443, "logits_per_char": -0.6208676099777222, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 147, "native_id": "7-807", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1132166385650635, "incorrect_loss_raw": 1.5152138471603394, "correct_loss_per_char": 0.5566083192825317, "incorrect_loss_per_char": 0.7576069235801697, "correct_loss_per_token": 1.1132166385650635, "incorrect_loss_per_token": 1.5152138471603394, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1132166385650635, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": true, "logits_per_token": -1.1132166385650635, "logits_per_char": -0.5566083192825317, "num_chars": 2}, {"sum_logits": -1.338990569114685, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.338990569114685, "logits_per_char": -0.6694952845573425, "num_chars": 2}, {"sum_logits": -1.5397052764892578, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.5397052764892578, "logits_per_char": -0.7698526382446289, "num_chars": 2}, {"sum_logits": -1.6669456958770752, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.6669456958770752, "logits_per_char": -0.8334728479385376, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 148, "native_id": "8-463", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.213842511177063, "incorrect_loss_raw": 1.4608407020568848, "correct_loss_per_char": 0.6069212555885315, "incorrect_loss_per_char": 0.7304203510284424, "correct_loss_per_token": 1.213842511177063, "incorrect_loss_per_token": 1.4608407020568848, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.213842511177063, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": true, "logits_per_token": -1.213842511177063, "logits_per_char": -0.6069212555885315, "num_chars": 2}, {"sum_logits": -1.474629521369934, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "logits_per_token": -1.474629521369934, "logits_per_char": -0.737314760684967, "num_chars": 2}, {"sum_logits": -1.5080422163009644, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "logits_per_token": -1.5080422163009644, "logits_per_char": -0.7540211081504822, "num_chars": 2}, {"sum_logits": -1.3998503684997559, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "logits_per_token": -1.3998503684997559, "logits_per_char": -0.6999251842498779, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 149, "native_id": "9-110", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.276686668395996, "incorrect_loss_raw": 1.4378873904546101, "correct_loss_per_char": 0.638343334197998, "incorrect_loss_per_char": 0.7189436952273051, "correct_loss_per_token": 1.276686668395996, "incorrect_loss_per_token": 1.4378873904546101, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.276686668395996, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": true, "logits_per_token": -1.276686668395996, "logits_per_char": -0.638343334197998, "num_chars": 2}, {"sum_logits": -1.3658586740493774, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.3658586740493774, "logits_per_char": -0.6829293370246887, "num_chars": 2}, {"sum_logits": -1.40996515750885, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.40996515750885, "logits_per_char": -0.704982578754425, "num_chars": 2}, {"sum_logits": -1.537838339805603, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.537838339805603, "logits_per_char": -0.7689191699028015, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 150, "native_id": "1611", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4051306247711182, "incorrect_loss_raw": 1.3861221472422283, "correct_loss_per_char": 0.7025653123855591, "incorrect_loss_per_char": 0.6930610736211141, "correct_loss_per_token": 1.4051306247711182, "incorrect_loss_per_token": 1.3861221472422283, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3952395915985107, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.3952395915985107, "logits_per_char": -0.6976197957992554, "num_chars": 2}, {"sum_logits": -1.4278690814971924, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.4278690814971924, "logits_per_char": -0.7139345407485962, "num_chars": 2}, {"sum_logits": -1.4051306247711182, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.4051306247711182, "logits_per_char": -0.7025653123855591, "num_chars": 2}, {"sum_logits": -1.3352577686309814, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.3352577686309814, "logits_per_char": -0.6676288843154907, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 151, "native_id": "9-942", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3902655839920044, "incorrect_loss_raw": 1.3946055968602498, "correct_loss_per_char": 0.6951327919960022, "incorrect_loss_per_char": 0.6973027984301249, "correct_loss_per_token": 1.3902655839920044, "incorrect_loss_per_token": 1.3946055968602498, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.371626853942871, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.371626853942871, "logits_per_char": -0.6858134269714355, "num_chars": 2}, {"sum_logits": -1.4456634521484375, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.4456634521484375, "logits_per_char": -0.7228317260742188, "num_chars": 2}, {"sum_logits": -1.366526484489441, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": true, "logits_per_token": -1.366526484489441, "logits_per_char": -0.6832632422447205, "num_chars": 2}, {"sum_logits": -1.3902655839920044, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.3902655839920044, "logits_per_char": -0.6951327919960022, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 152, "native_id": "9-1102", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5749120712280273, "incorrect_loss_raw": 1.3418097496032715, "correct_loss_per_char": 0.7874560356140137, "incorrect_loss_per_char": 0.6709048748016357, "correct_loss_per_token": 1.5749120712280273, "incorrect_loss_per_token": 1.3418097496032715, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2772927284240723, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": true, "logits_per_token": -1.2772927284240723, "logits_per_char": -0.6386463642120361, "num_chars": 2}, {"sum_logits": -1.5749120712280273, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.5749120712280273, "logits_per_char": -0.7874560356140137, "num_chars": 2}, {"sum_logits": -1.3644009828567505, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.3644009828567505, "logits_per_char": -0.6822004914283752, "num_chars": 2}, {"sum_logits": -1.3837355375289917, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.3837355375289917, "logits_per_char": -0.6918677687644958, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 153, "native_id": "9-774", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4616965055465698, "incorrect_loss_raw": 1.374687910079956, "correct_loss_per_char": 0.7308482527732849, "incorrect_loss_per_char": 0.687343955039978, "correct_loss_per_token": 1.4616965055465698, "incorrect_loss_per_token": 1.374687910079956, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3468388319015503, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.3468388319015503, "logits_per_char": -0.6734194159507751, "num_chars": 2}, {"sum_logits": -1.512984037399292, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.512984037399292, "logits_per_char": -0.756492018699646, "num_chars": 2}, {"sum_logits": -1.4616965055465698, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.4616965055465698, "logits_per_char": -0.7308482527732849, "num_chars": 2}, {"sum_logits": -1.2642408609390259, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.2642408609390259, "logits_per_char": -0.6321204304695129, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 154, "native_id": "8-333", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.248374104499817, "incorrect_loss_raw": 1.444720943768819, "correct_loss_per_char": 0.6241870522499084, "incorrect_loss_per_char": 0.7223604718844095, "correct_loss_per_token": 1.248374104499817, "incorrect_loss_per_token": 1.444720943768819, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.248374104499817, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": true, "logits_per_token": -1.248374104499817, "logits_per_char": -0.6241870522499084, "num_chars": 2}, {"sum_logits": -1.4949274063110352, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.4949274063110352, "logits_per_char": -0.7474637031555176, "num_chars": 2}, {"sum_logits": -1.465173363685608, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.465173363685608, "logits_per_char": -0.732586681842804, "num_chars": 2}, {"sum_logits": -1.3740620613098145, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.3740620613098145, "logits_per_char": -0.6870310306549072, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 155, "native_id": "9-573", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4781955480575562, "incorrect_loss_raw": 1.3721566200256348, "correct_loss_per_char": 0.7390977740287781, "incorrect_loss_per_char": 0.6860783100128174, "correct_loss_per_token": 1.4781955480575562, "incorrect_loss_per_token": 1.3721566200256348, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2709686756134033, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": true, "logits_per_token": -1.2709686756134033, "logits_per_char": -0.6354843378067017, "num_chars": 2}, {"sum_logits": -1.4781955480575562, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.4781955480575562, "logits_per_char": -0.7390977740287781, "num_chars": 2}, {"sum_logits": -1.3660832643508911, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.3660832643508911, "logits_per_char": -0.6830416321754456, "num_chars": 2}, {"sum_logits": -1.4794179201126099, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.4794179201126099, "logits_per_char": -0.7397089600563049, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 156, "native_id": "1955", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2609552145004272, "incorrect_loss_raw": 1.441567341486613, "correct_loss_per_char": 0.6304776072502136, "incorrect_loss_per_char": 0.7207836707433065, "correct_loss_per_token": 1.2609552145004272, "incorrect_loss_per_token": 1.441567341486613, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2609552145004272, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": true, "logits_per_token": -1.2609552145004272, "logits_per_char": -0.6304776072502136, "num_chars": 2}, {"sum_logits": -1.5020039081573486, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.5020039081573486, "logits_per_char": -0.7510019540786743, "num_chars": 2}, {"sum_logits": -1.3906885385513306, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.3906885385513306, "logits_per_char": -0.6953442692756653, "num_chars": 2}, {"sum_logits": -1.4320095777511597, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.4320095777511597, "logits_per_char": -0.7160047888755798, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 157, "native_id": "8-45", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4714250564575195, "incorrect_loss_raw": 1.3741167783737183, "correct_loss_per_char": 0.7357125282287598, "incorrect_loss_per_char": 0.6870583891868591, "correct_loss_per_token": 1.4714250564575195, "incorrect_loss_per_token": 1.3741167783737183, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.235042691230774, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": true, "logits_per_token": -1.235042691230774, "logits_per_char": -0.617521345615387, "num_chars": 2}, {"sum_logits": -1.5259093046188354, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.5259093046188354, "logits_per_char": -0.7629546523094177, "num_chars": 2}, {"sum_logits": -1.4714250564575195, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.4714250564575195, "logits_per_char": -0.7357125282287598, "num_chars": 2}, {"sum_logits": -1.3613983392715454, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.3613983392715454, "logits_per_char": -0.6806991696357727, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 158, "native_id": "9-674", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4149709939956665, "incorrect_loss_raw": 1.3957420984903972, "correct_loss_per_char": 0.7074854969978333, "incorrect_loss_per_char": 0.6978710492451986, "correct_loss_per_token": 1.4149709939956665, "incorrect_loss_per_token": 1.3957420984903972, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2145473957061768, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": true, "logits_per_token": -1.2145473957061768, "logits_per_char": -0.6072736978530884, "num_chars": 2}, {"sum_logits": -1.5554031133651733, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.5554031133651733, "logits_per_char": -0.7777015566825867, "num_chars": 2}, {"sum_logits": -1.4149709939956665, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.4149709939956665, "logits_per_char": -0.7074854969978333, "num_chars": 2}, {"sum_logits": -1.4172757863998413, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.4172757863998413, "logits_per_char": -0.7086378931999207, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 159, "native_id": "898", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3385475873947144, "incorrect_loss_raw": 1.433294415473938, "correct_loss_per_char": 0.6692737936973572, "incorrect_loss_per_char": 0.716647207736969, "correct_loss_per_token": 1.3385475873947144, "incorrect_loss_per_token": 1.433294415473938, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1885530948638916, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": true, "logits_per_token": -1.1885530948638916, "logits_per_char": -0.5942765474319458, "num_chars": 2}, {"sum_logits": -1.3385475873947144, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.3385475873947144, "logits_per_char": -0.6692737936973572, "num_chars": 2}, {"sum_logits": -1.3992165327072144, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.3992165327072144, "logits_per_char": -0.6996082663536072, "num_chars": 2}, {"sum_logits": -1.712113618850708, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.712113618850708, "logits_per_char": -0.856056809425354, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 160, "native_id": "7-1159", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.464365839958191, "incorrect_loss_raw": 1.374221642812093, "correct_loss_per_char": 0.7321829199790955, "incorrect_loss_per_char": 0.6871108214060465, "correct_loss_per_token": 1.464365839958191, "incorrect_loss_per_token": 1.374221642812093, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2432910203933716, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": true, "logits_per_token": -1.2432910203933716, "logits_per_char": -0.6216455101966858, "num_chars": 2}, {"sum_logits": -1.3688486814498901, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.3688486814498901, "logits_per_char": -0.6844243407249451, "num_chars": 2}, {"sum_logits": -1.464365839958191, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.464365839958191, "logits_per_char": -0.7321829199790955, "num_chars": 2}, {"sum_logits": -1.5105252265930176, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.5105252265930176, "logits_per_char": -0.7552626132965088, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 161, "native_id": "568", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1870156526565552, "incorrect_loss_raw": 1.4728147586186726, "correct_loss_per_char": 0.5935078263282776, "incorrect_loss_per_char": 0.7364073793093363, "correct_loss_per_token": 1.1870156526565552, "incorrect_loss_per_token": 1.4728147586186726, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1870156526565552, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": true, "logits_per_token": -1.1870156526565552, "logits_per_char": -0.5935078263282776, "num_chars": 2}, {"sum_logits": -1.4803677797317505, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.4803677797317505, "logits_per_char": -0.7401838898658752, "num_chars": 2}, {"sum_logits": -1.4580976963043213, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.4580976963043213, "logits_per_char": -0.7290488481521606, "num_chars": 2}, {"sum_logits": -1.4799787998199463, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.4799787998199463, "logits_per_char": -0.7399893999099731, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 162, "native_id": "9-877", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4114755392074585, "incorrect_loss_raw": 1.3888633648554485, "correct_loss_per_char": 0.7057377696037292, "incorrect_loss_per_char": 0.6944316824277242, "correct_loss_per_token": 1.4114755392074585, "incorrect_loss_per_token": 1.3888633648554485, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3211389780044556, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": true, "logits_per_token": -1.3211389780044556, "logits_per_char": -0.6605694890022278, "num_chars": 2}, {"sum_logits": -1.4114755392074585, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.4114755392074585, "logits_per_char": -0.7057377696037292, "num_chars": 2}, {"sum_logits": -1.4690604209899902, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.4690604209899902, "logits_per_char": -0.7345302104949951, "num_chars": 2}, {"sum_logits": -1.3763906955718994, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.3763906955718994, "logits_per_char": -0.6881953477859497, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 163, "native_id": "406", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4210336208343506, "incorrect_loss_raw": 1.388205925623576, "correct_loss_per_char": 0.7105168104171753, "incorrect_loss_per_char": 0.694102962811788, "correct_loss_per_token": 1.4210336208343506, "incorrect_loss_per_token": 1.388205925623576, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2662476301193237, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": true, "logits_per_token": -1.2662476301193237, "logits_per_char": -0.6331238150596619, "num_chars": 2}, {"sum_logits": -1.3704276084899902, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.3704276084899902, "logits_per_char": -0.6852138042449951, "num_chars": 2}, {"sum_logits": -1.4210336208343506, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.4210336208343506, "logits_per_char": -0.7105168104171753, "num_chars": 2}, {"sum_logits": -1.5279425382614136, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.5279425382614136, "logits_per_char": -0.7639712691307068, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 164, "native_id": "7-1132", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4031751155853271, "incorrect_loss_raw": 1.3990796407063801, "correct_loss_per_char": 0.7015875577926636, "incorrect_loss_per_char": 0.6995398203531901, "correct_loss_per_token": 1.4031751155853271, "incorrect_loss_per_token": 1.3990796407063801, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4031751155853271, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.4031751155853271, "logits_per_char": -0.7015875577926636, "num_chars": 2}, {"sum_logits": -1.350407361984253, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.350407361984253, "logits_per_char": -0.6752036809921265, "num_chars": 2}, {"sum_logits": -1.4994431734085083, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.4994431734085083, "logits_per_char": -0.7497215867042542, "num_chars": 2}, {"sum_logits": -1.3473883867263794, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": true, "logits_per_token": -1.3473883867263794, "logits_per_char": -0.6736941933631897, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 165, "native_id": "7-479", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4286727905273438, "incorrect_loss_raw": 1.416631023089091, "correct_loss_per_char": 0.7143363952636719, "incorrect_loss_per_char": 0.7083155115445455, "correct_loss_per_token": 1.4286727905273438, "incorrect_loss_per_token": 1.416631023089091, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2165777683258057, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": true, "logits_per_token": -1.2165777683258057, "logits_per_char": -0.6082888841629028, "num_chars": 2}, {"sum_logits": -1.78814697265625, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.78814697265625, "logits_per_char": -0.894073486328125, "num_chars": 2}, {"sum_logits": -1.4286727905273438, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.4286727905273438, "logits_per_char": -0.7143363952636719, "num_chars": 2}, {"sum_logits": -1.2451683282852173, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.2451683282852173, "logits_per_char": -0.6225841641426086, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 166, "native_id": "609", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1847492456436157, "incorrect_loss_raw": 1.478805939356486, "correct_loss_per_char": 0.5923746228218079, "incorrect_loss_per_char": 0.739402969678243, "correct_loss_per_token": 1.1847492456436157, "incorrect_loss_per_token": 1.478805939356486, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1847492456436157, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": true, "logits_per_token": -1.1847492456436157, "logits_per_char": -0.5923746228218079, "num_chars": 2}, {"sum_logits": -1.4914984703063965, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.4914984703063965, "logits_per_char": -0.7457492351531982, "num_chars": 2}, {"sum_logits": -1.367893934249878, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.367893934249878, "logits_per_char": -0.683946967124939, "num_chars": 2}, {"sum_logits": -1.5770254135131836, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.5770254135131836, "logits_per_char": -0.7885127067565918, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 167, "native_id": "1568", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3018630743026733, "incorrect_loss_raw": 1.4373836914698284, "correct_loss_per_char": 0.6509315371513367, "incorrect_loss_per_char": 0.7186918457349142, "correct_loss_per_token": 1.3018630743026733, "incorrect_loss_per_token": 1.4373836914698284, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.376319408416748, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.376319408416748, "logits_per_char": -0.688159704208374, "num_chars": 2}, {"sum_logits": -1.6443500518798828, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.6443500518798828, "logits_per_char": -0.8221750259399414, "num_chars": 2}, {"sum_logits": -1.3018630743026733, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.3018630743026733, "logits_per_char": -0.6509315371513367, "num_chars": 2}, {"sum_logits": -1.291481614112854, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": true, "logits_per_token": -1.291481614112854, "logits_per_char": -0.645740807056427, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 168, "native_id": "9-418", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.399035096168518, "incorrect_loss_raw": 1.3896973927815754, "correct_loss_per_char": 0.699517548084259, "incorrect_loss_per_char": 0.6948486963907877, "correct_loss_per_token": 1.399035096168518, "incorrect_loss_per_token": 1.3896973927815754, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3471635580062866, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.3471635580062866, "logits_per_char": -0.6735817790031433, "num_chars": 2}, {"sum_logits": -1.4110409021377563, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.4110409021377563, "logits_per_char": -0.7055204510688782, "num_chars": 2}, {"sum_logits": -1.399035096168518, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.399035096168518, "logits_per_char": -0.699517548084259, "num_chars": 2}, {"sum_logits": -1.4108877182006836, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.4108877182006836, "logits_per_char": -0.7054438591003418, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 169, "native_id": "7-1050", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.505098819732666, "incorrect_loss_raw": 1.3601232767105103, "correct_loss_per_char": 0.752549409866333, "incorrect_loss_per_char": 0.6800616383552551, "correct_loss_per_token": 1.505098819732666, "incorrect_loss_per_token": 1.3601232767105103, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2945473194122314, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": true, "logits_per_token": -1.2945473194122314, "logits_per_char": -0.6472736597061157, "num_chars": 2}, {"sum_logits": -1.505098819732666, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.505098819732666, "logits_per_char": -0.752549409866333, "num_chars": 2}, {"sum_logits": -1.3509331941604614, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.3509331941604614, "logits_per_char": -0.6754665970802307, "num_chars": 2}, {"sum_logits": -1.434889316558838, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.434889316558838, "logits_per_char": -0.717444658279419, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 170, "native_id": "9-510", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4856621026992798, "incorrect_loss_raw": 1.3666833639144897, "correct_loss_per_char": 0.7428310513496399, "incorrect_loss_per_char": 0.6833416819572449, "correct_loss_per_token": 1.4856621026992798, "incorrect_loss_per_token": 1.3666833639144897, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4139820337295532, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.4139820337295532, "logits_per_char": -0.7069910168647766, "num_chars": 2}, {"sum_logits": -1.315157175064087, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": true, "logits_per_token": -1.315157175064087, "logits_per_char": -0.6575785875320435, "num_chars": 2}, {"sum_logits": -1.4856621026992798, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.4856621026992798, "logits_per_char": -0.7428310513496399, "num_chars": 2}, {"sum_logits": -1.370910882949829, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.370910882949829, "logits_per_char": -0.6854554414749146, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 171, "native_id": "9-519", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2427273988723755, "incorrect_loss_raw": 1.448647101720174, "correct_loss_per_char": 0.6213636994361877, "incorrect_loss_per_char": 0.724323550860087, "correct_loss_per_token": 1.2427273988723755, "incorrect_loss_per_token": 1.448647101720174, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2427273988723755, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": true, "logits_per_token": -1.2427273988723755, "logits_per_char": -0.6213636994361877, "num_chars": 2}, {"sum_logits": -1.4574726819992065, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.4574726819992065, "logits_per_char": -0.7287363409996033, "num_chars": 2}, {"sum_logits": -1.3861020803451538, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.3861020803451538, "logits_per_char": -0.6930510401725769, "num_chars": 2}, {"sum_logits": -1.502366542816162, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.502366542816162, "logits_per_char": -0.751183271408081, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 172, "native_id": "9-637", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.454558253288269, "incorrect_loss_raw": 1.3890792926152546, "correct_loss_per_char": 0.7272791266441345, "incorrect_loss_per_char": 0.6945396463076273, "correct_loss_per_token": 1.454558253288269, "incorrect_loss_per_token": 1.3890792926152546, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1583819389343262, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": true, "logits_per_token": -1.1583819389343262, "logits_per_char": -0.5791909694671631, "num_chars": 2}, {"sum_logits": -1.560418725013733, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.560418725013733, "logits_per_char": -0.7802093625068665, "num_chars": 2}, {"sum_logits": -1.454558253288269, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.454558253288269, "logits_per_char": -0.7272791266441345, "num_chars": 2}, {"sum_logits": -1.448437213897705, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.448437213897705, "logits_per_char": -0.7242186069488525, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 173, "native_id": "473", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2175407409667969, "incorrect_loss_raw": 1.461944580078125, "correct_loss_per_char": 0.6087703704833984, "incorrect_loss_per_char": 0.7309722900390625, "correct_loss_per_token": 1.2175407409667969, "incorrect_loss_per_token": 1.461944580078125, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2175407409667969, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": true, "logits_per_token": -1.2175407409667969, "logits_per_char": -0.6087703704833984, "num_chars": 2}, {"sum_logits": -1.4656416177749634, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.4656416177749634, "logits_per_char": -0.7328208088874817, "num_chars": 2}, {"sum_logits": -1.420486330986023, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.420486330986023, "logits_per_char": -0.7102431654930115, "num_chars": 2}, {"sum_logits": -1.4997057914733887, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.4997057914733887, "logits_per_char": -0.7498528957366943, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 174, "native_id": "8-445", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.382562518119812, "incorrect_loss_raw": 1.3987449804941814, "correct_loss_per_char": 0.691281259059906, "incorrect_loss_per_char": 0.6993724902470907, "correct_loss_per_token": 1.382562518119812, "incorrect_loss_per_token": 1.3987449804941814, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3054898977279663, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": true, "logits_per_token": -1.3054898977279663, "logits_per_char": -0.6527449488639832, "num_chars": 2}, {"sum_logits": -1.382562518119812, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": false, "logits_per_token": -1.382562518119812, "logits_per_char": -0.691281259059906, "num_chars": 2}, {"sum_logits": -1.415502905845642, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": false, "logits_per_token": -1.415502905845642, "logits_per_char": -0.707751452922821, "num_chars": 2}, {"sum_logits": -1.4752421379089355, "num_tokens": 1, "num_tokens_all": 301, "is_greedy": false, "logits_per_token": -1.4752421379089355, "logits_per_char": -0.7376210689544678, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 175, "native_id": "9-575", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7162598371505737, "incorrect_loss_raw": 1.3200023571650188, "correct_loss_per_char": 0.8581299185752869, "incorrect_loss_per_char": 0.6600011785825094, "correct_loss_per_token": 1.7162598371505737, "incorrect_loss_per_token": 1.3200023571650188, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1642959117889404, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": true, "logits_per_token": -1.1642959117889404, "logits_per_char": -0.5821479558944702, "num_chars": 2}, {"sum_logits": -1.7162598371505737, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": false, "logits_per_token": -1.7162598371505737, "logits_per_char": -0.8581299185752869, "num_chars": 2}, {"sum_logits": -1.471154808998108, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": false, "logits_per_token": -1.471154808998108, "logits_per_char": -0.735577404499054, "num_chars": 2}, {"sum_logits": -1.3245563507080078, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": false, "logits_per_token": -1.3245563507080078, "logits_per_char": -0.6622781753540039, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 176, "native_id": "7-284", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4029611349105835, "incorrect_loss_raw": 1.3925884167353313, "correct_loss_per_char": 0.7014805674552917, "incorrect_loss_per_char": 0.6962942083676656, "correct_loss_per_token": 1.4029611349105835, "incorrect_loss_per_token": 1.3925884167353313, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2988355159759521, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": true, "logits_per_token": -1.2988355159759521, "logits_per_char": -0.6494177579879761, "num_chars": 2}, {"sum_logits": -1.4029611349105835, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.4029611349105835, "logits_per_char": -0.7014805674552917, "num_chars": 2}, {"sum_logits": -1.408445954322815, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.408445954322815, "logits_per_char": -0.7042229771614075, "num_chars": 2}, {"sum_logits": -1.4704837799072266, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.4704837799072266, "logits_per_char": -0.7352418899536133, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 177, "native_id": "8-135", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2011654376983643, "incorrect_loss_raw": 1.465871771176656, "correct_loss_per_char": 0.6005827188491821, "incorrect_loss_per_char": 0.732935885588328, "correct_loss_per_token": 1.2011654376983643, "incorrect_loss_per_token": 1.465871771176656, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2011654376983643, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": true, "logits_per_token": -1.2011654376983643, "logits_per_char": -0.6005827188491821, "num_chars": 2}, {"sum_logits": -1.4139827489852905, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": false, "logits_per_token": -1.4139827489852905, "logits_per_char": -0.7069913744926453, "num_chars": 2}, {"sum_logits": -1.4882146120071411, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": false, "logits_per_token": -1.4882146120071411, "logits_per_char": -0.7441073060035706, "num_chars": 2}, {"sum_logits": -1.4954179525375366, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": false, "logits_per_token": -1.4954179525375366, "logits_per_char": -0.7477089762687683, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 178, "native_id": "397", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3706835508346558, "incorrect_loss_raw": 1.4010942379633586, "correct_loss_per_char": 0.6853417754173279, "incorrect_loss_per_char": 0.7005471189816793, "correct_loss_per_token": 1.3706835508346558, "incorrect_loss_per_token": 1.4010942379633586, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3516634702682495, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": true, "logits_per_token": -1.3516634702682495, "logits_per_char": -0.6758317351341248, "num_chars": 2}, {"sum_logits": -1.4758154153823853, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "logits_per_token": -1.4758154153823853, "logits_per_char": -0.7379077076911926, "num_chars": 2}, {"sum_logits": -1.375803828239441, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "logits_per_token": -1.375803828239441, "logits_per_char": -0.6879019141197205, "num_chars": 2}, {"sum_logits": -1.3706835508346558, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "logits_per_token": -1.3706835508346558, "logits_per_char": -0.6853417754173279, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 179, "native_id": "9-32", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5476055145263672, "incorrect_loss_raw": 1.3708688418070476, "correct_loss_per_char": 0.7738027572631836, "incorrect_loss_per_char": 0.6854344209035238, "correct_loss_per_token": 1.5476055145263672, "incorrect_loss_per_token": 1.3708688418070476, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0921282768249512, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": true, "logits_per_token": -1.0921282768249512, "logits_per_char": -0.5460641384124756, "num_chars": 2}, {"sum_logits": -1.5476055145263672, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.5476055145263672, "logits_per_char": -0.7738027572631836, "num_chars": 2}, {"sum_logits": -1.4115498065948486, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.4115498065948486, "logits_per_char": -0.7057749032974243, "num_chars": 2}, {"sum_logits": -1.6089284420013428, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.6089284420013428, "logits_per_char": -0.8044642210006714, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 180, "native_id": "48", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.463114857673645, "incorrect_loss_raw": 1.3776063124338787, "correct_loss_per_char": 0.7315574288368225, "incorrect_loss_per_char": 0.6888031562169393, "correct_loss_per_token": 1.463114857673645, "incorrect_loss_per_token": 1.3776063124338787, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1991642713546753, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": true, "logits_per_token": -1.1991642713546753, "logits_per_char": -0.5995821356773376, "num_chars": 2}, {"sum_logits": -1.4228628873825073, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.4228628873825073, "logits_per_char": -0.7114314436912537, "num_chars": 2}, {"sum_logits": -1.463114857673645, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.463114857673645, "logits_per_char": -0.7315574288368225, "num_chars": 2}, {"sum_logits": -1.5107917785644531, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.5107917785644531, "logits_per_char": -0.7553958892822266, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 181, "native_id": "8-69", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.428996205329895, "incorrect_loss_raw": 1.4013526439666748, "correct_loss_per_char": 0.7144981026649475, "incorrect_loss_per_char": 0.7006763219833374, "correct_loss_per_token": 1.428996205329895, "incorrect_loss_per_token": 1.4013526439666748, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1581822633743286, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": true, "logits_per_token": -1.1581822633743286, "logits_per_char": -0.5790911316871643, "num_chars": 2}, {"sum_logits": -1.5603152513504028, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.5603152513504028, "logits_per_char": -0.7801576256752014, "num_chars": 2}, {"sum_logits": -1.428996205329895, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.428996205329895, "logits_per_char": -0.7144981026649475, "num_chars": 2}, {"sum_logits": -1.485560417175293, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.485560417175293, "logits_per_char": -0.7427802085876465, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 182, "native_id": "9-159", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.514886498451233, "incorrect_loss_raw": 1.3567417462666829, "correct_loss_per_char": 0.7574432492256165, "incorrect_loss_per_char": 0.6783708731333414, "correct_loss_per_token": 1.514886498451233, "incorrect_loss_per_token": 1.3567417462666829, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.38214910030365, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.38214910030365, "logits_per_char": -0.691074550151825, "num_chars": 2}, {"sum_logits": -1.4340288639068604, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.4340288639068604, "logits_per_char": -0.7170144319534302, "num_chars": 2}, {"sum_logits": -1.514886498451233, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.514886498451233, "logits_per_char": -0.7574432492256165, "num_chars": 2}, {"sum_logits": -1.2540472745895386, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": true, "logits_per_token": -1.2540472745895386, "logits_per_char": -0.6270236372947693, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 183, "native_id": "9-317", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.351715326309204, "incorrect_loss_raw": 1.4082517623901367, "correct_loss_per_char": 0.675857663154602, "incorrect_loss_per_char": 0.7041258811950684, "correct_loss_per_token": 1.351715326309204, "incorrect_loss_per_token": 1.4082517623901367, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3584810495376587, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.3584810495376587, "logits_per_char": -0.6792405247688293, "num_chars": 2}, {"sum_logits": -1.4868781566619873, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.4868781566619873, "logits_per_char": -0.7434390783309937, "num_chars": 2}, {"sum_logits": -1.351715326309204, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": true, "logits_per_token": -1.351715326309204, "logits_per_char": -0.675857663154602, "num_chars": 2}, {"sum_logits": -1.3793960809707642, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.3793960809707642, "logits_per_char": -0.6896980404853821, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 184, "native_id": "423", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6334477663040161, "incorrect_loss_raw": 1.3350963989893596, "correct_loss_per_char": 0.8167238831520081, "incorrect_loss_per_char": 0.6675481994946798, "correct_loss_per_token": 1.6334477663040161, "incorrect_loss_per_token": 1.3350963989893596, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1237618923187256, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": true, "logits_per_token": -1.1237618923187256, "logits_per_char": -0.5618809461593628, "num_chars": 2}, {"sum_logits": -1.4190075397491455, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.4190075397491455, "logits_per_char": -0.7095037698745728, "num_chars": 2}, {"sum_logits": -1.4625197649002075, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.4625197649002075, "logits_per_char": -0.7312598824501038, "num_chars": 2}, {"sum_logits": -1.6334477663040161, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.6334477663040161, "logits_per_char": -0.8167238831520081, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 185, "native_id": "8-304", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4490711688995361, "incorrect_loss_raw": 1.3933898607889812, "correct_loss_per_char": 0.7245355844497681, "incorrect_loss_per_char": 0.6966949303944906, "correct_loss_per_token": 1.4490711688995361, "incorrect_loss_per_token": 1.3933898607889812, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4490711688995361, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4490711688995361, "logits_per_char": -0.7245355844497681, "num_chars": 2}, {"sum_logits": -1.6195094585418701, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.6195094585418701, "logits_per_char": -0.8097547292709351, "num_chars": 2}, {"sum_logits": -1.378045916557312, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.378045916557312, "logits_per_char": -0.689022958278656, "num_chars": 2}, {"sum_logits": -1.1826142072677612, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.1826142072677612, "logits_per_char": -0.5913071036338806, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 186, "native_id": "785", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3871798515319824, "incorrect_loss_raw": 1.4044617811838787, "correct_loss_per_char": 0.6935899257659912, "incorrect_loss_per_char": 0.7022308905919393, "correct_loss_per_token": 1.3871798515319824, "incorrect_loss_per_token": 1.4044617811838787, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2614052295684814, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": true, "logits_per_token": -1.2614052295684814, "logits_per_char": -0.6307026147842407, "num_chars": 2}, {"sum_logits": -1.6074962615966797, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "logits_per_token": -1.6074962615966797, "logits_per_char": -0.8037481307983398, "num_chars": 2}, {"sum_logits": -1.3444838523864746, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "logits_per_token": -1.3444838523864746, "logits_per_char": -0.6722419261932373, "num_chars": 2}, {"sum_logits": -1.3871798515319824, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "logits_per_token": -1.3871798515319824, "logits_per_char": -0.6935899257659912, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 187, "native_id": "9-1087", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3377695083618164, "incorrect_loss_raw": 1.4202542304992676, "correct_loss_per_char": 0.6688847541809082, "incorrect_loss_per_char": 0.7101271152496338, "correct_loss_per_token": 1.3377695083618164, "incorrect_loss_per_token": 1.4202542304992676, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.262380599975586, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": true, "logits_per_token": -1.262380599975586, "logits_per_char": -0.631190299987793, "num_chars": 2}, {"sum_logits": -1.4019807577133179, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.4019807577133179, "logits_per_char": -0.7009903788566589, "num_chars": 2}, {"sum_logits": -1.3377695083618164, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.3377695083618164, "logits_per_char": -0.6688847541809082, "num_chars": 2}, {"sum_logits": -1.596401333808899, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.596401333808899, "logits_per_char": -0.7982006669044495, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 188, "native_id": "485", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5336978435516357, "incorrect_loss_raw": 1.3585588534673054, "correct_loss_per_char": 0.7668489217758179, "incorrect_loss_per_char": 0.6792794267336527, "correct_loss_per_token": 1.5336978435516357, "incorrect_loss_per_token": 1.3585588534673054, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1751315593719482, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": true, "logits_per_token": -1.1751315593719482, "logits_per_char": -0.5875657796859741, "num_chars": 2}, {"sum_logits": -1.4405876398086548, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": false, "logits_per_token": -1.4405876398086548, "logits_per_char": -0.7202938199043274, "num_chars": 2}, {"sum_logits": -1.4599573612213135, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": false, "logits_per_token": -1.4599573612213135, "logits_per_char": -0.7299786806106567, "num_chars": 2}, {"sum_logits": -1.5336978435516357, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": false, "logits_per_token": -1.5336978435516357, "logits_per_char": -0.7668489217758179, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 189, "native_id": "9-908", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4712023735046387, "incorrect_loss_raw": 1.371112545331319, "correct_loss_per_char": 0.7356011867523193, "incorrect_loss_per_char": 0.6855562726656595, "correct_loss_per_token": 1.4712023735046387, "incorrect_loss_per_token": 1.371112545331319, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.349806308746338, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.349806308746338, "logits_per_char": -0.674903154373169, "num_chars": 2}, {"sum_logits": -1.4652336835861206, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.4652336835861206, "logits_per_char": -0.7326168417930603, "num_chars": 2}, {"sum_logits": -1.4712023735046387, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.4712023735046387, "logits_per_char": -0.7356011867523193, "num_chars": 2}, {"sum_logits": -1.298297643661499, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": true, "logits_per_token": -1.298297643661499, "logits_per_char": -0.6491488218307495, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 190, "native_id": "1231", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4481898546218872, "incorrect_loss_raw": 1.3945694367090862, "correct_loss_per_char": 0.7240949273109436, "incorrect_loss_per_char": 0.6972847183545431, "correct_loss_per_token": 1.4481898546218872, "incorrect_loss_per_token": 1.3945694367090862, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1752086877822876, "num_tokens": 1, "num_tokens_all": 246, "is_greedy": true, "logits_per_token": -1.1752086877822876, "logits_per_char": -0.5876043438911438, "num_chars": 2}, {"sum_logits": -1.374794363975525, "num_tokens": 1, "num_tokens_all": 246, "is_greedy": false, "logits_per_token": -1.374794363975525, "logits_per_char": -0.6873971819877625, "num_chars": 2}, {"sum_logits": -1.4481898546218872, "num_tokens": 1, "num_tokens_all": 246, "is_greedy": false, "logits_per_token": -1.4481898546218872, "logits_per_char": -0.7240949273109436, "num_chars": 2}, {"sum_logits": -1.6337052583694458, "num_tokens": 1, "num_tokens_all": 246, "is_greedy": false, "logits_per_token": -1.6337052583694458, "logits_per_char": -0.8168526291847229, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 191, "native_id": "810", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.452406883239746, "incorrect_loss_raw": 1.3742538293202717, "correct_loss_per_char": 0.726203441619873, "incorrect_loss_per_char": 0.6871269146601359, "correct_loss_per_token": 1.452406883239746, "incorrect_loss_per_token": 1.3742538293202717, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.390721082687378, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.390721082687378, "logits_per_char": -0.695360541343689, "num_chars": 2}, {"sum_logits": -1.452406883239746, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.452406883239746, "logits_per_char": -0.726203441619873, "num_chars": 2}, {"sum_logits": -1.4152767658233643, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.4152767658233643, "logits_per_char": -0.7076383829116821, "num_chars": 2}, {"sum_logits": -1.3167636394500732, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": true, "logits_per_token": -1.3167636394500732, "logits_per_char": -0.6583818197250366, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 192, "native_id": "158", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6661391258239746, "incorrect_loss_raw": 1.3265533844629924, "correct_loss_per_char": 0.8330695629119873, "incorrect_loss_per_char": 0.6632766922314962, "correct_loss_per_token": 1.6661391258239746, "incorrect_loss_per_token": 1.3265533844629924, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1277426481246948, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": true, "logits_per_token": -1.1277426481246948, "logits_per_char": -0.5638713240623474, "num_chars": 2}, {"sum_logits": -1.3905153274536133, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.3905153274536133, "logits_per_char": -0.6952576637268066, "num_chars": 2}, {"sum_logits": -1.461402177810669, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.461402177810669, "logits_per_char": -0.7307010889053345, "num_chars": 2}, {"sum_logits": -1.6661391258239746, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.6661391258239746, "logits_per_char": -0.8330695629119873, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 193, "native_id": "7-445", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2820806503295898, "incorrect_loss_raw": 1.4320991039276123, "correct_loss_per_char": 0.6410403251647949, "incorrect_loss_per_char": 0.7160495519638062, "correct_loss_per_token": 1.2820806503295898, "incorrect_loss_per_token": 1.4320991039276123, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2820806503295898, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": true, "logits_per_token": -1.2820806503295898, "logits_per_char": -0.6410403251647949, "num_chars": 2}, {"sum_logits": -1.460959792137146, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.460959792137146, "logits_per_char": -0.730479896068573, "num_chars": 2}, {"sum_logits": -1.397955060005188, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.397955060005188, "logits_per_char": -0.698977530002594, "num_chars": 2}, {"sum_logits": -1.437382459640503, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.437382459640503, "logits_per_char": -0.7186912298202515, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 194, "native_id": "1502", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4434144496917725, "incorrect_loss_raw": 1.384295384089152, "correct_loss_per_char": 0.7217072248458862, "incorrect_loss_per_char": 0.692147692044576, "correct_loss_per_token": 1.4434144496917725, "incorrect_loss_per_token": 1.384295384089152, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2206562757492065, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": true, "logits_per_token": -1.2206562757492065, "logits_per_char": -0.6103281378746033, "num_chars": 2}, {"sum_logits": -1.4335718154907227, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.4335718154907227, "logits_per_char": -0.7167859077453613, "num_chars": 2}, {"sum_logits": -1.4434144496917725, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.4434144496917725, "logits_per_char": -0.7217072248458862, "num_chars": 2}, {"sum_logits": -1.4986580610275269, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.4986580610275269, "logits_per_char": -0.7493290305137634, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 195, "native_id": "1200", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5227880477905273, "incorrect_loss_raw": 1.3708083629608154, "correct_loss_per_char": 0.7613940238952637, "incorrect_loss_per_char": 0.6854041814804077, "correct_loss_per_token": 1.5227880477905273, "incorrect_loss_per_token": 1.3708083629608154, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.313269853591919, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.313269853591919, "logits_per_char": -0.6566349267959595, "num_chars": 2}, {"sum_logits": -1.5227880477905273, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.5227880477905273, "logits_per_char": -0.7613940238952637, "num_chars": 2}, {"sum_logits": -1.5669169425964355, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.5669169425964355, "logits_per_char": -0.7834584712982178, "num_chars": 2}, {"sum_logits": -1.2322382926940918, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": true, "logits_per_token": -1.2322382926940918, "logits_per_char": -0.6161191463470459, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 196, "native_id": "437", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4213311672210693, "incorrect_loss_raw": 1.3958667914072673, "correct_loss_per_char": 0.7106655836105347, "incorrect_loss_per_char": 0.6979333957036337, "correct_loss_per_token": 1.4213311672210693, "incorrect_loss_per_token": 1.3958667914072673, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4213311672210693, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.4213311672210693, "logits_per_char": -0.7106655836105347, "num_chars": 2}, {"sum_logits": -1.3788714408874512, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.3788714408874512, "logits_per_char": -0.6894357204437256, "num_chars": 2}, {"sum_logits": -1.5709507465362549, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.5709507465362549, "logits_per_char": -0.7854753732681274, "num_chars": 2}, {"sum_logits": -1.2377781867980957, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": true, "logits_per_token": -1.2377781867980957, "logits_per_char": -0.6188890933990479, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 197, "native_id": "8-205", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1071561574935913, "incorrect_loss_raw": 1.5087162653605144, "correct_loss_per_char": 0.5535780787467957, "incorrect_loss_per_char": 0.7543581326802572, "correct_loss_per_token": 1.1071561574935913, "incorrect_loss_per_token": 1.5087162653605144, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1071561574935913, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": true, "logits_per_token": -1.1071561574935913, "logits_per_char": -0.5535780787467957, "num_chars": 2}, {"sum_logits": -1.477624535560608, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.477624535560608, "logits_per_char": -0.738812267780304, "num_chars": 2}, {"sum_logits": -1.521559715270996, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.521559715270996, "logits_per_char": -0.760779857635498, "num_chars": 2}, {"sum_logits": -1.526964545249939, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.526964545249939, "logits_per_char": -0.7634822726249695, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 198, "native_id": "9-270", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4466279745101929, "incorrect_loss_raw": 1.3882978757222493, "correct_loss_per_char": 0.7233139872550964, "incorrect_loss_per_char": 0.6941489378611246, "correct_loss_per_token": 1.4466279745101929, "incorrect_loss_per_token": 1.3882978757222493, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2059693336486816, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": true, "logits_per_token": -1.2059693336486816, "logits_per_char": -0.6029846668243408, "num_chars": 2}, {"sum_logits": -1.3516138792037964, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.3516138792037964, "logits_per_char": -0.6758069396018982, "num_chars": 2}, {"sum_logits": -1.4466279745101929, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.4466279745101929, "logits_per_char": -0.7233139872550964, "num_chars": 2}, {"sum_logits": -1.60731041431427, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.60731041431427, "logits_per_char": -0.803655207157135, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 199, "native_id": "8-130", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4787691831588745, "incorrect_loss_raw": 1.3678806622823079, "correct_loss_per_char": 0.7393845915794373, "incorrect_loss_per_char": 0.6839403311411539, "correct_loss_per_token": 1.4787691831588745, "incorrect_loss_per_token": 1.3678806622823079, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3697247505187988, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.3697247505187988, "logits_per_char": -0.6848623752593994, "num_chars": 2}, {"sum_logits": -1.430611252784729, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.430611252784729, "logits_per_char": -0.7153056263923645, "num_chars": 2}, {"sum_logits": -1.4787691831588745, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.4787691831588745, "logits_per_char": -0.7393845915794373, "num_chars": 2}, {"sum_logits": -1.303305983543396, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.303305983543396, "logits_per_char": -0.651652991771698, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 200, "native_id": "229", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.532618522644043, "incorrect_loss_raw": 1.351969838142395, "correct_loss_per_char": 0.7663092613220215, "incorrect_loss_per_char": 0.6759849190711975, "correct_loss_per_token": 1.532618522644043, "incorrect_loss_per_token": 1.351969838142395, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.283759355545044, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": true, "logits_per_token": -1.283759355545044, "logits_per_char": -0.641879677772522, "num_chars": 2}, {"sum_logits": -1.416391372680664, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.416391372680664, "logits_per_char": -0.708195686340332, "num_chars": 2}, {"sum_logits": -1.355758786201477, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.355758786201477, "logits_per_char": -0.6778793931007385, "num_chars": 2}, {"sum_logits": -1.532618522644043, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.532618522644043, "logits_per_char": -0.7663092613220215, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 201, "native_id": "9-390", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.351854681968689, "incorrect_loss_raw": 1.4183414777119954, "correct_loss_per_char": 0.6759273409843445, "incorrect_loss_per_char": 0.7091707388559977, "correct_loss_per_token": 1.351854681968689, "incorrect_loss_per_token": 1.4183414777119954, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3055777549743652, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": true, "logits_per_token": -1.3055777549743652, "logits_per_char": -0.6527888774871826, "num_chars": 2}, {"sum_logits": -1.631266474723816, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.631266474723816, "logits_per_char": -0.815633237361908, "num_chars": 2}, {"sum_logits": -1.3181802034378052, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.3181802034378052, "logits_per_char": -0.6590901017189026, "num_chars": 2}, {"sum_logits": -1.351854681968689, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.351854681968689, "logits_per_char": -0.6759273409843445, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 202, "native_id": "8-107", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5956106185913086, "incorrect_loss_raw": 1.3342594305674236, "correct_loss_per_char": 0.7978053092956543, "incorrect_loss_per_char": 0.6671297152837118, "correct_loss_per_token": 1.5956106185913086, "incorrect_loss_per_token": 1.3342594305674236, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4024642705917358, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.4024642705917358, "logits_per_char": -0.7012321352958679, "num_chars": 2}, {"sum_logits": -1.3378092050552368, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.3378092050552368, "logits_per_char": -0.6689046025276184, "num_chars": 2}, {"sum_logits": -1.5956106185913086, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.5956106185913086, "logits_per_char": -0.7978053092956543, "num_chars": 2}, {"sum_logits": -1.2625048160552979, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": true, "logits_per_token": -1.2625048160552979, "logits_per_char": -0.6312524080276489, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 203, "native_id": "7-527", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1991978883743286, "incorrect_loss_raw": 1.4678236643473308, "correct_loss_per_char": 0.5995989441871643, "incorrect_loss_per_char": 0.7339118321736654, "correct_loss_per_token": 1.1991978883743286, "incorrect_loss_per_token": 1.4678236643473308, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1991978883743286, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": true, "logits_per_token": -1.1991978883743286, "logits_per_char": -0.5995989441871643, "num_chars": 2}, {"sum_logits": -1.5487719774246216, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": false, "logits_per_token": -1.5487719774246216, "logits_per_char": -0.7743859887123108, "num_chars": 2}, {"sum_logits": -1.370259165763855, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": false, "logits_per_token": -1.370259165763855, "logits_per_char": -0.6851295828819275, "num_chars": 2}, {"sum_logits": -1.4844398498535156, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": false, "logits_per_token": -1.4844398498535156, "logits_per_char": -0.7422199249267578, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 204, "native_id": "7-333", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4773602485656738, "incorrect_loss_raw": 1.3745386600494385, "correct_loss_per_char": 0.7386801242828369, "incorrect_loss_per_char": 0.6872693300247192, "correct_loss_per_token": 1.4773602485656738, "incorrect_loss_per_token": 1.3745386600494385, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1901206970214844, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": true, "logits_per_token": -1.1901206970214844, "logits_per_char": -0.5950603485107422, "num_chars": 2}, {"sum_logits": -1.4551193714141846, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.4551193714141846, "logits_per_char": -0.7275596857070923, "num_chars": 2}, {"sum_logits": -1.4773602485656738, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.4773602485656738, "logits_per_char": -0.7386801242828369, "num_chars": 2}, {"sum_logits": -1.4783759117126465, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.4783759117126465, "logits_per_char": -0.7391879558563232, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 205, "native_id": "9-44", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4863766431808472, "incorrect_loss_raw": 1.3825873931248982, "correct_loss_per_char": 0.7431883215904236, "incorrect_loss_per_char": 0.6912936965624491, "correct_loss_per_token": 1.4863766431808472, "incorrect_loss_per_token": 1.3825873931248982, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1207735538482666, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": true, "logits_per_token": -1.1207735538482666, "logits_per_char": -0.5603867769241333, "num_chars": 2}, {"sum_logits": -1.467436671257019, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.467436671257019, "logits_per_char": -0.7337183356285095, "num_chars": 2}, {"sum_logits": -1.4863766431808472, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.4863766431808472, "logits_per_char": -0.7431883215904236, "num_chars": 2}, {"sum_logits": -1.5595519542694092, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.5595519542694092, "logits_per_char": -0.7797759771347046, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 206, "native_id": "7-160", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6741600036621094, "incorrect_loss_raw": 1.331586519877116, "correct_loss_per_char": 0.8370800018310547, "incorrect_loss_per_char": 0.665793259938558, "correct_loss_per_token": 1.6741600036621094, "incorrect_loss_per_token": 1.331586519877116, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.088171124458313, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": true, "logits_per_token": -1.088171124458313, "logits_per_char": -0.5440855622291565, "num_chars": 2}, {"sum_logits": -1.3885948657989502, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.3885948657989502, "logits_per_char": -0.6942974328994751, "num_chars": 2}, {"sum_logits": -1.5179935693740845, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.5179935693740845, "logits_per_char": -0.7589967846870422, "num_chars": 2}, {"sum_logits": -1.6741600036621094, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.6741600036621094, "logits_per_char": -0.8370800018310547, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 207, "native_id": "1942", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4727212190628052, "incorrect_loss_raw": 1.369158665339152, "correct_loss_per_char": 0.7363606095314026, "incorrect_loss_per_char": 0.684579332669576, "correct_loss_per_token": 1.4727212190628052, "incorrect_loss_per_token": 1.369158665339152, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3239413499832153, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": true, "logits_per_token": -1.3239413499832153, "logits_per_char": -0.6619706749916077, "num_chars": 2}, {"sum_logits": -1.324966549873352, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.324966549873352, "logits_per_char": -0.662483274936676, "num_chars": 2}, {"sum_logits": -1.4585680961608887, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.4585680961608887, "logits_per_char": -0.7292840480804443, "num_chars": 2}, {"sum_logits": -1.4727212190628052, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.4727212190628052, "logits_per_char": -0.7363606095314026, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 208, "native_id": "9-597", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4016059637069702, "incorrect_loss_raw": 1.3933453957239788, "correct_loss_per_char": 0.7008029818534851, "incorrect_loss_per_char": 0.6966726978619894, "correct_loss_per_token": 1.4016059637069702, "incorrect_loss_per_token": 1.3933453957239788, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.263753890991211, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": true, "logits_per_token": -1.263753890991211, "logits_per_char": -0.6318769454956055, "num_chars": 2}, {"sum_logits": -1.4016059637069702, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": false, "logits_per_token": -1.4016059637069702, "logits_per_char": -0.7008029818534851, "num_chars": 2}, {"sum_logits": -1.4652760028839111, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": false, "logits_per_token": -1.4652760028839111, "logits_per_char": -0.7326380014419556, "num_chars": 2}, {"sum_logits": -1.451006293296814, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": false, "logits_per_token": -1.451006293296814, "logits_per_char": -0.725503146648407, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 209, "native_id": "9-35", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2820985317230225, "incorrect_loss_raw": 1.4340643882751465, "correct_loss_per_char": 0.6410492658615112, "incorrect_loss_per_char": 0.7170321941375732, "correct_loss_per_token": 1.2820985317230225, "incorrect_loss_per_token": 1.4340643882751465, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2820985317230225, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": true, "logits_per_token": -1.2820985317230225, "logits_per_char": -0.6410492658615112, "num_chars": 2}, {"sum_logits": -1.4029275178909302, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.4029275178909302, "logits_per_char": -0.7014637589454651, "num_chars": 2}, {"sum_logits": -1.4433575868606567, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.4433575868606567, "logits_per_char": -0.7216787934303284, "num_chars": 2}, {"sum_logits": -1.4559080600738525, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.4559080600738525, "logits_per_char": -0.7279540300369263, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 210, "native_id": "1161", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4713281393051147, "incorrect_loss_raw": 1.3691780964533489, "correct_loss_per_char": 0.7356640696525574, "incorrect_loss_per_char": 0.6845890482266744, "correct_loss_per_token": 1.4713281393051147, "incorrect_loss_per_token": 1.3691780964533489, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3197907209396362, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": true, "logits_per_token": -1.3197907209396362, "logits_per_char": -0.6598953604698181, "num_chars": 2}, {"sum_logits": -1.4713281393051147, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.4713281393051147, "logits_per_char": -0.7356640696525574, "num_chars": 2}, {"sum_logits": -1.407227635383606, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.407227635383606, "logits_per_char": -0.703613817691803, "num_chars": 2}, {"sum_logits": -1.3805159330368042, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.3805159330368042, "logits_per_char": -0.6902579665184021, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 211, "native_id": "7-171", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5319796800613403, "incorrect_loss_raw": 1.3495104710261028, "correct_loss_per_char": 0.7659898400306702, "incorrect_loss_per_char": 0.6747552355130514, "correct_loss_per_token": 1.5319796800613403, "incorrect_loss_per_token": 1.3495104710261028, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3781516551971436, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": false, "logits_per_token": -1.3781516551971436, "logits_per_char": -0.6890758275985718, "num_chars": 2}, {"sum_logits": -1.5319796800613403, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": false, "logits_per_token": -1.5319796800613403, "logits_per_char": -0.7659898400306702, "num_chars": 2}, {"sum_logits": -1.3092652559280396, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": true, "logits_per_token": -1.3092652559280396, "logits_per_char": -0.6546326279640198, "num_chars": 2}, {"sum_logits": -1.361114501953125, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": false, "logits_per_token": -1.361114501953125, "logits_per_char": -0.6805572509765625, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 212, "native_id": "1139", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5624395608901978, "incorrect_loss_raw": 1.3512945572535198, "correct_loss_per_char": 0.7812197804450989, "incorrect_loss_per_char": 0.6756472786267599, "correct_loss_per_token": 1.5624395608901978, "incorrect_loss_per_token": 1.3512945572535198, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1800585985183716, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": true, "logits_per_token": -1.1800585985183716, "logits_per_char": -0.5900292992591858, "num_chars": 2}, {"sum_logits": -1.3903719186782837, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.3903719186782837, "logits_per_char": -0.6951859593391418, "num_chars": 2}, {"sum_logits": -1.4834531545639038, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.4834531545639038, "logits_per_char": -0.7417265772819519, "num_chars": 2}, {"sum_logits": -1.5624395608901978, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.5624395608901978, "logits_per_char": -0.7812197804450989, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 213, "native_id": "1924", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4508768320083618, "incorrect_loss_raw": 1.3954238494237263, "correct_loss_per_char": 0.7254384160041809, "incorrect_loss_per_char": 0.6977119247118632, "correct_loss_per_token": 1.4508768320083618, "incorrect_loss_per_token": 1.3954238494237263, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1127558946609497, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": true, "logits_per_token": -1.1127558946609497, "logits_per_char": -0.5563779473304749, "num_chars": 2}, {"sum_logits": -1.4508768320083618, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.4508768320083618, "logits_per_char": -0.7254384160041809, "num_chars": 2}, {"sum_logits": -1.4976537227630615, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.4976537227630615, "logits_per_char": -0.7488268613815308, "num_chars": 2}, {"sum_logits": -1.575861930847168, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.575861930847168, "logits_per_char": -0.787930965423584, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 214, "native_id": "9-440", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4825531244277954, "incorrect_loss_raw": 1.373331069946289, "correct_loss_per_char": 0.7412765622138977, "incorrect_loss_per_char": 0.6866655349731445, "correct_loss_per_token": 1.4825531244277954, "incorrect_loss_per_token": 1.373331069946289, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2058897018432617, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": true, "logits_per_token": -1.2058897018432617, "logits_per_char": -0.6029448509216309, "num_chars": 2}, {"sum_logits": -1.4825531244277954, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.4825531244277954, "logits_per_char": -0.7412765622138977, "num_chars": 2}, {"sum_logits": -1.502042293548584, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.502042293548584, "logits_per_char": -0.751021146774292, "num_chars": 2}, {"sum_logits": -1.4120612144470215, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.4120612144470215, "logits_per_char": -0.7060306072235107, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 215, "native_id": "9-528", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5801304578781128, "incorrect_loss_raw": 1.340313156445821, "correct_loss_per_char": 0.7900652289390564, "incorrect_loss_per_char": 0.6701565782229105, "correct_loss_per_token": 1.5801304578781128, "incorrect_loss_per_token": 1.340313156445821, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.272696614265442, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": true, "logits_per_token": -1.272696614265442, "logits_per_char": -0.636348307132721, "num_chars": 2}, {"sum_logits": -1.348351240158081, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.348351240158081, "logits_per_char": -0.6741756200790405, "num_chars": 2}, {"sum_logits": -1.5801304578781128, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.5801304578781128, "logits_per_char": -0.7900652289390564, "num_chars": 2}, {"sum_logits": -1.3998916149139404, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.3998916149139404, "logits_per_char": -0.6999458074569702, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 216, "native_id": "170", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6111090183258057, "incorrect_loss_raw": 1.4061240355173747, "correct_loss_per_char": 0.8055545091629028, "incorrect_loss_per_char": 0.7030620177586874, "correct_loss_per_token": 1.6111090183258057, "incorrect_loss_per_token": 1.4061240355173747, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9448800086975098, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": true, "logits_per_token": -0.9448800086975098, "logits_per_char": -0.4724400043487549, "num_chars": 2}, {"sum_logits": -1.435912847518921, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": false, "logits_per_token": -1.435912847518921, "logits_per_char": -0.7179564237594604, "num_chars": 2}, {"sum_logits": -1.6111090183258057, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": false, "logits_per_token": -1.6111090183258057, "logits_per_char": -0.8055545091629028, "num_chars": 2}, {"sum_logits": -1.8375792503356934, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": false, "logits_per_token": -1.8375792503356934, "logits_per_char": -0.9187896251678467, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 217, "native_id": "395", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5148729085922241, "incorrect_loss_raw": 1.364307125409444, "correct_loss_per_char": 0.7574364542961121, "incorrect_loss_per_char": 0.682153562704722, "correct_loss_per_token": 1.5148729085922241, "incorrect_loss_per_token": 1.364307125409444, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.212600827217102, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": true, "logits_per_token": -1.212600827217102, "logits_per_char": -0.606300413608551, "num_chars": 2}, {"sum_logits": -1.5256659984588623, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.5256659984588623, "logits_per_char": -0.7628329992294312, "num_chars": 2}, {"sum_logits": -1.3546545505523682, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.3546545505523682, "logits_per_char": -0.6773272752761841, "num_chars": 2}, {"sum_logits": -1.5148729085922241, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.5148729085922241, "logits_per_char": -0.7574364542961121, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 218, "native_id": "9-633", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4915428161621094, "incorrect_loss_raw": 1.3662065664927165, "correct_loss_per_char": 0.7457714080810547, "incorrect_loss_per_char": 0.6831032832463583, "correct_loss_per_token": 1.4915428161621094, "incorrect_loss_per_token": 1.3662065664927165, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2907205820083618, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.2907205820083618, "logits_per_char": -0.6453602910041809, "num_chars": 2}, {"sum_logits": -1.3237121105194092, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.3237121105194092, "logits_per_char": -0.6618560552597046, "num_chars": 2}, {"sum_logits": -1.4841870069503784, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.4841870069503784, "logits_per_char": -0.7420935034751892, "num_chars": 2}, {"sum_logits": -1.4915428161621094, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.4915428161621094, "logits_per_char": -0.7457714080810547, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 219, "native_id": "9-504", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2943743467330933, "incorrect_loss_raw": 1.4329365094502766, "correct_loss_per_char": 0.6471871733665466, "incorrect_loss_per_char": 0.7164682547251383, "correct_loss_per_token": 1.2943743467330933, "incorrect_loss_per_token": 1.4329365094502766, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2943743467330933, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": true, "logits_per_token": -1.2943743467330933, "logits_per_char": -0.6471871733665466, "num_chars": 2}, {"sum_logits": -1.4430217742919922, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.4430217742919922, "logits_per_char": -0.7215108871459961, "num_chars": 2}, {"sum_logits": -1.3081656694412231, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.3081656694412231, "logits_per_char": -0.6540828347206116, "num_chars": 2}, {"sum_logits": -1.5476220846176147, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.5476220846176147, "logits_per_char": -0.7738110423088074, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 220, "native_id": "8-192", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3301687240600586, "incorrect_loss_raw": 1.421152114868164, "correct_loss_per_char": 0.6650843620300293, "incorrect_loss_per_char": 0.710576057434082, "correct_loss_per_token": 1.3301687240600586, "incorrect_loss_per_token": 1.421152114868164, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3301687240600586, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": true, "logits_per_token": -1.3301687240600586, "logits_per_char": -0.6650843620300293, "num_chars": 2}, {"sum_logits": -1.3350313901901245, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.3350313901901245, "logits_per_char": -0.6675156950950623, "num_chars": 2}, {"sum_logits": -1.4807004928588867, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.4807004928588867, "logits_per_char": -0.7403502464294434, "num_chars": 2}, {"sum_logits": -1.447724461555481, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.447724461555481, "logits_per_char": -0.7238622307777405, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 221, "native_id": "7-1108", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5071125030517578, "incorrect_loss_raw": 1.3720473448435466, "correct_loss_per_char": 0.7535562515258789, "incorrect_loss_per_char": 0.6860236724217733, "correct_loss_per_token": 1.5071125030517578, "incorrect_loss_per_token": 1.3720473448435466, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.128785490989685, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": true, "logits_per_token": -1.128785490989685, "logits_per_char": -0.5643927454948425, "num_chars": 2}, {"sum_logits": -1.5101807117462158, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.5101807117462158, "logits_per_char": -0.7550903558731079, "num_chars": 2}, {"sum_logits": -1.4771758317947388, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.4771758317947388, "logits_per_char": -0.7385879158973694, "num_chars": 2}, {"sum_logits": -1.5071125030517578, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.5071125030517578, "logits_per_char": -0.7535562515258789, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 222, "native_id": "7-852", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2585711479187012, "incorrect_loss_raw": 1.4419421752293904, "correct_loss_per_char": 0.6292855739593506, "incorrect_loss_per_char": 0.7209710876146952, "correct_loss_per_token": 1.2585711479187012, "incorrect_loss_per_token": 1.4419421752293904, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2585711479187012, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": true, "logits_per_token": -1.2585711479187012, "logits_per_char": -0.6292855739593506, "num_chars": 2}, {"sum_logits": -1.4375332593917847, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "logits_per_token": -1.4375332593917847, "logits_per_char": -0.7187666296958923, "num_chars": 2}, {"sum_logits": -1.3932008743286133, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "logits_per_token": -1.3932008743286133, "logits_per_char": -0.6966004371643066, "num_chars": 2}, {"sum_logits": -1.4950923919677734, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "logits_per_token": -1.4950923919677734, "logits_per_char": -0.7475461959838867, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 223, "native_id": "761", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5158743858337402, "incorrect_loss_raw": 1.3590563933054607, "correct_loss_per_char": 0.7579371929168701, "incorrect_loss_per_char": 0.6795281966527303, "correct_loss_per_token": 1.5158743858337402, "incorrect_loss_per_token": 1.3590563933054607, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.272687315940857, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": true, "logits_per_token": -1.272687315940857, "logits_per_char": -0.6363436579704285, "num_chars": 2}, {"sum_logits": -1.3720837831497192, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.3720837831497192, "logits_per_char": -0.6860418915748596, "num_chars": 2}, {"sum_logits": -1.4323980808258057, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.4323980808258057, "logits_per_char": -0.7161990404129028, "num_chars": 2}, {"sum_logits": -1.5158743858337402, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.5158743858337402, "logits_per_char": -0.7579371929168701, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 224, "native_id": "8-318", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4039928913116455, "incorrect_loss_raw": 1.3921801249186199, "correct_loss_per_char": 0.7019964456558228, "incorrect_loss_per_char": 0.6960900624593099, "correct_loss_per_token": 1.4039928913116455, "incorrect_loss_per_token": 1.3921801249186199, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2405200004577637, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": true, "logits_per_token": -1.2405200004577637, "logits_per_char": -0.6202600002288818, "num_chars": 2}, {"sum_logits": -1.4039928913116455, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.4039928913116455, "logits_per_char": -0.7019964456558228, "num_chars": 2}, {"sum_logits": -1.4328794479370117, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.4328794479370117, "logits_per_char": -0.7164397239685059, "num_chars": 2}, {"sum_logits": -1.503140926361084, "num_tokens": 1, "num_tokens_all": 279, "is_greedy": false, "logits_per_token": -1.503140926361084, "logits_per_char": -0.751570463180542, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 225, "native_id": "636", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4655482769012451, "incorrect_loss_raw": 1.387934406598409, "correct_loss_per_char": 0.7327741384506226, "incorrect_loss_per_char": 0.6939672032992045, "correct_loss_per_token": 1.4655482769012451, "incorrect_loss_per_token": 1.387934406598409, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1589627265930176, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": true, "logits_per_token": -1.1589627265930176, "logits_per_char": -0.5794813632965088, "num_chars": 2}, {"sum_logits": -1.4106833934783936, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.4106833934783936, "logits_per_char": -0.7053416967391968, "num_chars": 2}, {"sum_logits": -1.4655482769012451, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.4655482769012451, "logits_per_char": -0.7327741384506226, "num_chars": 2}, {"sum_logits": -1.594157099723816, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.594157099723816, "logits_per_char": -0.797078549861908, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 226, "native_id": "7-444", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.632215976715088, "incorrect_loss_raw": 1.3434904019037883, "correct_loss_per_char": 0.816107988357544, "incorrect_loss_per_char": 0.6717452009518942, "correct_loss_per_token": 1.632215976715088, "incorrect_loss_per_token": 1.3434904019037883, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0898213386535645, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": true, "logits_per_token": -1.0898213386535645, "logits_per_char": -0.5449106693267822, "num_chars": 2}, {"sum_logits": -1.473899006843567, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.473899006843567, "logits_per_char": -0.7369495034217834, "num_chars": 2}, {"sum_logits": -1.4667508602142334, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.4667508602142334, "logits_per_char": -0.7333754301071167, "num_chars": 2}, {"sum_logits": -1.632215976715088, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.632215976715088, "logits_per_char": -0.816107988357544, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 227, "native_id": "8-57", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5784105062484741, "incorrect_loss_raw": 1.3379572629928589, "correct_loss_per_char": 0.7892052531242371, "incorrect_loss_per_char": 0.6689786314964294, "correct_loss_per_token": 1.5784105062484741, "incorrect_loss_per_token": 1.3379572629928589, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.337607502937317, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.337607502937317, "logits_per_char": -0.6688037514686584, "num_chars": 2}, {"sum_logits": -1.5784105062484741, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.5784105062484741, "logits_per_char": -0.7892052531242371, "num_chars": 2}, {"sum_logits": -1.4003593921661377, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.4003593921661377, "logits_per_char": -0.7001796960830688, "num_chars": 2}, {"sum_logits": -1.275904893875122, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": true, "logits_per_token": -1.275904893875122, "logits_per_char": -0.637952446937561, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 228, "native_id": "9-187", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.416109561920166, "incorrect_loss_raw": 1.3991005023320515, "correct_loss_per_char": 0.708054780960083, "incorrect_loss_per_char": 0.6995502511660258, "correct_loss_per_token": 1.416109561920166, "incorrect_loss_per_token": 1.3991005023320515, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1921874284744263, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": true, "logits_per_token": -1.1921874284744263, "logits_per_char": -0.5960937142372131, "num_chars": 2}, {"sum_logits": -1.416109561920166, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.416109561920166, "logits_per_char": -0.708054780960083, "num_chars": 2}, {"sum_logits": -1.4779067039489746, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.4779067039489746, "logits_per_char": -0.7389533519744873, "num_chars": 2}, {"sum_logits": -1.527207374572754, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.527207374572754, "logits_per_char": -0.763603687286377, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 229, "native_id": "1345", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4627645015716553, "incorrect_loss_raw": 1.3830764691034954, "correct_loss_per_char": 0.7313822507858276, "incorrect_loss_per_char": 0.6915382345517477, "correct_loss_per_token": 1.4627645015716553, "incorrect_loss_per_token": 1.3830764691034954, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1582744121551514, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": true, "logits_per_token": -1.1582744121551514, "logits_per_char": -0.5791372060775757, "num_chars": 2}, {"sum_logits": -1.4627645015716553, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.4627645015716553, "logits_per_char": -0.7313822507858276, "num_chars": 2}, {"sum_logits": -1.4904325008392334, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.4904325008392334, "logits_per_char": -0.7452162504196167, "num_chars": 2}, {"sum_logits": -1.500522494316101, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.500522494316101, "logits_per_char": -0.7502612471580505, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 230, "native_id": "8-59", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4171366691589355, "incorrect_loss_raw": 1.3874760468800862, "correct_loss_per_char": 0.7085683345794678, "incorrect_loss_per_char": 0.6937380234400431, "correct_loss_per_token": 1.4171366691589355, "incorrect_loss_per_token": 1.3874760468800862, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3506544828414917, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.3506544828414917, "logits_per_char": -0.6753272414207458, "num_chars": 2}, {"sum_logits": -1.465818166732788, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.465818166732788, "logits_per_char": -0.732909083366394, "num_chars": 2}, {"sum_logits": -1.345955491065979, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": true, "logits_per_token": -1.345955491065979, "logits_per_char": -0.6729777455329895, "num_chars": 2}, {"sum_logits": -1.4171366691589355, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.4171366691589355, "logits_per_char": -0.7085683345794678, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 231, "native_id": "178", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4412654638290405, "incorrect_loss_raw": 1.379520098368327, "correct_loss_per_char": 0.7206327319145203, "incorrect_loss_per_char": 0.6897600491841634, "correct_loss_per_token": 1.4412654638290405, "incorrect_loss_per_token": 1.379520098368327, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2579537630081177, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": true, "logits_per_token": -1.2579537630081177, "logits_per_char": -0.6289768815040588, "num_chars": 2}, {"sum_logits": -1.4412654638290405, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.4412654638290405, "logits_per_char": -0.7206327319145203, "num_chars": 2}, {"sum_logits": -1.3971011638641357, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.3971011638641357, "logits_per_char": -0.6985505819320679, "num_chars": 2}, {"sum_logits": -1.483505368232727, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.483505368232727, "logits_per_char": -0.7417526841163635, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 232, "native_id": "9-1186", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0911507606506348, "incorrect_loss_raw": 1.518723766009013, "correct_loss_per_char": 0.5455753803253174, "incorrect_loss_per_char": 0.7593618830045065, "correct_loss_per_token": 1.0911507606506348, "incorrect_loss_per_token": 1.518723766009013, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0911507606506348, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": true, "logits_per_token": -1.0911507606506348, "logits_per_char": -0.5455753803253174, "num_chars": 2}, {"sum_logits": -1.4897706508636475, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.4897706508636475, "logits_per_char": -0.7448853254318237, "num_chars": 2}, {"sum_logits": -1.447706937789917, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.447706937789917, "logits_per_char": -0.7238534688949585, "num_chars": 2}, {"sum_logits": -1.6186937093734741, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.6186937093734741, "logits_per_char": -0.8093468546867371, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 233, "native_id": "82", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3535192012786865, "incorrect_loss_raw": 1.4250806172688801, "correct_loss_per_char": 0.6767596006393433, "incorrect_loss_per_char": 0.7125403086344401, "correct_loss_per_token": 1.3535192012786865, "incorrect_loss_per_token": 1.4250806172688801, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3081411123275757, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.3081411123275757, "logits_per_char": -0.6540705561637878, "num_chars": 2}, {"sum_logits": -1.710469126701355, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.710469126701355, "logits_per_char": -0.8552345633506775, "num_chars": 2}, {"sum_logits": -1.3535192012786865, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.3535192012786865, "logits_per_char": -0.6767596006393433, "num_chars": 2}, {"sum_logits": -1.25663161277771, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": true, "logits_per_token": -1.25663161277771, "logits_per_char": -0.628315806388855, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 234, "native_id": "8-165", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.424269437789917, "incorrect_loss_raw": 1.3829610347747803, "correct_loss_per_char": 0.7121347188949585, "incorrect_loss_per_char": 0.6914805173873901, "correct_loss_per_token": 1.424269437789917, "incorrect_loss_per_token": 1.3829610347747803, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.346356749534607, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.346356749534607, "logits_per_char": -0.6731783747673035, "num_chars": 2}, {"sum_logits": -1.4000662565231323, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4000662565231323, "logits_per_char": -0.7000331282615662, "num_chars": 2}, {"sum_logits": -1.424269437789917, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.424269437789917, "logits_per_char": -0.7121347188949585, "num_chars": 2}, {"sum_logits": -1.4024600982666016, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4024600982666016, "logits_per_char": -0.7012300491333008, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 235, "native_id": "404", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4401252269744873, "incorrect_loss_raw": 1.3866467078526814, "correct_loss_per_char": 0.7200626134872437, "incorrect_loss_per_char": 0.6933233539263407, "correct_loss_per_token": 1.4401252269744873, "incorrect_loss_per_token": 1.3866467078526814, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2119768857955933, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": true, "logits_per_token": -1.2119768857955933, "logits_per_char": -0.6059884428977966, "num_chars": 2}, {"sum_logits": -1.4401252269744873, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.4401252269744873, "logits_per_char": -0.7200626134872437, "num_chars": 2}, {"sum_logits": -1.4059921503067017, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.4059921503067017, "logits_per_char": -0.7029960751533508, "num_chars": 2}, {"sum_logits": -1.5419710874557495, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.5419710874557495, "logits_per_char": -0.7709855437278748, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 236, "native_id": "279", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1229389905929565, "incorrect_loss_raw": 1.503151297569275, "correct_loss_per_char": 0.5614694952964783, "incorrect_loss_per_char": 0.7515756487846375, "correct_loss_per_token": 1.1229389905929565, "incorrect_loss_per_token": 1.503151297569275, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1229389905929565, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": true, "logits_per_token": -1.1229389905929565, "logits_per_char": -0.5614694952964783, "num_chars": 2}, {"sum_logits": -1.506574034690857, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.506574034690857, "logits_per_char": -0.7532870173454285, "num_chars": 2}, {"sum_logits": -1.4509128332138062, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.4509128332138062, "logits_per_char": -0.7254564166069031, "num_chars": 2}, {"sum_logits": -1.5519670248031616, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.5519670248031616, "logits_per_char": -0.7759835124015808, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 237, "native_id": "9-532", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2731146812438965, "incorrect_loss_raw": 1.4445818662643433, "correct_loss_per_char": 0.6365573406219482, "incorrect_loss_per_char": 0.7222909331321716, "correct_loss_per_token": 1.2731146812438965, "incorrect_loss_per_token": 1.4445818662643433, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2731146812438965, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.2731146812438965, "logits_per_char": -0.6365573406219482, "num_chars": 2}, {"sum_logits": -1.5923289060592651, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.5923289060592651, "logits_per_char": -0.7961644530296326, "num_chars": 2}, {"sum_logits": -1.2561817169189453, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": true, "logits_per_token": -1.2561817169189453, "logits_per_char": -0.6280908584594727, "num_chars": 2}, {"sum_logits": -1.4852349758148193, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.4852349758148193, "logits_per_char": -0.7426174879074097, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 238, "native_id": "268", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.433350920677185, "incorrect_loss_raw": 1.3786732753117878, "correct_loss_per_char": 0.7166754603385925, "incorrect_loss_per_char": 0.6893366376558939, "correct_loss_per_token": 1.433350920677185, "incorrect_loss_per_token": 1.3786732753117878, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3346531391143799, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": true, "logits_per_token": -1.3346531391143799, "logits_per_char": -0.6673265695571899, "num_chars": 2}, {"sum_logits": -1.4302042722702026, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": false, "logits_per_token": -1.4302042722702026, "logits_per_char": -0.7151021361351013, "num_chars": 2}, {"sum_logits": -1.433350920677185, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": false, "logits_per_token": -1.433350920677185, "logits_per_char": -0.7166754603385925, "num_chars": 2}, {"sum_logits": -1.3711624145507812, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": false, "logits_per_token": -1.3711624145507812, "logits_per_char": -0.6855812072753906, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 239, "native_id": "7-1018", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4177747964859009, "incorrect_loss_raw": 1.3853628238042195, "correct_loss_per_char": 0.7088873982429504, "incorrect_loss_per_char": 0.6926814119021097, "correct_loss_per_token": 1.4177747964859009, "incorrect_loss_per_token": 1.3853628238042195, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.293297529220581, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": true, "logits_per_token": -1.293297529220581, "logits_per_char": -0.6466487646102905, "num_chars": 2}, {"sum_logits": -1.4233239889144897, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.4233239889144897, "logits_per_char": -0.7116619944572449, "num_chars": 2}, {"sum_logits": -1.439466953277588, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.439466953277588, "logits_per_char": -0.719733476638794, "num_chars": 2}, {"sum_logits": -1.4177747964859009, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.4177747964859009, "logits_per_char": -0.7088873982429504, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 240, "native_id": "1756", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.254528284072876, "incorrect_loss_raw": 1.447612961133321, "correct_loss_per_char": 0.627264142036438, "incorrect_loss_per_char": 0.7238064805666605, "correct_loss_per_token": 1.254528284072876, "incorrect_loss_per_token": 1.447612961133321, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.254528284072876, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": true, "logits_per_token": -1.254528284072876, "logits_per_char": -0.627264142036438, "num_chars": 2}, {"sum_logits": -1.40669846534729, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.40669846534729, "logits_per_char": -0.703349232673645, "num_chars": 2}, {"sum_logits": -1.4044510126113892, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.4044510126113892, "logits_per_char": -0.7022255063056946, "num_chars": 2}, {"sum_logits": -1.5316894054412842, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.5316894054412842, "logits_per_char": -0.7658447027206421, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 241, "native_id": "1137", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5266304016113281, "incorrect_loss_raw": 1.3572216431299846, "correct_loss_per_char": 0.7633152008056641, "incorrect_loss_per_char": 0.6786108215649923, "correct_loss_per_token": 1.5266304016113281, "incorrect_loss_per_token": 1.3572216431299846, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2364416122436523, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": true, "logits_per_token": -1.2364416122436523, "logits_per_char": -0.6182208061218262, "num_chars": 2}, {"sum_logits": -1.4077954292297363, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.4077954292297363, "logits_per_char": -0.7038977146148682, "num_chars": 2}, {"sum_logits": -1.427427887916565, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.427427887916565, "logits_per_char": -0.7137139439582825, "num_chars": 2}, {"sum_logits": -1.5266304016113281, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.5266304016113281, "logits_per_char": -0.7633152008056641, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 242, "native_id": "7-203", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5423789024353027, "incorrect_loss_raw": 1.3716471592585247, "correct_loss_per_char": 0.7711894512176514, "incorrect_loss_per_char": 0.6858235796292623, "correct_loss_per_token": 1.5423789024353027, "incorrect_loss_per_token": 1.3716471592585247, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.077696442604065, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": true, "logits_per_token": -1.077696442604065, "logits_per_char": -0.5388482213020325, "num_chars": 2}, {"sum_logits": -1.4510365724563599, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "logits_per_token": -1.4510365724563599, "logits_per_char": -0.7255182862281799, "num_chars": 2}, {"sum_logits": -1.586208462715149, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "logits_per_token": -1.586208462715149, "logits_per_char": -0.7931042313575745, "num_chars": 2}, {"sum_logits": -1.5423789024353027, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "logits_per_token": -1.5423789024353027, "logits_per_char": -0.7711894512176514, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 243, "native_id": "745", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4030122756958008, "incorrect_loss_raw": 1.3970808585484822, "correct_loss_per_char": 0.7015061378479004, "incorrect_loss_per_char": 0.6985404292742411, "correct_loss_per_token": 1.4030122756958008, "incorrect_loss_per_token": 1.3970808585484822, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3705317974090576, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.3705317974090576, "logits_per_char": -0.6852658987045288, "num_chars": 2}, {"sum_logits": -1.4030122756958008, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.4030122756958008, "logits_per_char": -0.7015061378479004, "num_chars": 2}, {"sum_logits": -1.2663792371749878, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": true, "logits_per_token": -1.2663792371749878, "logits_per_char": -0.6331896185874939, "num_chars": 2}, {"sum_logits": -1.5543315410614014, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.5543315410614014, "logits_per_char": -0.7771657705307007, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 244, "native_id": "7-902", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3027453422546387, "incorrect_loss_raw": 1.4240894317626953, "correct_loss_per_char": 0.6513726711273193, "incorrect_loss_per_char": 0.7120447158813477, "correct_loss_per_token": 1.3027453422546387, "incorrect_loss_per_token": 1.4240894317626953, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3027453422546387, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": true, "logits_per_token": -1.3027453422546387, "logits_per_char": -0.6513726711273193, "num_chars": 2}, {"sum_logits": -1.435481071472168, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.435481071472168, "logits_per_char": -0.717740535736084, "num_chars": 2}, {"sum_logits": -1.480567216873169, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.480567216873169, "logits_per_char": -0.7402836084365845, "num_chars": 2}, {"sum_logits": -1.356220006942749, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.356220006942749, "logits_per_char": -0.6781100034713745, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 245, "native_id": "1095", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1423283815383911, "incorrect_loss_raw": 1.4948808749516804, "correct_loss_per_char": 0.5711641907691956, "incorrect_loss_per_char": 0.7474404374758402, "correct_loss_per_token": 1.1423283815383911, "incorrect_loss_per_token": 1.4948808749516804, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1423283815383911, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": true, "logits_per_token": -1.1423283815383911, "logits_per_char": -0.5711641907691956, "num_chars": 2}, {"sum_logits": -1.4604581594467163, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "logits_per_token": -1.4604581594467163, "logits_per_char": -0.7302290797233582, "num_chars": 2}, {"sum_logits": -1.5873936414718628, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "logits_per_token": -1.5873936414718628, "logits_per_char": -0.7936968207359314, "num_chars": 2}, {"sum_logits": -1.4367908239364624, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "logits_per_token": -1.4367908239364624, "logits_per_char": -0.7183954119682312, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 246, "native_id": "7-163", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2909682989120483, "incorrect_loss_raw": 1.434002161026001, "correct_loss_per_char": 0.6454841494560242, "incorrect_loss_per_char": 0.7170010805130005, "correct_loss_per_token": 1.2909682989120483, "incorrect_loss_per_token": 1.434002161026001, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2909682989120483, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": true, "logits_per_token": -1.2909682989120483, "logits_per_char": -0.6454841494560242, "num_chars": 2}, {"sum_logits": -1.5065209865570068, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.5065209865570068, "logits_per_char": -0.7532604932785034, "num_chars": 2}, {"sum_logits": -1.4986284971237183, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.4986284971237183, "logits_per_char": -0.7493142485618591, "num_chars": 2}, {"sum_logits": -1.2968569993972778, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.2968569993972778, "logits_per_char": -0.6484284996986389, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 247, "native_id": "9-858", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4245290756225586, "incorrect_loss_raw": 1.3918040593465169, "correct_loss_per_char": 0.7122645378112793, "incorrect_loss_per_char": 0.6959020296732584, "correct_loss_per_token": 1.4245290756225586, "incorrect_loss_per_token": 1.3918040593465169, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2196708917617798, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": true, "logits_per_token": -1.2196708917617798, "logits_per_char": -0.6098354458808899, "num_chars": 2}, {"sum_logits": -1.3913320302963257, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.3913320302963257, "logits_per_char": -0.6956660151481628, "num_chars": 2}, {"sum_logits": -1.4245290756225586, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.4245290756225586, "logits_per_char": -0.7122645378112793, "num_chars": 2}, {"sum_logits": -1.5644092559814453, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.5644092559814453, "logits_per_char": -0.7822046279907227, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 248, "native_id": "1530", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3921180963516235, "incorrect_loss_raw": 1.3956819772720337, "correct_loss_per_char": 0.6960590481758118, "incorrect_loss_per_char": 0.6978409886360168, "correct_loss_per_token": 1.3921180963516235, "incorrect_loss_per_token": 1.3956819772720337, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.318911075592041, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": true, "logits_per_token": -1.318911075592041, "logits_per_char": -0.6594555377960205, "num_chars": 2}, {"sum_logits": -1.4837173223495483, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.4837173223495483, "logits_per_char": -0.7418586611747742, "num_chars": 2}, {"sum_logits": -1.3844175338745117, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.3844175338745117, "logits_per_char": -0.6922087669372559, "num_chars": 2}, {"sum_logits": -1.3921180963516235, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.3921180963516235, "logits_per_char": -0.6960590481758118, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 249, "native_id": "9-993", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5227631330490112, "incorrect_loss_raw": 1.3567474683125813, "correct_loss_per_char": 0.7613815665245056, "incorrect_loss_per_char": 0.6783737341562907, "correct_loss_per_token": 1.5227631330490112, "incorrect_loss_per_token": 1.3567474683125813, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2400013208389282, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": true, "logits_per_token": -1.2400013208389282, "logits_per_char": -0.6200006604194641, "num_chars": 2}, {"sum_logits": -1.4342786073684692, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.4342786073684692, "logits_per_char": -0.7171393036842346, "num_chars": 2}, {"sum_logits": -1.3959624767303467, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.3959624767303467, "logits_per_char": -0.6979812383651733, "num_chars": 2}, {"sum_logits": -1.5227631330490112, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.5227631330490112, "logits_per_char": -0.7613815665245056, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 250, "native_id": "8-340", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4962584972381592, "incorrect_loss_raw": 1.3676005601882935, "correct_loss_per_char": 0.7481292486190796, "incorrect_loss_per_char": 0.6838002800941467, "correct_loss_per_token": 1.4962584972381592, "incorrect_loss_per_token": 1.3676005601882935, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.232447624206543, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": true, "logits_per_token": -1.232447624206543, "logits_per_char": -0.6162238121032715, "num_chars": 2}, {"sum_logits": -1.492595911026001, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.492595911026001, "logits_per_char": -0.7462979555130005, "num_chars": 2}, {"sum_logits": -1.3777581453323364, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.3777581453323364, "logits_per_char": -0.6888790726661682, "num_chars": 2}, {"sum_logits": -1.4962584972381592, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.4962584972381592, "logits_per_char": -0.7481292486190796, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 251, "native_id": "3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4554824829101562, "incorrect_loss_raw": 1.3959664106369019, "correct_loss_per_char": 0.7277412414550781, "incorrect_loss_per_char": 0.6979832053184509, "correct_loss_per_token": 1.4554824829101562, "incorrect_loss_per_token": 1.3959664106369019, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1241605281829834, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": true, "logits_per_token": -1.1241605281829834, "logits_per_char": -0.5620802640914917, "num_chars": 2}, {"sum_logits": -1.4554824829101562, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.4554824829101562, "logits_per_char": -0.7277412414550781, "num_chars": 2}, {"sum_logits": -1.468597173690796, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.468597173690796, "logits_per_char": -0.734298586845398, "num_chars": 2}, {"sum_logits": -1.5951415300369263, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.5951415300369263, "logits_per_char": -0.7975707650184631, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 252, "native_id": "1074", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2700748443603516, "incorrect_loss_raw": 1.44697904586792, "correct_loss_per_char": 0.6350374221801758, "incorrect_loss_per_char": 0.72348952293396, "correct_loss_per_token": 1.2700748443603516, "incorrect_loss_per_token": 1.44697904586792, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2501193284988403, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": true, "logits_per_token": -1.2501193284988403, "logits_per_char": -0.6250596642494202, "num_chars": 2}, {"sum_logits": -1.4757131338119507, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.4757131338119507, "logits_per_char": -0.7378565669059753, "num_chars": 2}, {"sum_logits": -1.2700748443603516, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.2700748443603516, "logits_per_char": -0.6350374221801758, "num_chars": 2}, {"sum_logits": -1.6151046752929688, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.6151046752929688, "logits_per_char": -0.8075523376464844, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 253, "native_id": "9-431", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.223084807395935, "incorrect_loss_raw": 1.4588499069213867, "correct_loss_per_char": 0.6115424036979675, "incorrect_loss_per_char": 0.7294249534606934, "correct_loss_per_token": 1.223084807395935, "incorrect_loss_per_token": 1.4588499069213867, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.223084807395935, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": true, "logits_per_token": -1.223084807395935, "logits_per_char": -0.6115424036979675, "num_chars": 2}, {"sum_logits": -1.3628826141357422, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.3628826141357422, "logits_per_char": -0.6814413070678711, "num_chars": 2}, {"sum_logits": -1.4610178470611572, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.4610178470611572, "logits_per_char": -0.7305089235305786, "num_chars": 2}, {"sum_logits": -1.5526492595672607, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.5526492595672607, "logits_per_char": -0.7763246297836304, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 254, "native_id": "9-638", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.398591160774231, "incorrect_loss_raw": 1.3954962094624836, "correct_loss_per_char": 0.6992955803871155, "incorrect_loss_per_char": 0.6977481047312418, "correct_loss_per_token": 1.398591160774231, "incorrect_loss_per_token": 1.3954962094624836, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2700669765472412, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": true, "logits_per_token": -1.2700669765472412, "logits_per_char": -0.6350334882736206, "num_chars": 2}, {"sum_logits": -1.398591160774231, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.398591160774231, "logits_per_char": -0.6992955803871155, "num_chars": 2}, {"sum_logits": -1.3827241659164429, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.3827241659164429, "logits_per_char": -0.6913620829582214, "num_chars": 2}, {"sum_logits": -1.533697485923767, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.533697485923767, "logits_per_char": -0.7668487429618835, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 255, "native_id": "9-352", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2862725257873535, "incorrect_loss_raw": 1.4323800802230835, "correct_loss_per_char": 0.6431362628936768, "incorrect_loss_per_char": 0.7161900401115417, "correct_loss_per_token": 1.2862725257873535, "incorrect_loss_per_token": 1.4323800802230835, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4736032485961914, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.4736032485961914, "logits_per_char": -0.7368016242980957, "num_chars": 2}, {"sum_logits": -1.4666756391525269, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.4666756391525269, "logits_per_char": -0.7333378195762634, "num_chars": 2}, {"sum_logits": -1.3568613529205322, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.3568613529205322, "logits_per_char": -0.6784306764602661, "num_chars": 2}, {"sum_logits": -1.2862725257873535, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": true, "logits_per_token": -1.2862725257873535, "logits_per_char": -0.6431362628936768, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 256, "native_id": "226", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4733288288116455, "incorrect_loss_raw": 1.3755840460459392, "correct_loss_per_char": 0.7366644144058228, "incorrect_loss_per_char": 0.6877920230229696, "correct_loss_per_token": 1.4733288288116455, "incorrect_loss_per_token": 1.3755840460459392, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2385375499725342, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": true, "logits_per_token": -1.2385375499725342, "logits_per_char": -0.6192687749862671, "num_chars": 2}, {"sum_logits": -1.533107042312622, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": false, "logits_per_token": -1.533107042312622, "logits_per_char": -0.766553521156311, "num_chars": 2}, {"sum_logits": -1.3551075458526611, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": false, "logits_per_token": -1.3551075458526611, "logits_per_char": -0.6775537729263306, "num_chars": 2}, {"sum_logits": -1.4733288288116455, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": false, "logits_per_token": -1.4733288288116455, "logits_per_char": -0.7366644144058228, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 257, "native_id": "9-132", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.260015606880188, "incorrect_loss_raw": 1.4451828797658284, "correct_loss_per_char": 0.630007803440094, "incorrect_loss_per_char": 0.7225914398829142, "correct_loss_per_token": 1.260015606880188, "incorrect_loss_per_token": 1.4451828797658284, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.260015606880188, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": true, "logits_per_token": -1.260015606880188, "logits_per_char": -0.630007803440094, "num_chars": 2}, {"sum_logits": -1.5531871318817139, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.5531871318817139, "logits_per_char": -0.7765935659408569, "num_chars": 2}, {"sum_logits": -1.4339556694030762, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.4339556694030762, "logits_per_char": -0.7169778347015381, "num_chars": 2}, {"sum_logits": -1.3484058380126953, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.3484058380126953, "logits_per_char": -0.6742029190063477, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 258, "native_id": "9-222", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6244726181030273, "incorrect_loss_raw": 1.3415272235870361, "correct_loss_per_char": 0.8122363090515137, "incorrect_loss_per_char": 0.6707636117935181, "correct_loss_per_token": 1.6244726181030273, "incorrect_loss_per_token": 1.3415272235870361, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1010297536849976, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": true, "logits_per_token": -1.1010297536849976, "logits_per_char": -0.5505148768424988, "num_chars": 2}, {"sum_logits": -1.4967471361160278, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.4967471361160278, "logits_per_char": -0.7483735680580139, "num_chars": 2}, {"sum_logits": -1.426804780960083, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.426804780960083, "logits_per_char": -0.7134023904800415, "num_chars": 2}, {"sum_logits": -1.6244726181030273, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.6244726181030273, "logits_per_char": -0.8122363090515137, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 259, "native_id": "9-105", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3522682189941406, "incorrect_loss_raw": 1.4144881963729858, "correct_loss_per_char": 0.6761341094970703, "incorrect_loss_per_char": 0.7072440981864929, "correct_loss_per_token": 1.3522682189941406, "incorrect_loss_per_token": 1.4144881963729858, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2576713562011719, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": true, "logits_per_token": -1.2576713562011719, "logits_per_char": -0.6288356781005859, "num_chars": 2}, {"sum_logits": -1.5555756092071533, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.5555756092071533, "logits_per_char": -0.7777878046035767, "num_chars": 2}, {"sum_logits": -1.4302176237106323, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.4302176237106323, "logits_per_char": -0.7151088118553162, "num_chars": 2}, {"sum_logits": -1.3522682189941406, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.3522682189941406, "logits_per_char": -0.6761341094970703, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 260, "native_id": "7-459", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3380448818206787, "incorrect_loss_raw": 1.429693619410197, "correct_loss_per_char": 0.6690224409103394, "incorrect_loss_per_char": 0.7148468097050985, "correct_loss_per_token": 1.3380448818206787, "incorrect_loss_per_token": 1.429693619410197, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2085174322128296, "num_tokens": 1, "num_tokens_all": 246, "is_greedy": true, "logits_per_token": -1.2085174322128296, "logits_per_char": -0.6042587161064148, "num_chars": 2}, {"sum_logits": -1.3380448818206787, "num_tokens": 1, "num_tokens_all": 246, "is_greedy": false, "logits_per_token": -1.3380448818206787, "logits_per_char": -0.6690224409103394, "num_chars": 2}, {"sum_logits": -1.4020109176635742, "num_tokens": 1, "num_tokens_all": 246, "is_greedy": false, "logits_per_token": -1.4020109176635742, "logits_per_char": -0.7010054588317871, "num_chars": 2}, {"sum_logits": -1.678552508354187, "num_tokens": 1, "num_tokens_all": 246, "is_greedy": false, "logits_per_token": -1.678552508354187, "logits_per_char": -0.8392762541770935, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 261, "native_id": "9-881", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4607758522033691, "incorrect_loss_raw": 1.37180757522583, "correct_loss_per_char": 0.7303879261016846, "incorrect_loss_per_char": 0.685903787612915, "correct_loss_per_token": 1.4607758522033691, "incorrect_loss_per_token": 1.37180757522583, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2905131578445435, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": true, "logits_per_token": -1.2905131578445435, "logits_per_char": -0.6452565789222717, "num_chars": 2}, {"sum_logits": -1.4607758522033691, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.4607758522033691, "logits_per_char": -0.7303879261016846, "num_chars": 2}, {"sum_logits": -1.3813809156417847, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.3813809156417847, "logits_per_char": -0.6906904578208923, "num_chars": 2}, {"sum_logits": -1.443528652191162, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.443528652191162, "logits_per_char": -0.721764326095581, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 262, "native_id": "280", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5171622037887573, "incorrect_loss_raw": 1.36540949344635, "correct_loss_per_char": 0.7585811018943787, "incorrect_loss_per_char": 0.682704746723175, "correct_loss_per_token": 1.5171622037887573, "incorrect_loss_per_token": 1.36540949344635, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1864385604858398, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": true, "logits_per_token": -1.1864385604858398, "logits_per_char": -0.5932192802429199, "num_chars": 2}, {"sum_logits": -1.5171622037887573, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.5171622037887573, "logits_per_char": -0.7585811018943787, "num_chars": 2}, {"sum_logits": -1.3821136951446533, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.3821136951446533, "logits_per_char": -0.6910568475723267, "num_chars": 2}, {"sum_logits": -1.5276762247085571, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.5276762247085571, "logits_per_char": -0.7638381123542786, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 263, "native_id": "187", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.490817904472351, "incorrect_loss_raw": 1.3624756733576457, "correct_loss_per_char": 0.7454089522361755, "incorrect_loss_per_char": 0.6812378366788229, "correct_loss_per_token": 1.490817904472351, "incorrect_loss_per_token": 1.3624756733576457, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3208260536193848, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": true, "logits_per_token": -1.3208260536193848, "logits_per_char": -0.6604130268096924, "num_chars": 2}, {"sum_logits": -1.490817904472351, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.490817904472351, "logits_per_char": -0.7454089522361755, "num_chars": 2}, {"sum_logits": -1.3863774538040161, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.3863774538040161, "logits_per_char": -0.6931887269020081, "num_chars": 2}, {"sum_logits": -1.3802235126495361, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.3802235126495361, "logits_per_char": -0.6901117563247681, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 264, "native_id": "8-253", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4986119270324707, "incorrect_loss_raw": 1.3853582541147869, "correct_loss_per_char": 0.7493059635162354, "incorrect_loss_per_char": 0.6926791270573934, "correct_loss_per_token": 1.4986119270324707, "incorrect_loss_per_token": 1.3853582541147869, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1435202360153198, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": true, "logits_per_token": -1.1435202360153198, "logits_per_char": -0.5717601180076599, "num_chars": 2}, {"sum_logits": -1.5523979663848877, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.5523979663848877, "logits_per_char": -0.7761989831924438, "num_chars": 2}, {"sum_logits": -1.4986119270324707, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.4986119270324707, "logits_per_char": -0.7493059635162354, "num_chars": 2}, {"sum_logits": -1.4601565599441528, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.4601565599441528, "logits_per_char": -0.7300782799720764, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 265, "native_id": "9-482", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4004225730895996, "incorrect_loss_raw": 1.3972231944402058, "correct_loss_per_char": 0.7002112865447998, "incorrect_loss_per_char": 0.6986115972201029, "correct_loss_per_token": 1.4004225730895996, "incorrect_loss_per_token": 1.3972231944402058, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.237684965133667, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": true, "logits_per_token": -1.237684965133667, "logits_per_char": -0.6188424825668335, "num_chars": 2}, {"sum_logits": -1.5370320081710815, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.5370320081710815, "logits_per_char": -0.7685160040855408, "num_chars": 2}, {"sum_logits": -1.4169526100158691, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.4169526100158691, "logits_per_char": -0.7084763050079346, "num_chars": 2}, {"sum_logits": -1.4004225730895996, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.4004225730895996, "logits_per_char": -0.7002112865447998, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 266, "native_id": "496", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2711573839187622, "incorrect_loss_raw": 1.4381962617238362, "correct_loss_per_char": 0.6355786919593811, "incorrect_loss_per_char": 0.7190981308619181, "correct_loss_per_token": 1.2711573839187622, "incorrect_loss_per_token": 1.4381962617238362, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2711573839187622, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": true, "logits_per_token": -1.2711573839187622, "logits_per_char": -0.6355786919593811, "num_chars": 2}, {"sum_logits": -1.4375959634780884, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.4375959634780884, "logits_per_char": -0.7187979817390442, "num_chars": 2}, {"sum_logits": -1.4022976160049438, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.4022976160049438, "logits_per_char": -0.7011488080024719, "num_chars": 2}, {"sum_logits": -1.4746952056884766, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.4746952056884766, "logits_per_char": -0.7373476028442383, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 267, "native_id": "630", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.413453221321106, "incorrect_loss_raw": 1.4067504008611043, "correct_loss_per_char": 0.706726610660553, "incorrect_loss_per_char": 0.7033752004305521, "correct_loss_per_token": 1.413453221321106, "incorrect_loss_per_token": 1.4067504008611043, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1573832035064697, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": true, "logits_per_token": -1.1573832035064697, "logits_per_char": -0.5786916017532349, "num_chars": 2}, {"sum_logits": -1.458860993385315, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.458860993385315, "logits_per_char": -0.7294304966926575, "num_chars": 2}, {"sum_logits": -1.413453221321106, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.413453221321106, "logits_per_char": -0.706726610660553, "num_chars": 2}, {"sum_logits": -1.6040070056915283, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.6040070056915283, "logits_per_char": -0.8020035028457642, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 268, "native_id": "9-16", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4614461660385132, "incorrect_loss_raw": 1.383073369661967, "correct_loss_per_char": 0.7307230830192566, "incorrect_loss_per_char": 0.6915366848309835, "correct_loss_per_token": 1.4614461660385132, "incorrect_loss_per_token": 1.383073369661967, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2087681293487549, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": true, "logits_per_token": -1.2087681293487549, "logits_per_char": -0.6043840646743774, "num_chars": 2}, {"sum_logits": -1.374739170074463, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.374739170074463, "logits_per_char": -0.6873695850372314, "num_chars": 2}, {"sum_logits": -1.565712809562683, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.565712809562683, "logits_per_char": -0.7828564047813416, "num_chars": 2}, {"sum_logits": -1.4614461660385132, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.4614461660385132, "logits_per_char": -0.7307230830192566, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 269, "native_id": "7-986", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5033371448516846, "incorrect_loss_raw": 1.3890087207158406, "correct_loss_per_char": 0.7516685724258423, "incorrect_loss_per_char": 0.6945043603579203, "correct_loss_per_token": 1.5033371448516846, "incorrect_loss_per_token": 1.3890087207158406, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0444375276565552, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": true, "logits_per_token": -1.0444375276565552, "logits_per_char": -0.5222187638282776, "num_chars": 2}, {"sum_logits": -1.5018616914749146, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.5018616914749146, "logits_per_char": -0.7509308457374573, "num_chars": 2}, {"sum_logits": -1.6207269430160522, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.6207269430160522, "logits_per_char": -0.8103634715080261, "num_chars": 2}, {"sum_logits": -1.5033371448516846, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.5033371448516846, "logits_per_char": -0.7516685724258423, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 270, "native_id": "7-787", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4387685060501099, "incorrect_loss_raw": 1.3900471528371174, "correct_loss_per_char": 0.7193842530250549, "incorrect_loss_per_char": 0.6950235764185587, "correct_loss_per_token": 1.4387685060501099, "incorrect_loss_per_token": 1.3900471528371174, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2184419631958008, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": true, "logits_per_token": -1.2184419631958008, "logits_per_char": -0.6092209815979004, "num_chars": 2}, {"sum_logits": -1.4325913190841675, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.4325913190841675, "logits_per_char": -0.7162956595420837, "num_chars": 2}, {"sum_logits": -1.4387685060501099, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.4387685060501099, "logits_per_char": -0.7193842530250549, "num_chars": 2}, {"sum_logits": -1.5191081762313843, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.5191081762313843, "logits_per_char": -0.7595540881156921, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 271, "native_id": "9-181", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2073217630386353, "incorrect_loss_raw": 1.4665969610214233, "correct_loss_per_char": 0.6036608815193176, "incorrect_loss_per_char": 0.7332984805107117, "correct_loss_per_token": 1.2073217630386353, "incorrect_loss_per_token": 1.4665969610214233, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2073217630386353, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": true, "logits_per_token": -1.2073217630386353, "logits_per_char": -0.6036608815193176, "num_chars": 2}, {"sum_logits": -1.4748835563659668, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.4748835563659668, "logits_per_char": -0.7374417781829834, "num_chars": 2}, {"sum_logits": -1.3601574897766113, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.3601574897766113, "logits_per_char": -0.6800787448883057, "num_chars": 2}, {"sum_logits": -1.564749836921692, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.564749836921692, "logits_per_char": -0.782374918460846, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 272, "native_id": "1240", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6270993947982788, "incorrect_loss_raw": 1.324802319208781, "correct_loss_per_char": 0.8135496973991394, "incorrect_loss_per_char": 0.6624011596043905, "correct_loss_per_token": 1.6270993947982788, "incorrect_loss_per_token": 1.324802319208781, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3514156341552734, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.3514156341552734, "logits_per_char": -0.6757078170776367, "num_chars": 2}, {"sum_logits": -1.6270993947982788, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.6270993947982788, "logits_per_char": -0.8135496973991394, "num_chars": 2}, {"sum_logits": -1.2670342922210693, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": true, "logits_per_token": -1.2670342922210693, "logits_per_char": -0.6335171461105347, "num_chars": 2}, {"sum_logits": -1.35595703125, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.35595703125, "logits_per_char": -0.677978515625, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 273, "native_id": "474", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.484523892402649, "incorrect_loss_raw": 1.3647611538569133, "correct_loss_per_char": 0.7422619462013245, "incorrect_loss_per_char": 0.6823805769284567, "correct_loss_per_token": 1.484523892402649, "incorrect_loss_per_token": 1.3647611538569133, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2800296545028687, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": true, "logits_per_token": -1.2800296545028687, "logits_per_char": -0.6400148272514343, "num_chars": 2}, {"sum_logits": -1.4201854467391968, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.4201854467391968, "logits_per_char": -0.7100927233695984, "num_chars": 2}, {"sum_logits": -1.3940683603286743, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.3940683603286743, "logits_per_char": -0.6970341801643372, "num_chars": 2}, {"sum_logits": -1.484523892402649, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.484523892402649, "logits_per_char": -0.7422619462013245, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 274, "native_id": "1274", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5586508512496948, "incorrect_loss_raw": 1.347080667813619, "correct_loss_per_char": 0.7793254256248474, "incorrect_loss_per_char": 0.6735403339068095, "correct_loss_per_token": 1.5586508512496948, "incorrect_loss_per_token": 1.347080667813619, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.241512656211853, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": true, "logits_per_token": -1.241512656211853, "logits_per_char": -0.6207563281059265, "num_chars": 2}, {"sum_logits": -1.469466209411621, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": false, "logits_per_token": -1.469466209411621, "logits_per_char": -0.7347331047058105, "num_chars": 2}, {"sum_logits": -1.3302631378173828, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": false, "logits_per_token": -1.3302631378173828, "logits_per_char": -0.6651315689086914, "num_chars": 2}, {"sum_logits": -1.5586508512496948, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": false, "logits_per_token": -1.5586508512496948, "logits_per_char": -0.7793254256248474, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 275, "native_id": "1531", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5035836696624756, "incorrect_loss_raw": 1.3580111265182495, "correct_loss_per_char": 0.7517918348312378, "incorrect_loss_per_char": 0.6790055632591248, "correct_loss_per_token": 1.5035836696624756, "incorrect_loss_per_token": 1.3580111265182495, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.295117974281311, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": true, "logits_per_token": -1.295117974281311, "logits_per_char": -0.6475589871406555, "num_chars": 2}, {"sum_logits": -1.359376311302185, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.359376311302185, "logits_per_char": -0.6796881556510925, "num_chars": 2}, {"sum_logits": -1.4195390939712524, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.4195390939712524, "logits_per_char": -0.7097695469856262, "num_chars": 2}, {"sum_logits": -1.5035836696624756, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.5035836696624756, "logits_per_char": -0.7517918348312378, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 276, "native_id": "8-321", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2702044248580933, "incorrect_loss_raw": 1.4415754477183025, "correct_loss_per_char": 0.6351022124290466, "incorrect_loss_per_char": 0.7207877238591512, "correct_loss_per_token": 1.2702044248580933, "incorrect_loss_per_token": 1.4415754477183025, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3391735553741455, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.3391735553741455, "logits_per_char": -0.6695867776870728, "num_chars": 2}, {"sum_logits": -1.2702044248580933, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": true, "logits_per_token": -1.2702044248580933, "logits_per_char": -0.6351022124290466, "num_chars": 2}, {"sum_logits": -1.4628360271453857, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.4628360271453857, "logits_per_char": -0.7314180135726929, "num_chars": 2}, {"sum_logits": -1.522716760635376, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.522716760635376, "logits_per_char": -0.761358380317688, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 277, "native_id": "1321", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4238297939300537, "incorrect_loss_raw": 1.3833620150883992, "correct_loss_per_char": 0.7119148969650269, "incorrect_loss_per_char": 0.6916810075441996, "correct_loss_per_token": 1.4238297939300537, "incorrect_loss_per_token": 1.3833620150883992, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4078731536865234, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.4078731536865234, "logits_per_char": -0.7039365768432617, "num_chars": 2}, {"sum_logits": -1.4618664979934692, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.4618664979934692, "logits_per_char": -0.7309332489967346, "num_chars": 2}, {"sum_logits": -1.4238297939300537, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.4238297939300537, "logits_per_char": -0.7119148969650269, "num_chars": 2}, {"sum_logits": -1.280346393585205, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": true, "logits_per_token": -1.280346393585205, "logits_per_char": -0.6401731967926025, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 278, "native_id": "9-51", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.301418423652649, "incorrect_loss_raw": 1.4270625511805217, "correct_loss_per_char": 0.6507092118263245, "incorrect_loss_per_char": 0.7135312755902609, "correct_loss_per_token": 1.301418423652649, "incorrect_loss_per_token": 1.4270625511805217, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.360305666923523, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.360305666923523, "logits_per_char": -0.6801528334617615, "num_chars": 2}, {"sum_logits": -1.4944298267364502, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.4944298267364502, "logits_per_char": -0.7472149133682251, "num_chars": 2}, {"sum_logits": -1.4264521598815918, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.4264521598815918, "logits_per_char": -0.7132260799407959, "num_chars": 2}, {"sum_logits": -1.301418423652649, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": true, "logits_per_token": -1.301418423652649, "logits_per_char": -0.6507092118263245, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 279, "native_id": "7-685", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4514302015304565, "incorrect_loss_raw": 1.3771191835403442, "correct_loss_per_char": 0.7257151007652283, "incorrect_loss_per_char": 0.6885595917701721, "correct_loss_per_token": 1.4514302015304565, "incorrect_loss_per_token": 1.3771191835403442, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3199467658996582, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": true, "logits_per_token": -1.3199467658996582, "logits_per_char": -0.6599733829498291, "num_chars": 2}, {"sum_logits": -1.4514302015304565, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.4514302015304565, "logits_per_char": -0.7257151007652283, "num_chars": 2}, {"sum_logits": -1.343673586845398, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.343673586845398, "logits_per_char": -0.671836793422699, "num_chars": 2}, {"sum_logits": -1.4677371978759766, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.4677371978759766, "logits_per_char": -0.7338685989379883, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 280, "native_id": "7-59", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1623975038528442, "incorrect_loss_raw": 1.4919517437616985, "correct_loss_per_char": 0.5811987519264221, "incorrect_loss_per_char": 0.7459758718808492, "correct_loss_per_token": 1.1623975038528442, "incorrect_loss_per_token": 1.4919517437616985, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1623975038528442, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": true, "logits_per_token": -1.1623975038528442, "logits_per_char": -0.5811987519264221, "num_chars": 2}, {"sum_logits": -1.378033995628357, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.378033995628357, "logits_per_char": -0.6890169978141785, "num_chars": 2}, {"sum_logits": -1.4047216176986694, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.4047216176986694, "logits_per_char": -0.7023608088493347, "num_chars": 2}, {"sum_logits": -1.6930996179580688, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.6930996179580688, "logits_per_char": -0.8465498089790344, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 281, "native_id": "7-270", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.546758770942688, "incorrect_loss_raw": 1.3552745580673218, "correct_loss_per_char": 0.773379385471344, "incorrect_loss_per_char": 0.6776372790336609, "correct_loss_per_token": 1.546758770942688, "incorrect_loss_per_token": 1.3552745580673218, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1797411441802979, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": true, "logits_per_token": -1.1797411441802979, "logits_per_char": -0.5898705720901489, "num_chars": 2}, {"sum_logits": -1.3897757530212402, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.3897757530212402, "logits_per_char": -0.6948878765106201, "num_chars": 2}, {"sum_logits": -1.4963067770004272, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.4963067770004272, "logits_per_char": -0.7481533885002136, "num_chars": 2}, {"sum_logits": -1.546758770942688, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.546758770942688, "logits_per_char": -0.773379385471344, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 282, "native_id": "7-736", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4843125343322754, "incorrect_loss_raw": 1.373896320660909, "correct_loss_per_char": 0.7421562671661377, "incorrect_loss_per_char": 0.6869481603304545, "correct_loss_per_token": 1.4843125343322754, "incorrect_loss_per_token": 1.373896320660909, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1958059072494507, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": true, "logits_per_token": -1.1958059072494507, "logits_per_char": -0.5979029536247253, "num_chars": 2}, {"sum_logits": -1.5090515613555908, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.5090515613555908, "logits_per_char": -0.7545257806777954, "num_chars": 2}, {"sum_logits": -1.4843125343322754, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.4843125343322754, "logits_per_char": -0.7421562671661377, "num_chars": 2}, {"sum_logits": -1.4168314933776855, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.4168314933776855, "logits_per_char": -0.7084157466888428, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 283, "native_id": "8-186", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4602755308151245, "incorrect_loss_raw": 1.3789764245351155, "correct_loss_per_char": 0.7301377654075623, "incorrect_loss_per_char": 0.6894882122675577, "correct_loss_per_token": 1.4602755308151245, "incorrect_loss_per_token": 1.3789764245351155, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2031179666519165, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": true, "logits_per_token": -1.2031179666519165, "logits_per_char": -0.6015589833259583, "num_chars": 2}, {"sum_logits": -1.4277364015579224, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.4277364015579224, "logits_per_char": -0.7138682007789612, "num_chars": 2}, {"sum_logits": -1.5060749053955078, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.5060749053955078, "logits_per_char": -0.7530374526977539, "num_chars": 2}, {"sum_logits": -1.4602755308151245, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.4602755308151245, "logits_per_char": -0.7301377654075623, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 284, "native_id": "224", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.550567865371704, "incorrect_loss_raw": 1.3516501188278198, "correct_loss_per_char": 0.775283932685852, "incorrect_loss_per_char": 0.6758250594139099, "correct_loss_per_token": 1.550567865371704, "incorrect_loss_per_token": 1.3516501188278198, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1954762935638428, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": true, "logits_per_token": -1.1954762935638428, "logits_per_char": -0.5977381467819214, "num_chars": 2}, {"sum_logits": -1.550567865371704, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.550567865371704, "logits_per_char": -0.775283932685852, "num_chars": 2}, {"sum_logits": -1.3704978227615356, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.3704978227615356, "logits_per_char": -0.6852489113807678, "num_chars": 2}, {"sum_logits": -1.488976240158081, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.488976240158081, "logits_per_char": -0.7444881200790405, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 285, "native_id": "8-206", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.172928810119629, "incorrect_loss_raw": 1.480722705523173, "correct_loss_per_char": 0.5864644050598145, "incorrect_loss_per_char": 0.7403613527615865, "correct_loss_per_token": 1.172928810119629, "incorrect_loss_per_token": 1.480722705523173, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.172928810119629, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": true, "logits_per_token": -1.172928810119629, "logits_per_char": -0.5864644050598145, "num_chars": 2}, {"sum_logits": -1.4914534091949463, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.4914534091949463, "logits_per_char": -0.7457267045974731, "num_chars": 2}, {"sum_logits": -1.5693469047546387, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.5693469047546387, "logits_per_char": -0.7846734523773193, "num_chars": 2}, {"sum_logits": -1.381367802619934, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.381367802619934, "logits_per_char": -0.690683901309967, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 286, "native_id": "8-190", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4101893901824951, "incorrect_loss_raw": 1.3973835706710815, "correct_loss_per_char": 0.7050946950912476, "incorrect_loss_per_char": 0.6986917853355408, "correct_loss_per_token": 1.4101893901824951, "incorrect_loss_per_token": 1.3973835706710815, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2151079177856445, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": true, "logits_per_token": -1.2151079177856445, "logits_per_char": -0.6075539588928223, "num_chars": 2}, {"sum_logits": -1.4101893901824951, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.4101893901824951, "logits_per_char": -0.7050946950912476, "num_chars": 2}, {"sum_logits": -1.3955397605895996, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.3955397605895996, "logits_per_char": -0.6977698802947998, "num_chars": 2}, {"sum_logits": -1.5815030336380005, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.5815030336380005, "logits_per_char": -0.7907515168190002, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 287, "native_id": "7-334", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4182097911834717, "incorrect_loss_raw": 1.3995213508605957, "correct_loss_per_char": 0.7091048955917358, "incorrect_loss_per_char": 0.6997606754302979, "correct_loss_per_token": 1.4182097911834717, "incorrect_loss_per_token": 1.3995213508605957, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1521550416946411, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.1521550416946411, "logits_per_char": -0.5760775208473206, "num_chars": 2}, {"sum_logits": -1.4182097911834717, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4182097911834717, "logits_per_char": -0.7091048955917358, "num_chars": 2}, {"sum_logits": -1.5123854875564575, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5123854875564575, "logits_per_char": -0.7561927437782288, "num_chars": 2}, {"sum_logits": -1.5340235233306885, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.5340235233306885, "logits_per_char": -0.7670117616653442, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 288, "native_id": "9-853", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.392164707183838, "incorrect_loss_raw": 1.3965799411137898, "correct_loss_per_char": 0.696082353591919, "incorrect_loss_per_char": 0.6982899705568949, "correct_loss_per_token": 1.392164707183838, "incorrect_loss_per_token": 1.3965799411137898, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.296858787536621, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": true, "logits_per_token": -1.296858787536621, "logits_per_char": -0.6484293937683105, "num_chars": 2}, {"sum_logits": -1.392164707183838, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": false, "logits_per_token": -1.392164707183838, "logits_per_char": -0.696082353591919, "num_chars": 2}, {"sum_logits": -1.4912036657333374, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": false, "logits_per_token": -1.4912036657333374, "logits_per_char": -0.7456018328666687, "num_chars": 2}, {"sum_logits": -1.4016773700714111, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": false, "logits_per_token": -1.4016773700714111, "logits_per_char": -0.7008386850357056, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 289, "native_id": "8-367", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.224155068397522, "incorrect_loss_raw": 1.4551899830500286, "correct_loss_per_char": 0.612077534198761, "incorrect_loss_per_char": 0.7275949915250143, "correct_loss_per_token": 1.224155068397522, "incorrect_loss_per_token": 1.4551899830500286, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.224155068397522, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": true, "logits_per_token": -1.224155068397522, "logits_per_char": -0.612077534198761, "num_chars": 2}, {"sum_logits": -1.465196132659912, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.465196132659912, "logits_per_char": -0.732598066329956, "num_chars": 2}, {"sum_logits": -1.4866477251052856, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.4866477251052856, "logits_per_char": -0.7433238625526428, "num_chars": 2}, {"sum_logits": -1.4137260913848877, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.4137260913848877, "logits_per_char": -0.7068630456924438, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 290, "native_id": "1047", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.340686321258545, "incorrect_loss_raw": 1.4112590551376343, "correct_loss_per_char": 0.6703431606292725, "incorrect_loss_per_char": 0.7056295275688171, "correct_loss_per_token": 1.340686321258545, "incorrect_loss_per_token": 1.4112590551376343, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.340686321258545, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": true, "logits_per_token": -1.340686321258545, "logits_per_char": -0.6703431606292725, "num_chars": 2}, {"sum_logits": -1.4514180421829224, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.4514180421829224, "logits_per_char": -0.7257090210914612, "num_chars": 2}, {"sum_logits": -1.4266648292541504, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.4266648292541504, "logits_per_char": -0.7133324146270752, "num_chars": 2}, {"sum_logits": -1.35569429397583, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.35569429397583, "logits_per_char": -0.677847146987915, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 291, "native_id": "9-454", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4786701202392578, "incorrect_loss_raw": 1.369230071703593, "correct_loss_per_char": 0.7393350601196289, "incorrect_loss_per_char": 0.6846150358517965, "correct_loss_per_token": 1.4786701202392578, "incorrect_loss_per_token": 1.369230071703593, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.445099115371704, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": false, "logits_per_token": -1.445099115371704, "logits_per_char": -0.722549557685852, "num_chars": 2}, {"sum_logits": -1.3556321859359741, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": false, "logits_per_token": -1.3556321859359741, "logits_per_char": -0.6778160929679871, "num_chars": 2}, {"sum_logits": -1.4786701202392578, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": false, "logits_per_token": -1.4786701202392578, "logits_per_char": -0.7393350601196289, "num_chars": 2}, {"sum_logits": -1.3069589138031006, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": true, "logits_per_token": -1.3069589138031006, "logits_per_char": -0.6534794569015503, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 292, "native_id": "1572", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3567572832107544, "incorrect_loss_raw": 1.420724908510844, "correct_loss_per_char": 0.6783786416053772, "incorrect_loss_per_char": 0.710362454255422, "correct_loss_per_token": 1.3567572832107544, "incorrect_loss_per_token": 1.420724908510844, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2364106178283691, "num_tokens": 1, "num_tokens_all": 246, "is_greedy": true, "logits_per_token": -1.2364106178283691, "logits_per_char": -0.6182053089141846, "num_chars": 2}, {"sum_logits": -1.6304771900177002, "num_tokens": 1, "num_tokens_all": 246, "is_greedy": false, "logits_per_token": -1.6304771900177002, "logits_per_char": -0.8152385950088501, "num_chars": 2}, {"sum_logits": -1.3567572832107544, "num_tokens": 1, "num_tokens_all": 246, "is_greedy": false, "logits_per_token": -1.3567572832107544, "logits_per_char": -0.6783786416053772, "num_chars": 2}, {"sum_logits": -1.3952869176864624, "num_tokens": 1, "num_tokens_all": 246, "is_greedy": false, "logits_per_token": -1.3952869176864624, "logits_per_char": -0.6976434588432312, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 293, "native_id": "8-373", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9314947128295898, "incorrect_loss_raw": 1.6332568724950154, "correct_loss_per_char": 0.4657473564147949, "incorrect_loss_per_char": 0.8166284362475077, "correct_loss_per_token": 0.9314947128295898, "incorrect_loss_per_token": 1.6332568724950154, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9314947128295898, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": true, "logits_per_token": -0.9314947128295898, "logits_per_char": -0.4657473564147949, "num_chars": 2}, {"sum_logits": -1.3820720911026, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": false, "logits_per_token": -1.3820720911026, "logits_per_char": -0.6910360455513, "num_chars": 2}, {"sum_logits": -1.6826118230819702, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": false, "logits_per_token": -1.6826118230819702, "logits_per_char": -0.8413059115409851, "num_chars": 2}, {"sum_logits": -1.835086703300476, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": false, "logits_per_token": -1.835086703300476, "logits_per_char": -0.917543351650238, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 294, "native_id": "9-772", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1247138977050781, "incorrect_loss_raw": 1.5112775564193726, "correct_loss_per_char": 0.5623569488525391, "incorrect_loss_per_char": 0.7556387782096863, "correct_loss_per_token": 1.1247138977050781, "incorrect_loss_per_token": 1.5112775564193726, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1247138977050781, "num_tokens": 1, "num_tokens_all": 246, "is_greedy": true, "logits_per_token": -1.1247138977050781, "logits_per_char": -0.5623569488525391, "num_chars": 2}, {"sum_logits": -1.4227832555770874, "num_tokens": 1, "num_tokens_all": 246, "is_greedy": false, "logits_per_token": -1.4227832555770874, "logits_per_char": -0.7113916277885437, "num_chars": 2}, {"sum_logits": -1.3906196355819702, "num_tokens": 1, "num_tokens_all": 246, "is_greedy": false, "logits_per_token": -1.3906196355819702, "logits_per_char": -0.6953098177909851, "num_chars": 2}, {"sum_logits": -1.72042977809906, "num_tokens": 1, "num_tokens_all": 246, "is_greedy": false, "logits_per_token": -1.72042977809906, "logits_per_char": -0.86021488904953, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 295, "native_id": "1852", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.315259575843811, "incorrect_loss_raw": 1.4208981196085613, "correct_loss_per_char": 0.6576297879219055, "incorrect_loss_per_char": 0.7104490598042806, "correct_loss_per_token": 1.315259575843811, "incorrect_loss_per_token": 1.4208981196085613, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.315259575843811, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": true, "logits_per_token": -1.315259575843811, "logits_per_char": -0.6576297879219055, "num_chars": 2}, {"sum_logits": -1.368910551071167, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "logits_per_token": -1.368910551071167, "logits_per_char": -0.6844552755355835, "num_chars": 2}, {"sum_logits": -1.4561561346054077, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "logits_per_token": -1.4561561346054077, "logits_per_char": -0.7280780673027039, "num_chars": 2}, {"sum_logits": -1.4376276731491089, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "logits_per_token": -1.4376276731491089, "logits_per_char": -0.7188138365745544, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 296, "native_id": "9-1090", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4740889072418213, "incorrect_loss_raw": 1.374403953552246, "correct_loss_per_char": 0.7370444536209106, "incorrect_loss_per_char": 0.687201976776123, "correct_loss_per_token": 1.4740889072418213, "incorrect_loss_per_token": 1.374403953552246, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2433940172195435, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": true, "logits_per_token": -1.2433940172195435, "logits_per_char": -0.6216970086097717, "num_chars": 2}, {"sum_logits": -1.4180314540863037, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.4180314540863037, "logits_per_char": -0.7090157270431519, "num_chars": 2}, {"sum_logits": -1.4617863893508911, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.4617863893508911, "logits_per_char": -0.7308931946754456, "num_chars": 2}, {"sum_logits": -1.4740889072418213, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.4740889072418213, "logits_per_char": -0.7370444536209106, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 297, "native_id": "7-769", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3628002405166626, "incorrect_loss_raw": 1.4180054664611816, "correct_loss_per_char": 0.6814001202583313, "incorrect_loss_per_char": 0.7090027332305908, "correct_loss_per_token": 1.3628002405166626, "incorrect_loss_per_token": 1.4180054664611816, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1752187013626099, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": true, "logits_per_token": -1.1752187013626099, "logits_per_char": -0.5876093506813049, "num_chars": 2}, {"sum_logits": -1.4869194030761719, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.4869194030761719, "logits_per_char": -0.7434597015380859, "num_chars": 2}, {"sum_logits": -1.3628002405166626, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.3628002405166626, "logits_per_char": -0.6814001202583313, "num_chars": 2}, {"sum_logits": -1.5918782949447632, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.5918782949447632, "logits_per_char": -0.7959391474723816, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 298, "native_id": "9-478", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3859082460403442, "incorrect_loss_raw": 1.4105713764826457, "correct_loss_per_char": 0.6929541230201721, "incorrect_loss_per_char": 0.7052856882413229, "correct_loss_per_token": 1.3859082460403442, "incorrect_loss_per_token": 1.4105713764826457, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2934365272521973, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.2934365272521973, "logits_per_char": -0.6467182636260986, "num_chars": 2}, {"sum_logits": -1.2806209325790405, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": true, "logits_per_token": -1.2806209325790405, "logits_per_char": -0.6403104662895203, "num_chars": 2}, {"sum_logits": -1.3859082460403442, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.3859082460403442, "logits_per_char": -0.6929541230201721, "num_chars": 2}, {"sum_logits": -1.6576566696166992, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.6576566696166992, "logits_per_char": -0.8288283348083496, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 299, "native_id": "448", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2285552024841309, "incorrect_loss_raw": 1.4560914039611816, "correct_loss_per_char": 0.6142776012420654, "incorrect_loss_per_char": 0.7280457019805908, "correct_loss_per_token": 1.2285552024841309, "incorrect_loss_per_token": 1.4560914039611816, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2285552024841309, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": true, "logits_per_token": -1.2285552024841309, "logits_per_char": -0.6142776012420654, "num_chars": 2}, {"sum_logits": -1.3823051452636719, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.3823051452636719, "logits_per_char": -0.6911525726318359, "num_chars": 2}, {"sum_logits": -1.4533979892730713, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.4533979892730713, "logits_per_char": -0.7266989946365356, "num_chars": 2}, {"sum_logits": -1.5325710773468018, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.5325710773468018, "logits_per_char": -0.7662855386734009, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 300, "native_id": "7-417", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.59785795211792, "incorrect_loss_raw": 1.3526378870010376, "correct_loss_per_char": 0.79892897605896, "incorrect_loss_per_char": 0.6763189435005188, "correct_loss_per_token": 1.59785795211792, "incorrect_loss_per_token": 1.3526378870010376, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1205289363861084, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": true, "logits_per_token": -1.1205289363861084, "logits_per_char": -0.5602644681930542, "num_chars": 2}, {"sum_logits": -1.59785795211792, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.59785795211792, "logits_per_char": -0.79892897605896, "num_chars": 2}, {"sum_logits": -1.3960851430892944, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.3960851430892944, "logits_per_char": -0.6980425715446472, "num_chars": 2}, {"sum_logits": -1.54129958152771, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.54129958152771, "logits_per_char": -0.770649790763855, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 301, "native_id": "7-108", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4117127656936646, "incorrect_loss_raw": 1.4113186995188396, "correct_loss_per_char": 0.7058563828468323, "incorrect_loss_per_char": 0.7056593497594198, "correct_loss_per_token": 1.4117127656936646, "incorrect_loss_per_token": 1.4113186995188396, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2192282676696777, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": true, "logits_per_token": -1.2192282676696777, "logits_per_char": -0.6096141338348389, "num_chars": 2}, {"sum_logits": -1.7039215564727783, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.7039215564727783, "logits_per_char": -0.8519607782363892, "num_chars": 2}, {"sum_logits": -1.3108062744140625, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.3108062744140625, "logits_per_char": -0.6554031372070312, "num_chars": 2}, {"sum_logits": -1.4117127656936646, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.4117127656936646, "logits_per_char": -0.7058563828468323, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 302, "native_id": "1506", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4196547269821167, "incorrect_loss_raw": 1.3858218987782795, "correct_loss_per_char": 0.7098273634910583, "incorrect_loss_per_char": 0.6929109493891398, "correct_loss_per_token": 1.4196547269821167, "incorrect_loss_per_token": 1.3858218987782795, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2618328332901, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": true, "logits_per_token": -1.2618328332901, "logits_per_char": -0.63091641664505, "num_chars": 2}, {"sum_logits": -1.4711155891418457, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.4711155891418457, "logits_per_char": -0.7355577945709229, "num_chars": 2}, {"sum_logits": -1.424517273902893, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.424517273902893, "logits_per_char": -0.7122586369514465, "num_chars": 2}, {"sum_logits": -1.4196547269821167, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.4196547269821167, "logits_per_char": -0.7098273634910583, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 303, "native_id": "1712", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.252912998199463, "incorrect_loss_raw": 1.4440552790959675, "correct_loss_per_char": 0.6264564990997314, "incorrect_loss_per_char": 0.7220276395479838, "correct_loss_per_token": 1.252912998199463, "incorrect_loss_per_token": 1.4440552790959675, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.252912998199463, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": true, "logits_per_token": -1.252912998199463, "logits_per_char": -0.6264564990997314, "num_chars": 2}, {"sum_logits": -1.4201147556304932, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.4201147556304932, "logits_per_char": -0.7100573778152466, "num_chars": 2}, {"sum_logits": -1.3888543844223022, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.3888543844223022, "logits_per_char": -0.6944271922111511, "num_chars": 2}, {"sum_logits": -1.5231966972351074, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.5231966972351074, "logits_per_char": -0.7615983486175537, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 304, "native_id": "8-312", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5236287117004395, "incorrect_loss_raw": 1.367301344871521, "correct_loss_per_char": 0.7618143558502197, "incorrect_loss_per_char": 0.6836506724357605, "correct_loss_per_token": 1.5236287117004395, "incorrect_loss_per_token": 1.367301344871521, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.163704752922058, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": true, "logits_per_token": -1.163704752922058, "logits_per_char": -0.581852376461029, "num_chars": 2}, {"sum_logits": -1.4664433002471924, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.4664433002471924, "logits_per_char": -0.7332216501235962, "num_chars": 2}, {"sum_logits": -1.5236287117004395, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.5236287117004395, "logits_per_char": -0.7618143558502197, "num_chars": 2}, {"sum_logits": -1.4717559814453125, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.4717559814453125, "logits_per_char": -0.7358779907226562, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 305, "native_id": "9-776", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3965580463409424, "incorrect_loss_raw": 1.4040988286336262, "correct_loss_per_char": 0.6982790231704712, "incorrect_loss_per_char": 0.7020494143168131, "correct_loss_per_token": 1.3965580463409424, "incorrect_loss_per_token": 1.4040988286336262, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3965580463409424, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.3965580463409424, "logits_per_char": -0.6982790231704712, "num_chars": 2}, {"sum_logits": -1.5870550870895386, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.5870550870895386, "logits_per_char": -0.7935275435447693, "num_chars": 2}, {"sum_logits": -1.229521632194519, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": true, "logits_per_token": -1.229521632194519, "logits_per_char": -0.6147608160972595, "num_chars": 2}, {"sum_logits": -1.3957197666168213, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.3957197666168213, "logits_per_char": -0.6978598833084106, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 306, "native_id": "8-279", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.447823405265808, "incorrect_loss_raw": 1.395231048266093, "correct_loss_per_char": 0.723911702632904, "incorrect_loss_per_char": 0.6976155241330465, "correct_loss_per_token": 1.447823405265808, "incorrect_loss_per_token": 1.395231048266093, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1794090270996094, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": true, "logits_per_token": -1.1794090270996094, "logits_per_char": -0.5897045135498047, "num_chars": 2}, {"sum_logits": -1.6378649473190308, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.6378649473190308, "logits_per_char": -0.8189324736595154, "num_chars": 2}, {"sum_logits": -1.447823405265808, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.447823405265808, "logits_per_char": -0.723911702632904, "num_chars": 2}, {"sum_logits": -1.3684191703796387, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.3684191703796387, "logits_per_char": -0.6842095851898193, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 307, "native_id": "9-621", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5256251096725464, "incorrect_loss_raw": 1.3565951585769653, "correct_loss_per_char": 0.7628125548362732, "incorrect_loss_per_char": 0.6782975792884827, "correct_loss_per_token": 1.5256251096725464, "incorrect_loss_per_token": 1.3565951585769653, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.210331916809082, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": true, "logits_per_token": -1.210331916809082, "logits_per_char": -0.605165958404541, "num_chars": 2}, {"sum_logits": -1.5256251096725464, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.5256251096725464, "logits_per_char": -0.7628125548362732, "num_chars": 2}, {"sum_logits": -1.4478873014450073, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.4478873014450073, "logits_per_char": -0.7239436507225037, "num_chars": 2}, {"sum_logits": -1.4115662574768066, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.4115662574768066, "logits_per_char": -0.7057831287384033, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 308, "native_id": "1823", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4676356315612793, "incorrect_loss_raw": 1.390513300895691, "correct_loss_per_char": 0.7338178157806396, "incorrect_loss_per_char": 0.6952566504478455, "correct_loss_per_token": 1.4676356315612793, "incorrect_loss_per_token": 1.390513300895691, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1158820390701294, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": true, "logits_per_token": -1.1158820390701294, "logits_per_char": -0.5579410195350647, "num_chars": 2}, {"sum_logits": -1.4567739963531494, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.4567739963531494, "logits_per_char": -0.7283869981765747, "num_chars": 2}, {"sum_logits": -1.4676356315612793, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.4676356315612793, "logits_per_char": -0.7338178157806396, "num_chars": 2}, {"sum_logits": -1.598883867263794, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.598883867263794, "logits_per_char": -0.799441933631897, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 309, "native_id": "9-735", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4691088199615479, "incorrect_loss_raw": 1.3703713019688923, "correct_loss_per_char": 0.7345544099807739, "incorrect_loss_per_char": 0.6851856509844462, "correct_loss_per_token": 1.4691088199615479, "incorrect_loss_per_token": 1.3703713019688923, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2874808311462402, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": true, "logits_per_token": -1.2874808311462402, "logits_per_char": -0.6437404155731201, "num_chars": 2}, {"sum_logits": -1.4691088199615479, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.4691088199615479, "logits_per_char": -0.7345544099807739, "num_chars": 2}, {"sum_logits": -1.4487669467926025, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.4487669467926025, "logits_per_char": -0.7243834733963013, "num_chars": 2}, {"sum_logits": -1.3748661279678345, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.3748661279678345, "logits_per_char": -0.6874330639839172, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 310, "native_id": "7-1170", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4121423959732056, "incorrect_loss_raw": 1.3915284474690754, "correct_loss_per_char": 0.7060711979866028, "incorrect_loss_per_char": 0.6957642237345377, "correct_loss_per_token": 1.4121423959732056, "incorrect_loss_per_token": 1.3915284474690754, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2422925233840942, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": true, "logits_per_token": -1.2422925233840942, "logits_per_char": -0.6211462616920471, "num_chars": 2}, {"sum_logits": -1.4121423959732056, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": false, "logits_per_token": -1.4121423959732056, "logits_per_char": -0.7060711979866028, "num_chars": 2}, {"sum_logits": -1.397035002708435, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": false, "logits_per_token": -1.397035002708435, "logits_per_char": -0.6985175013542175, "num_chars": 2}, {"sum_logits": -1.5352578163146973, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": false, "logits_per_token": -1.5352578163146973, "logits_per_char": -0.7676289081573486, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 311, "native_id": "1500", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4384506940841675, "incorrect_loss_raw": 1.3913142283757527, "correct_loss_per_char": 0.7192253470420837, "incorrect_loss_per_char": 0.6956571141878763, "correct_loss_per_token": 1.4384506940841675, "incorrect_loss_per_token": 1.3913142283757527, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4384506940841675, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.4384506940841675, "logits_per_char": -0.7192253470420837, "num_chars": 2}, {"sum_logits": -1.3263972997665405, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.3263972997665405, "logits_per_char": -0.6631986498832703, "num_chars": 2}, {"sum_logits": -1.2893410921096802, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": true, "logits_per_token": -1.2893410921096802, "logits_per_char": -0.6446705460548401, "num_chars": 2}, {"sum_logits": -1.5582042932510376, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.5582042932510376, "logits_per_char": -0.7791021466255188, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 312, "native_id": "342", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.470369577407837, "incorrect_loss_raw": 1.3864068587621052, "correct_loss_per_char": 0.7351847887039185, "incorrect_loss_per_char": 0.6932034293810526, "correct_loss_per_token": 1.470369577407837, "incorrect_loss_per_token": 1.3864068587621052, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1621752977371216, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": true, "logits_per_token": -1.1621752977371216, "logits_per_char": -0.5810876488685608, "num_chars": 2}, {"sum_logits": -1.470369577407837, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.470369577407837, "logits_per_char": -0.7351847887039185, "num_chars": 2}, {"sum_logits": -1.4354779720306396, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.4354779720306396, "logits_per_char": -0.7177389860153198, "num_chars": 2}, {"sum_logits": -1.5615673065185547, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.5615673065185547, "logits_per_char": -0.7807836532592773, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 313, "native_id": "7-356", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4876593351364136, "incorrect_loss_raw": 1.3753115733464558, "correct_loss_per_char": 0.7438296675682068, "incorrect_loss_per_char": 0.6876557866732279, "correct_loss_per_token": 1.4876593351364136, "incorrect_loss_per_token": 1.3753115733464558, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1698802709579468, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": true, "logits_per_token": -1.1698802709579468, "logits_per_char": -0.5849401354789734, "num_chars": 2}, {"sum_logits": -1.5542783737182617, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.5542783737182617, "logits_per_char": -0.7771391868591309, "num_chars": 2}, {"sum_logits": -1.4017760753631592, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.4017760753631592, "logits_per_char": -0.7008880376815796, "num_chars": 2}, {"sum_logits": -1.4876593351364136, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.4876593351364136, "logits_per_char": -0.7438296675682068, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 314, "native_id": "78", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4312280416488647, "incorrect_loss_raw": 1.3925701379776, "correct_loss_per_char": 0.7156140208244324, "incorrect_loss_per_char": 0.6962850689888, "correct_loss_per_token": 1.4312280416488647, "incorrect_loss_per_token": 1.3925701379776, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2178422212600708, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": true, "logits_per_token": -1.2178422212600708, "logits_per_char": -0.6089211106300354, "num_chars": 2}, {"sum_logits": -1.4312280416488647, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.4312280416488647, "logits_per_char": -0.7156140208244324, "num_chars": 2}, {"sum_logits": -1.3508607149124146, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.3508607149124146, "logits_per_char": -0.6754303574562073, "num_chars": 2}, {"sum_logits": -1.609007477760315, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.609007477760315, "logits_per_char": -0.8045037388801575, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 315, "native_id": "9-520", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3895856142044067, "incorrect_loss_raw": 1.410991112391154, "correct_loss_per_char": 0.6947928071022034, "incorrect_loss_per_char": 0.705495556195577, "correct_loss_per_token": 1.3895856142044067, "incorrect_loss_per_token": 1.410991112391154, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.235144853591919, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": true, "logits_per_token": -1.235144853591919, "logits_per_char": -0.6175724267959595, "num_chars": 2}, {"sum_logits": -1.5887424945831299, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.5887424945831299, "logits_per_char": -0.7943712472915649, "num_chars": 2}, {"sum_logits": -1.3895856142044067, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.3895856142044067, "logits_per_char": -0.6947928071022034, "num_chars": 2}, {"sum_logits": -1.409085988998413, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.409085988998413, "logits_per_char": -0.7045429944992065, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 316, "native_id": "7-653", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4390733242034912, "incorrect_loss_raw": 1.3806971708933513, "correct_loss_per_char": 0.7195366621017456, "incorrect_loss_per_char": 0.6903485854466757, "correct_loss_per_token": 1.4390733242034912, "incorrect_loss_per_token": 1.3806971708933513, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3615553379058838, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.3615553379058838, "logits_per_char": -0.6807776689529419, "num_chars": 2}, {"sum_logits": -1.5035420656204224, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.5035420656204224, "logits_per_char": -0.7517710328102112, "num_chars": 2}, {"sum_logits": -1.4390733242034912, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.4390733242034912, "logits_per_char": -0.7195366621017456, "num_chars": 2}, {"sum_logits": -1.2769941091537476, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": true, "logits_per_token": -1.2769941091537476, "logits_per_char": -0.6384970545768738, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 317, "native_id": "1112", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.394339680671692, "incorrect_loss_raw": 1.4409391085306804, "correct_loss_per_char": 0.697169840335846, "incorrect_loss_per_char": 0.7204695542653402, "correct_loss_per_token": 1.394339680671692, "incorrect_loss_per_token": 1.4409391085306804, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0330312252044678, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": true, "logits_per_token": -1.0330312252044678, "logits_per_char": -0.5165156126022339, "num_chars": 2}, {"sum_logits": -1.394339680671692, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.394339680671692, "logits_per_char": -0.697169840335846, "num_chars": 2}, {"sum_logits": -1.5077427625656128, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.5077427625656128, "logits_per_char": -0.7538713812828064, "num_chars": 2}, {"sum_logits": -1.7820433378219604, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.7820433378219604, "logits_per_char": -0.8910216689109802, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 318, "native_id": "9-152", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4440187215805054, "incorrect_loss_raw": 1.4007387161254883, "correct_loss_per_char": 0.7220093607902527, "incorrect_loss_per_char": 0.7003693580627441, "correct_loss_per_token": 1.4440187215805054, "incorrect_loss_per_token": 1.4007387161254883, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1201071739196777, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": true, "logits_per_token": -1.1201071739196777, "logits_per_char": -0.5600535869598389, "num_chars": 2}, {"sum_logits": -1.4440187215805054, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.4440187215805054, "logits_per_char": -0.7220093607902527, "num_chars": 2}, {"sum_logits": -1.4209504127502441, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.4209504127502441, "logits_per_char": -0.7104752063751221, "num_chars": 2}, {"sum_logits": -1.661158561706543, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.661158561706543, "logits_per_char": -0.8305792808532715, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 319, "native_id": "9-552", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.354289174079895, "incorrect_loss_raw": 1.4168963034947712, "correct_loss_per_char": 0.6771445870399475, "incorrect_loss_per_char": 0.7084481517473856, "correct_loss_per_token": 1.354289174079895, "incorrect_loss_per_token": 1.4168963034947712, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2141315937042236, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": true, "logits_per_token": -1.2141315937042236, "logits_per_char": -0.6070657968521118, "num_chars": 2}, {"sum_logits": -1.354289174079895, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": false, "logits_per_token": -1.354289174079895, "logits_per_char": -0.6771445870399475, "num_chars": 2}, {"sum_logits": -1.5066909790039062, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": false, "logits_per_token": -1.5066909790039062, "logits_per_char": -0.7533454895019531, "num_chars": 2}, {"sum_logits": -1.529866337776184, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": false, "logits_per_token": -1.529866337776184, "logits_per_char": -0.764933168888092, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 320, "native_id": "7-262", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1513539552688599, "incorrect_loss_raw": 1.4923265377680461, "correct_loss_per_char": 0.5756769776344299, "incorrect_loss_per_char": 0.7461632688840231, "correct_loss_per_token": 1.1513539552688599, "incorrect_loss_per_token": 1.4923265377680461, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1513539552688599, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": true, "logits_per_token": -1.1513539552688599, "logits_per_char": -0.5756769776344299, "num_chars": 2}, {"sum_logits": -1.3909536600112915, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.3909536600112915, "logits_per_char": -0.6954768300056458, "num_chars": 2}, {"sum_logits": -1.4578123092651367, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.4578123092651367, "logits_per_char": -0.7289061546325684, "num_chars": 2}, {"sum_logits": -1.62821364402771, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.62821364402771, "logits_per_char": -0.814106822013855, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 321, "native_id": "7-683", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5531389713287354, "incorrect_loss_raw": 1.351942539215088, "correct_loss_per_char": 0.7765694856643677, "incorrect_loss_per_char": 0.675971269607544, "correct_loss_per_token": 1.5531389713287354, "incorrect_loss_per_token": 1.351942539215088, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1646586656570435, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": true, "logits_per_token": -1.1646586656570435, "logits_per_char": -0.5823293328285217, "num_chars": 2}, {"sum_logits": -1.4777419567108154, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.4777419567108154, "logits_per_char": -0.7388709783554077, "num_chars": 2}, {"sum_logits": -1.4134269952774048, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.4134269952774048, "logits_per_char": -0.7067134976387024, "num_chars": 2}, {"sum_logits": -1.5531389713287354, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.5531389713287354, "logits_per_char": -0.7765694856643677, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 322, "native_id": "276", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.452111840248108, "incorrect_loss_raw": 1.3844681978225708, "correct_loss_per_char": 0.726055920124054, "incorrect_loss_per_char": 0.6922340989112854, "correct_loss_per_token": 1.452111840248108, "incorrect_loss_per_token": 1.3844681978225708, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1893030405044556, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": true, "logits_per_token": -1.1893030405044556, "logits_per_char": -0.5946515202522278, "num_chars": 2}, {"sum_logits": -1.452111840248108, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.452111840248108, "logits_per_char": -0.726055920124054, "num_chars": 2}, {"sum_logits": -1.4347879886627197, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.4347879886627197, "logits_per_char": -0.7173939943313599, "num_chars": 2}, {"sum_logits": -1.529313564300537, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.529313564300537, "logits_per_char": -0.7646567821502686, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 323, "native_id": "7-855", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.528815746307373, "incorrect_loss_raw": 1.37315837542216, "correct_loss_per_char": 0.7644078731536865, "incorrect_loss_per_char": 0.68657918771108, "correct_loss_per_token": 1.528815746307373, "incorrect_loss_per_token": 1.37315837542216, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1204341650009155, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": true, "logits_per_token": -1.1204341650009155, "logits_per_char": -0.5602170825004578, "num_chars": 2}, {"sum_logits": -1.361348032951355, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.361348032951355, "logits_per_char": -0.6806740164756775, "num_chars": 2}, {"sum_logits": -1.528815746307373, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.528815746307373, "logits_per_char": -0.7644078731536865, "num_chars": 2}, {"sum_logits": -1.637692928314209, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.637692928314209, "logits_per_char": -0.8188464641571045, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 324, "native_id": "664", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5363751649856567, "incorrect_loss_raw": 1.3538144032160442, "correct_loss_per_char": 0.7681875824928284, "incorrect_loss_per_char": 0.6769072016080221, "correct_loss_per_token": 1.5363751649856567, "incorrect_loss_per_token": 1.3538144032160442, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2055652141571045, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": true, "logits_per_token": -1.2055652141571045, "logits_per_char": -0.6027826070785522, "num_chars": 2}, {"sum_logits": -1.418436050415039, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.418436050415039, "logits_per_char": -0.7092180252075195, "num_chars": 2}, {"sum_logits": -1.4374419450759888, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.4374419450759888, "logits_per_char": -0.7187209725379944, "num_chars": 2}, {"sum_logits": -1.5363751649856567, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.5363751649856567, "logits_per_char": -0.7681875824928284, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 325, "native_id": "9-883", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5331292152404785, "incorrect_loss_raw": 1.3697725931803386, "correct_loss_per_char": 0.7665646076202393, "incorrect_loss_per_char": 0.6848862965901693, "correct_loss_per_token": 1.5331292152404785, "incorrect_loss_per_token": 1.3697725931803386, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1336138248443604, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": true, "logits_per_token": -1.1336138248443604, "logits_per_char": -0.5668069124221802, "num_chars": 2}, {"sum_logits": -1.5049357414245605, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.5049357414245605, "logits_per_char": -0.7524678707122803, "num_chars": 2}, {"sum_logits": -1.5331292152404785, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.5331292152404785, "logits_per_char": -0.7665646076202393, "num_chars": 2}, {"sum_logits": -1.4707682132720947, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.4707682132720947, "logits_per_char": -0.7353841066360474, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 326, "native_id": "9-550", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0565078258514404, "incorrect_loss_raw": 1.5375044743220012, "correct_loss_per_char": 0.5282539129257202, "incorrect_loss_per_char": 0.7687522371610006, "correct_loss_per_token": 1.0565078258514404, "incorrect_loss_per_token": 1.5375044743220012, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0565078258514404, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": true, "logits_per_token": -1.0565078258514404, "logits_per_char": -0.5282539129257202, "num_chars": 2}, {"sum_logits": -1.6474298238754272, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.6474298238754272, "logits_per_char": -0.8237149119377136, "num_chars": 2}, {"sum_logits": -1.4739775657653809, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.4739775657653809, "logits_per_char": -0.7369887828826904, "num_chars": 2}, {"sum_logits": -1.4911060333251953, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.4911060333251953, "logits_per_char": -0.7455530166625977, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 327, "native_id": "8-493", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6303519010543823, "incorrect_loss_raw": 1.332963267962138, "correct_loss_per_char": 0.8151759505271912, "incorrect_loss_per_char": 0.666481633981069, "correct_loss_per_token": 1.6303519010543823, "incorrect_loss_per_token": 1.332963267962138, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1472203731536865, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": true, "logits_per_token": -1.1472203731536865, "logits_per_char": -0.5736101865768433, "num_chars": 2}, {"sum_logits": -1.4173610210418701, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.4173610210418701, "logits_per_char": -0.7086805105209351, "num_chars": 2}, {"sum_logits": -1.434308409690857, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.434308409690857, "logits_per_char": -0.7171542048454285, "num_chars": 2}, {"sum_logits": -1.6303519010543823, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.6303519010543823, "logits_per_char": -0.8151759505271912, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 328, "native_id": "9-257", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3990058898925781, "incorrect_loss_raw": 1.4092147747675579, "correct_loss_per_char": 0.6995029449462891, "incorrect_loss_per_char": 0.7046073873837789, "correct_loss_per_token": 1.3990058898925781, "incorrect_loss_per_token": 1.4092147747675579, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1544477939605713, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": true, "logits_per_token": -1.1544477939605713, "logits_per_char": -0.5772238969802856, "num_chars": 2}, {"sum_logits": -1.4976236820220947, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.4976236820220947, "logits_per_char": -0.7488118410110474, "num_chars": 2}, {"sum_logits": -1.3990058898925781, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.3990058898925781, "logits_per_char": -0.6995029449462891, "num_chars": 2}, {"sum_logits": -1.5755728483200073, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.5755728483200073, "logits_per_char": -0.7877864241600037, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 329, "native_id": "1239", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2701213359832764, "incorrect_loss_raw": 1.43805726369222, "correct_loss_per_char": 0.6350606679916382, "incorrect_loss_per_char": 0.71902863184611, "correct_loss_per_token": 1.2701213359832764, "incorrect_loss_per_token": 1.43805726369222, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4923075437545776, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.4923075437545776, "logits_per_char": -0.7461537718772888, "num_chars": 2}, {"sum_logits": -1.40057373046875, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.40057373046875, "logits_per_char": -0.700286865234375, "num_chars": 2}, {"sum_logits": -1.4212905168533325, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.4212905168533325, "logits_per_char": -0.7106452584266663, "num_chars": 2}, {"sum_logits": -1.2701213359832764, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": true, "logits_per_token": -1.2701213359832764, "logits_per_char": -0.6350606679916382, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 330, "native_id": "869", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2592909336090088, "incorrect_loss_raw": 1.4416344165802002, "correct_loss_per_char": 0.6296454668045044, "incorrect_loss_per_char": 0.7208172082901001, "correct_loss_per_token": 1.2592909336090088, "incorrect_loss_per_token": 1.4416344165802002, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2592909336090088, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": true, "logits_per_token": -1.2592909336090088, "logits_per_char": -0.6296454668045044, "num_chars": 2}, {"sum_logits": -1.4525766372680664, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.4525766372680664, "logits_per_char": -0.7262883186340332, "num_chars": 2}, {"sum_logits": -1.3591537475585938, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.3591537475585938, "logits_per_char": -0.6795768737792969, "num_chars": 2}, {"sum_logits": -1.5131728649139404, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.5131728649139404, "logits_per_char": -0.7565864324569702, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 331, "native_id": "7-1105", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.066666603088379, "incorrect_loss_raw": 1.5290428400039673, "correct_loss_per_char": 0.5333333015441895, "incorrect_loss_per_char": 0.7645214200019836, "correct_loss_per_token": 1.066666603088379, "incorrect_loss_per_token": 1.5290428400039673, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.066666603088379, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": true, "logits_per_token": -1.066666603088379, "logits_per_char": -0.5333333015441895, "num_chars": 2}, {"sum_logits": -1.5133498907089233, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": false, "logits_per_token": -1.5133498907089233, "logits_per_char": -0.7566749453544617, "num_chars": 2}, {"sum_logits": -1.4776195287704468, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": false, "logits_per_token": -1.4776195287704468, "logits_per_char": -0.7388097643852234, "num_chars": 2}, {"sum_logits": -1.5961591005325317, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": false, "logits_per_token": -1.5961591005325317, "logits_per_char": -0.7980795502662659, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 332, "native_id": "597", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3692280054092407, "incorrect_loss_raw": 1.407003362973531, "correct_loss_per_char": 0.6846140027046204, "incorrect_loss_per_char": 0.7035016814867655, "correct_loss_per_token": 1.3692280054092407, "incorrect_loss_per_token": 1.407003362973531, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2399213314056396, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": true, "logits_per_token": -1.2399213314056396, "logits_per_char": -0.6199606657028198, "num_chars": 2}, {"sum_logits": -1.5217701196670532, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.5217701196670532, "logits_per_char": -0.7608850598335266, "num_chars": 2}, {"sum_logits": -1.4593186378479004, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.4593186378479004, "logits_per_char": -0.7296593189239502, "num_chars": 2}, {"sum_logits": -1.3692280054092407, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.3692280054092407, "logits_per_char": -0.6846140027046204, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 333, "native_id": "385", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4355106353759766, "incorrect_loss_raw": 1.38795002301534, "correct_loss_per_char": 0.7177553176879883, "incorrect_loss_per_char": 0.69397501150767, "correct_loss_per_token": 1.4355106353759766, "incorrect_loss_per_token": 1.38795002301534, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2065025568008423, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": true, "logits_per_token": -1.2065025568008423, "logits_per_char": -0.6032512784004211, "num_chars": 2}, {"sum_logits": -1.4576547145843506, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.4576547145843506, "logits_per_char": -0.7288273572921753, "num_chars": 2}, {"sum_logits": -1.4996927976608276, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.4996927976608276, "logits_per_char": -0.7498463988304138, "num_chars": 2}, {"sum_logits": -1.4355106353759766, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.4355106353759766, "logits_per_char": -0.7177553176879883, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 334, "native_id": "1301", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4629555940628052, "incorrect_loss_raw": 1.3977853457132976, "correct_loss_per_char": 0.7314777970314026, "incorrect_loss_per_char": 0.6988926728566488, "correct_loss_per_token": 1.4629555940628052, "incorrect_loss_per_token": 1.3977853457132976, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0986801385879517, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": true, "logits_per_token": -1.0986801385879517, "logits_per_char": -0.5493400692939758, "num_chars": 2}, {"sum_logits": -1.4629555940628052, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.4629555940628052, "logits_per_char": -0.7314777970314026, "num_chars": 2}, {"sum_logits": -1.450650691986084, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.450650691986084, "logits_per_char": -0.725325345993042, "num_chars": 2}, {"sum_logits": -1.644025206565857, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.644025206565857, "logits_per_char": -0.8220126032829285, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 335, "native_id": "9-893", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3318485021591187, "incorrect_loss_raw": 1.419859806696574, "correct_loss_per_char": 0.6659242510795593, "incorrect_loss_per_char": 0.709929903348287, "correct_loss_per_token": 1.3318485021591187, "incorrect_loss_per_token": 1.419859806696574, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2905986309051514, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": true, "logits_per_token": -1.2905986309051514, "logits_per_char": -0.6452993154525757, "num_chars": 2}, {"sum_logits": -1.5148423910140991, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.5148423910140991, "logits_per_char": -0.7574211955070496, "num_chars": 2}, {"sum_logits": -1.4541383981704712, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.4541383981704712, "logits_per_char": -0.7270691990852356, "num_chars": 2}, {"sum_logits": -1.3318485021591187, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.3318485021591187, "logits_per_char": -0.6659242510795593, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 336, "native_id": "9-369", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5031747817993164, "incorrect_loss_raw": 1.3693594137827556, "correct_loss_per_char": 0.7515873908996582, "incorrect_loss_per_char": 0.6846797068913778, "correct_loss_per_token": 1.5031747817993164, "incorrect_loss_per_token": 1.3693594137827556, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2287160158157349, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": true, "logits_per_token": -1.2287160158157349, "logits_per_char": -0.6143580079078674, "num_chars": 2}, {"sum_logits": -1.5031747817993164, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": false, "logits_per_token": -1.5031747817993164, "logits_per_char": -0.7515873908996582, "num_chars": 2}, {"sum_logits": -1.3508723974227905, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": false, "logits_per_token": -1.3508723974227905, "logits_per_char": -0.6754361987113953, "num_chars": 2}, {"sum_logits": -1.5284898281097412, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": false, "logits_per_token": -1.5284898281097412, "logits_per_char": -0.7642449140548706, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 337, "native_id": "9-1026", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3533331155776978, "incorrect_loss_raw": 1.4090704917907715, "correct_loss_per_char": 0.6766665577888489, "incorrect_loss_per_char": 0.7045352458953857, "correct_loss_per_token": 1.3533331155776978, "incorrect_loss_per_token": 1.4090704917907715, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3533331155776978, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.3533331155776978, "logits_per_char": -0.6766665577888489, "num_chars": 2}, {"sum_logits": -1.2992310523986816, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.2992310523986816, "logits_per_char": -0.6496155261993408, "num_chars": 2}, {"sum_logits": -1.4970359802246094, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4970359802246094, "logits_per_char": -0.7485179901123047, "num_chars": 2}, {"sum_logits": -1.4309444427490234, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4309444427490234, "logits_per_char": -0.7154722213745117, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 338, "native_id": "7-424", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3960967063903809, "incorrect_loss_raw": 1.4039799372355144, "correct_loss_per_char": 0.6980483531951904, "incorrect_loss_per_char": 0.7019899686177572, "correct_loss_per_token": 1.3960967063903809, "incorrect_loss_per_token": 1.4039799372355144, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1826372146606445, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": true, "logits_per_token": -1.1826372146606445, "logits_per_char": -0.5913186073303223, "num_chars": 2}, {"sum_logits": -1.3960967063903809, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": false, "logits_per_token": -1.3960967063903809, "logits_per_char": -0.6980483531951904, "num_chars": 2}, {"sum_logits": -1.4759913682937622, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": false, "logits_per_token": -1.4759913682937622, "logits_per_char": -0.7379956841468811, "num_chars": 2}, {"sum_logits": -1.5533112287521362, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": false, "logits_per_token": -1.5533112287521362, "logits_per_char": -0.7766556143760681, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 339, "native_id": "9-259", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2508292198181152, "incorrect_loss_raw": 1.4666165113449097, "correct_loss_per_char": 0.6254146099090576, "incorrect_loss_per_char": 0.7333082556724548, "correct_loss_per_token": 1.2508292198181152, "incorrect_loss_per_token": 1.4666165113449097, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2508292198181152, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.2508292198181152, "logits_per_char": -0.6254146099090576, "num_chars": 2}, {"sum_logits": -1.6941773891448975, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.6941773891448975, "logits_per_char": -0.8470886945724487, "num_chars": 2}, {"sum_logits": -1.2458667755126953, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": true, "logits_per_token": -1.2458667755126953, "logits_per_char": -0.6229333877563477, "num_chars": 2}, {"sum_logits": -1.4598053693771362, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.4598053693771362, "logits_per_char": -0.7299026846885681, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 340, "native_id": "9-783", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4737669229507446, "incorrect_loss_raw": 1.3808935483296711, "correct_loss_per_char": 0.7368834614753723, "incorrect_loss_per_char": 0.6904467741648356, "correct_loss_per_token": 1.4737669229507446, "incorrect_loss_per_token": 1.3808935483296711, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.166865348815918, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": true, "logits_per_token": -1.166865348815918, "logits_per_char": -0.583432674407959, "num_chars": 2}, {"sum_logits": -1.3758747577667236, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.3758747577667236, "logits_per_char": -0.6879373788833618, "num_chars": 2}, {"sum_logits": -1.4737669229507446, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.4737669229507446, "logits_per_char": -0.7368834614753723, "num_chars": 2}, {"sum_logits": -1.599940538406372, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.599940538406372, "logits_per_char": -0.799970269203186, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 341, "native_id": "1088", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4057601690292358, "incorrect_loss_raw": 1.4059737126032512, "correct_loss_per_char": 0.7028800845146179, "incorrect_loss_per_char": 0.7029868563016256, "correct_loss_per_token": 1.4057601690292358, "incorrect_loss_per_token": 1.4059737126032512, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1784831285476685, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": true, "logits_per_token": -1.1784831285476685, "logits_per_char": -0.5892415642738342, "num_chars": 2}, {"sum_logits": -1.4057601690292358, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.4057601690292358, "logits_per_char": -0.7028800845146179, "num_chars": 2}, {"sum_logits": -1.4155595302581787, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.4155595302581787, "logits_per_char": -0.7077797651290894, "num_chars": 2}, {"sum_logits": -1.6238784790039062, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.6238784790039062, "logits_per_char": -0.8119392395019531, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 342, "native_id": "1387", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4655330181121826, "incorrect_loss_raw": 1.3773078521092732, "correct_loss_per_char": 0.7327665090560913, "incorrect_loss_per_char": 0.6886539260546366, "correct_loss_per_token": 1.4655330181121826, "incorrect_loss_per_token": 1.3773078521092732, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1914398670196533, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.1914398670196533, "logits_per_char": -0.5957199335098267, "num_chars": 2}, {"sum_logits": -1.5035746097564697, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5035746097564697, "logits_per_char": -0.7517873048782349, "num_chars": 2}, {"sum_logits": -1.4655330181121826, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.4655330181121826, "logits_per_char": -0.7327665090560913, "num_chars": 2}, {"sum_logits": -1.4369090795516968, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.4369090795516968, "logits_per_char": -0.7184545397758484, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 343, "native_id": "7-1062", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1861717700958252, "incorrect_loss_raw": 1.4705173969268799, "correct_loss_per_char": 0.5930858850479126, "incorrect_loss_per_char": 0.7352586984634399, "correct_loss_per_token": 1.1861717700958252, "incorrect_loss_per_token": 1.4705173969268799, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1861717700958252, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": true, "logits_per_token": -1.1861717700958252, "logits_per_char": -0.5930858850479126, "num_chars": 2}, {"sum_logits": -1.4952220916748047, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": false, "logits_per_token": -1.4952220916748047, "logits_per_char": -0.7476110458374023, "num_chars": 2}, {"sum_logits": -1.4275203943252563, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": false, "logits_per_token": -1.4275203943252563, "logits_per_char": -0.7137601971626282, "num_chars": 2}, {"sum_logits": -1.4888097047805786, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": false, "logits_per_token": -1.4888097047805786, "logits_per_char": -0.7444048523902893, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 344, "native_id": "676", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.736049771308899, "incorrect_loss_raw": 1.3194042046864827, "correct_loss_per_char": 0.8680248856544495, "incorrect_loss_per_char": 0.6597021023432413, "correct_loss_per_token": 1.736049771308899, "incorrect_loss_per_token": 1.3194042046864827, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0547051429748535, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": true, "logits_per_token": -1.0547051429748535, "logits_per_char": -0.5273525714874268, "num_chars": 2}, {"sum_logits": -1.4105496406555176, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.4105496406555176, "logits_per_char": -0.7052748203277588, "num_chars": 2}, {"sum_logits": -1.4929578304290771, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.4929578304290771, "logits_per_char": -0.7464789152145386, "num_chars": 2}, {"sum_logits": -1.736049771308899, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.736049771308899, "logits_per_char": -0.8680248856544495, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 345, "native_id": "1998", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3709864616394043, "incorrect_loss_raw": 1.4317877689997356, "correct_loss_per_char": 0.6854932308197021, "incorrect_loss_per_char": 0.7158938844998678, "correct_loss_per_token": 1.3709864616394043, "incorrect_loss_per_token": 1.4317877689997356, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0947208404541016, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": true, "logits_per_token": -1.0947208404541016, "logits_per_char": -0.5473604202270508, "num_chars": 2}, {"sum_logits": -1.3709864616394043, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.3709864616394043, "logits_per_char": -0.6854932308197021, "num_chars": 2}, {"sum_logits": -1.4959412813186646, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.4959412813186646, "logits_per_char": -0.7479706406593323, "num_chars": 2}, {"sum_logits": -1.7047011852264404, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.7047011852264404, "logits_per_char": -0.8523505926132202, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 346, "native_id": "1698", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3332631587982178, "incorrect_loss_raw": 1.414311408996582, "correct_loss_per_char": 0.6666315793991089, "incorrect_loss_per_char": 0.707155704498291, "correct_loss_per_token": 1.3332631587982178, "incorrect_loss_per_token": 1.414311408996582, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3332631587982178, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": true, "logits_per_token": -1.3332631587982178, "logits_per_char": -0.6666315793991089, "num_chars": 2}, {"sum_logits": -1.3491984605789185, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.3491984605789185, "logits_per_char": -0.6745992302894592, "num_chars": 2}, {"sum_logits": -1.4647350311279297, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.4647350311279297, "logits_per_char": -0.7323675155639648, "num_chars": 2}, {"sum_logits": -1.429000735282898, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.429000735282898, "logits_per_char": -0.714500367641449, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 347, "native_id": "490", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1535975933074951, "incorrect_loss_raw": 1.4889217615127563, "correct_loss_per_char": 0.5767987966537476, "incorrect_loss_per_char": 0.7444608807563782, "correct_loss_per_token": 1.1535975933074951, "incorrect_loss_per_token": 1.4889217615127563, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1535975933074951, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": true, "logits_per_token": -1.1535975933074951, "logits_per_char": -0.5767987966537476, "num_chars": 2}, {"sum_logits": -1.4260668754577637, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.4260668754577637, "logits_per_char": -0.7130334377288818, "num_chars": 2}, {"sum_logits": -1.4687007665634155, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.4687007665634155, "logits_per_char": -0.7343503832817078, "num_chars": 2}, {"sum_logits": -1.5719976425170898, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.5719976425170898, "logits_per_char": -0.7859988212585449, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 348, "native_id": "844", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1070220470428467, "incorrect_loss_raw": 1.5142614444096882, "correct_loss_per_char": 0.5535110235214233, "incorrect_loss_per_char": 0.7571307222048441, "correct_loss_per_token": 1.1070220470428467, "incorrect_loss_per_token": 1.5142614444096882, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1070220470428467, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": true, "logits_per_token": -1.1070220470428467, "logits_per_char": -0.5535110235214233, "num_chars": 2}, {"sum_logits": -1.4983159303665161, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.4983159303665161, "logits_per_char": -0.7491579651832581, "num_chars": 2}, {"sum_logits": -1.4448342323303223, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.4448342323303223, "logits_per_char": -0.7224171161651611, "num_chars": 2}, {"sum_logits": -1.5996341705322266, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.5996341705322266, "logits_per_char": -0.7998170852661133, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 349, "native_id": "1795", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4132239818572998, "incorrect_loss_raw": 1.3885815540949504, "correct_loss_per_char": 0.7066119909286499, "incorrect_loss_per_char": 0.6942907770474752, "correct_loss_per_token": 1.4132239818572998, "incorrect_loss_per_token": 1.3885815540949504, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3048477172851562, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": true, "logits_per_token": -1.3048477172851562, "logits_per_char": -0.6524238586425781, "num_chars": 2}, {"sum_logits": -1.4518218040466309, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.4518218040466309, "logits_per_char": -0.7259109020233154, "num_chars": 2}, {"sum_logits": -1.4132239818572998, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.4132239818572998, "logits_per_char": -0.7066119909286499, "num_chars": 2}, {"sum_logits": -1.409075140953064, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.409075140953064, "logits_per_char": -0.704537570476532, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 350, "native_id": "1508", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4750559329986572, "incorrect_loss_raw": 1.377504587173462, "correct_loss_per_char": 0.7375279664993286, "incorrect_loss_per_char": 0.688752293586731, "correct_loss_per_token": 1.4750559329986572, "incorrect_loss_per_token": 1.377504587173462, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5721596479415894, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.5721596479415894, "logits_per_char": -0.7860798239707947, "num_chars": 2}, {"sum_logits": -1.4750559329986572, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.4750559329986572, "logits_per_char": -0.7375279664993286, "num_chars": 2}, {"sum_logits": -1.238619327545166, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": true, "logits_per_token": -1.238619327545166, "logits_per_char": -0.619309663772583, "num_chars": 2}, {"sum_logits": -1.3217347860336304, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.3217347860336304, "logits_per_char": -0.6608673930168152, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 351, "native_id": "9-289", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.620807409286499, "incorrect_loss_raw": 1.3275826374689739, "correct_loss_per_char": 0.8104037046432495, "incorrect_loss_per_char": 0.6637913187344869, "correct_loss_per_token": 1.620807409286499, "incorrect_loss_per_token": 1.3275826374689739, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3003731966018677, "num_tokens": 1, "num_tokens_all": 246, "is_greedy": true, "logits_per_token": -1.3003731966018677, "logits_per_char": -0.6501865983009338, "num_chars": 2}, {"sum_logits": -1.3571912050247192, "num_tokens": 1, "num_tokens_all": 246, "is_greedy": false, "logits_per_token": -1.3571912050247192, "logits_per_char": -0.6785956025123596, "num_chars": 2}, {"sum_logits": -1.3251835107803345, "num_tokens": 1, "num_tokens_all": 246, "is_greedy": false, "logits_per_token": -1.3251835107803345, "logits_per_char": -0.6625917553901672, "num_chars": 2}, {"sum_logits": -1.620807409286499, "num_tokens": 1, "num_tokens_all": 246, "is_greedy": false, "logits_per_token": -1.620807409286499, "logits_per_char": -0.8104037046432495, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 352, "native_id": "9-668", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4354441165924072, "incorrect_loss_raw": 1.3886290788650513, "correct_loss_per_char": 0.7177220582962036, "incorrect_loss_per_char": 0.6943145394325256, "correct_loss_per_token": 1.4354441165924072, "incorrect_loss_per_token": 1.3886290788650513, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2268109321594238, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": true, "logits_per_token": -1.2268109321594238, "logits_per_char": -0.6134054660797119, "num_chars": 2}, {"sum_logits": -1.3915815353393555, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.3915815353393555, "logits_per_char": -0.6957907676696777, "num_chars": 2}, {"sum_logits": -1.4354441165924072, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.4354441165924072, "logits_per_char": -0.7177220582962036, "num_chars": 2}, {"sum_logits": -1.5474947690963745, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.5474947690963745, "logits_per_char": -0.7737473845481873, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 353, "native_id": "7-364", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.30674409866333, "incorrect_loss_raw": 1.4234700600306194, "correct_loss_per_char": 0.653372049331665, "incorrect_loss_per_char": 0.7117350300153097, "correct_loss_per_token": 1.30674409866333, "incorrect_loss_per_token": 1.4234700600306194, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.30674409866333, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": true, "logits_per_token": -1.30674409866333, "logits_per_char": -0.653372049331665, "num_chars": 2}, {"sum_logits": -1.4811471700668335, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.4811471700668335, "logits_per_char": -0.7405735850334167, "num_chars": 2}, {"sum_logits": -1.3811781406402588, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.3811781406402588, "logits_per_char": -0.6905890703201294, "num_chars": 2}, {"sum_logits": -1.4080848693847656, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.4080848693847656, "logits_per_char": -0.7040424346923828, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 354, "native_id": "1271", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.293982982635498, "incorrect_loss_raw": 1.4412600994110107, "correct_loss_per_char": 0.646991491317749, "incorrect_loss_per_char": 0.7206300497055054, "correct_loss_per_token": 1.293982982635498, "incorrect_loss_per_token": 1.4412600994110107, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2847521305084229, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": true, "logits_per_token": -1.2847521305084229, "logits_per_char": -0.6423760652542114, "num_chars": 2}, {"sum_logits": -1.293982982635498, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.293982982635498, "logits_per_char": -0.646991491317749, "num_chars": 2}, {"sum_logits": -1.38625967502594, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.38625967502594, "logits_per_char": -0.69312983751297, "num_chars": 2}, {"sum_logits": -1.6527684926986694, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.6527684926986694, "logits_per_char": -0.8263842463493347, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 355, "native_id": "9-1117", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2894606590270996, "incorrect_loss_raw": 1.4367162386576335, "correct_loss_per_char": 0.6447303295135498, "incorrect_loss_per_char": 0.7183581193288168, "correct_loss_per_token": 1.2894606590270996, "incorrect_loss_per_token": 1.4367162386576335, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2894606590270996, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": true, "logits_per_token": -1.2894606590270996, "logits_per_char": -0.6447303295135498, "num_chars": 2}, {"sum_logits": -1.3490149974822998, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.3490149974822998, "logits_per_char": -0.6745074987411499, "num_chars": 2}, {"sum_logits": -1.4254729747772217, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.4254729747772217, "logits_per_char": -0.7127364873886108, "num_chars": 2}, {"sum_logits": -1.535660743713379, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.535660743713379, "logits_per_char": -0.7678303718566895, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 356, "native_id": "35", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3151335716247559, "incorrect_loss_raw": 1.4338057438532512, "correct_loss_per_char": 0.6575667858123779, "incorrect_loss_per_char": 0.7169028719266256, "correct_loss_per_token": 1.3151335716247559, "incorrect_loss_per_token": 1.4338057438532512, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3151335716247559, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": true, "logits_per_token": -1.3151335716247559, "logits_per_char": -0.6575667858123779, "num_chars": 2}, {"sum_logits": -1.3206923007965088, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.3206923007965088, "logits_per_char": -0.6603461503982544, "num_chars": 2}, {"sum_logits": -1.3439892530441284, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.3439892530441284, "logits_per_char": -0.6719946265220642, "num_chars": 2}, {"sum_logits": -1.6367356777191162, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.6367356777191162, "logits_per_char": -0.8183678388595581, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 357, "native_id": "1660", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3774131536483765, "incorrect_loss_raw": 1.3969385623931885, "correct_loss_per_char": 0.6887065768241882, "incorrect_loss_per_char": 0.6984692811965942, "correct_loss_per_token": 1.3774131536483765, "incorrect_loss_per_token": 1.3969385623931885, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3586751222610474, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.3586751222610474, "logits_per_char": -0.6793375611305237, "num_chars": 2}, {"sum_logits": -1.3774131536483765, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.3774131536483765, "logits_per_char": -0.6887065768241882, "num_chars": 2}, {"sum_logits": -1.415084719657898, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.415084719657898, "logits_per_char": -0.707542359828949, "num_chars": 2}, {"sum_logits": -1.4170558452606201, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.4170558452606201, "logits_per_char": -0.7085279226303101, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 358, "native_id": "7-710", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.512421727180481, "incorrect_loss_raw": 1.359342376391093, "correct_loss_per_char": 0.7562108635902405, "incorrect_loss_per_char": 0.6796711881955465, "correct_loss_per_token": 1.512421727180481, "incorrect_loss_per_token": 1.359342376391093, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3687124252319336, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.3687124252319336, "logits_per_char": -0.6843562126159668, "num_chars": 2}, {"sum_logits": -1.512421727180481, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.512421727180481, "logits_per_char": -0.7562108635902405, "num_chars": 2}, {"sum_logits": -1.3913383483886719, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.3913383483886719, "logits_per_char": -0.6956691741943359, "num_chars": 2}, {"sum_logits": -1.3179763555526733, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": true, "logits_per_token": -1.3179763555526733, "logits_per_char": -0.6589881777763367, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 359, "native_id": "8-52", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.367510199546814, "incorrect_loss_raw": 1.4004307587941487, "correct_loss_per_char": 0.683755099773407, "incorrect_loss_per_char": 0.7002153793970743, "correct_loss_per_token": 1.367510199546814, "incorrect_loss_per_token": 1.4004307587941487, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3893916606903076, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.3893916606903076, "logits_per_char": -0.6946958303451538, "num_chars": 2}, {"sum_logits": -1.367510199546814, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.367510199546814, "logits_per_char": -0.683755099773407, "num_chars": 2}, {"sum_logits": -1.4112411737442017, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.4112411737442017, "logits_per_char": -0.7056205868721008, "num_chars": 2}, {"sum_logits": -1.400659441947937, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.400659441947937, "logits_per_char": -0.7003297209739685, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 360, "native_id": "9-1167", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6108604669570923, "incorrect_loss_raw": 1.3397181431452434, "correct_loss_per_char": 0.8054302334785461, "incorrect_loss_per_char": 0.6698590715726217, "correct_loss_per_token": 1.6108604669570923, "incorrect_loss_per_token": 1.3397181431452434, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1762694120407104, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": true, "logits_per_token": -1.1762694120407104, "logits_per_char": -0.5881347060203552, "num_chars": 2}, {"sum_logits": -1.4905158281326294, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.4905158281326294, "logits_per_char": -0.7452579140663147, "num_chars": 2}, {"sum_logits": -1.3523691892623901, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.3523691892623901, "logits_per_char": -0.6761845946311951, "num_chars": 2}, {"sum_logits": -1.6108604669570923, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.6108604669570923, "logits_per_char": -0.8054302334785461, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 361, "native_id": "8-43", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5153732299804688, "incorrect_loss_raw": 1.3708651860555012, "correct_loss_per_char": 0.7576866149902344, "incorrect_loss_per_char": 0.6854325930277506, "correct_loss_per_token": 1.5153732299804688, "incorrect_loss_per_token": 1.3708651860555012, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1252498626708984, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": true, "logits_per_token": -1.1252498626708984, "logits_per_char": -0.5626249313354492, "num_chars": 2}, {"sum_logits": -1.4875380992889404, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": false, "logits_per_token": -1.4875380992889404, "logits_per_char": -0.7437690496444702, "num_chars": 2}, {"sum_logits": -1.499807596206665, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": false, "logits_per_token": -1.499807596206665, "logits_per_char": -0.7499037981033325, "num_chars": 2}, {"sum_logits": -1.5153732299804688, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": false, "logits_per_token": -1.5153732299804688, "logits_per_char": -0.7576866149902344, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 362, "native_id": "9-57", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4514046907424927, "incorrect_loss_raw": 1.378923495610555, "correct_loss_per_char": 0.7257023453712463, "incorrect_loss_per_char": 0.6894617478052775, "correct_loss_per_token": 1.4514046907424927, "incorrect_loss_per_token": 1.378923495610555, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.266031265258789, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": true, "logits_per_token": -1.266031265258789, "logits_per_char": -0.6330156326293945, "num_chars": 2}, {"sum_logits": -1.4514046907424927, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.4514046907424927, "logits_per_char": -0.7257023453712463, "num_chars": 2}, {"sum_logits": -1.4935160875320435, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.4935160875320435, "logits_per_char": -0.7467580437660217, "num_chars": 2}, {"sum_logits": -1.3772231340408325, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.3772231340408325, "logits_per_char": -0.6886115670204163, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 363, "native_id": "1411", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.165894865989685, "incorrect_loss_raw": 1.4875972270965576, "correct_loss_per_char": 0.5829474329948425, "incorrect_loss_per_char": 0.7437986135482788, "correct_loss_per_token": 1.165894865989685, "incorrect_loss_per_token": 1.4875972270965576, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.165894865989685, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": true, "logits_per_token": -1.165894865989685, "logits_per_char": -0.5829474329948425, "num_chars": 2}, {"sum_logits": -1.416541337966919, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.416541337966919, "logits_per_char": -0.7082706689834595, "num_chars": 2}, {"sum_logits": -1.38412606716156, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.38412606716156, "logits_per_char": -0.69206303358078, "num_chars": 2}, {"sum_logits": -1.6621242761611938, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.6621242761611938, "logits_per_char": -0.8310621380805969, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 364, "native_id": "9-206", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3800078630447388, "incorrect_loss_raw": 1.4043079614639282, "correct_loss_per_char": 0.6900039315223694, "incorrect_loss_per_char": 0.7021539807319641, "correct_loss_per_token": 1.3800078630447388, "incorrect_loss_per_token": 1.4043079614639282, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2694892883300781, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": true, "logits_per_token": -1.2694892883300781, "logits_per_char": -0.6347446441650391, "num_chars": 2}, {"sum_logits": -1.4308159351348877, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.4308159351348877, "logits_per_char": -0.7154079675674438, "num_chars": 2}, {"sum_logits": -1.3800078630447388, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.3800078630447388, "logits_per_char": -0.6900039315223694, "num_chars": 2}, {"sum_logits": -1.5126186609268188, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.5126186609268188, "logits_per_char": -0.7563093304634094, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 365, "native_id": "7-740", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4836570024490356, "incorrect_loss_raw": 1.3706647555033367, "correct_loss_per_char": 0.7418285012245178, "incorrect_loss_per_char": 0.6853323777516683, "correct_loss_per_token": 1.4836570024490356, "incorrect_loss_per_token": 1.3706647555033367, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2111523151397705, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": true, "logits_per_token": -1.2111523151397705, "logits_per_char": -0.6055761575698853, "num_chars": 2}, {"sum_logits": -1.4677785634994507, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": false, "logits_per_token": -1.4677785634994507, "logits_per_char": -0.7338892817497253, "num_chars": 2}, {"sum_logits": -1.4330633878707886, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": false, "logits_per_token": -1.4330633878707886, "logits_per_char": -0.7165316939353943, "num_chars": 2}, {"sum_logits": -1.4836570024490356, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": false, "logits_per_token": -1.4836570024490356, "logits_per_char": -0.7418285012245178, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 366, "native_id": "1774", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.432865858078003, "incorrect_loss_raw": 1.3865341345469158, "correct_loss_per_char": 0.7164329290390015, "incorrect_loss_per_char": 0.6932670672734579, "correct_loss_per_token": 1.432865858078003, "incorrect_loss_per_token": 1.3865341345469158, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2026904821395874, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": true, "logits_per_token": -1.2026904821395874, "logits_per_char": -0.6013452410697937, "num_chars": 2}, {"sum_logits": -1.432865858078003, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.432865858078003, "logits_per_char": -0.7164329290390015, "num_chars": 2}, {"sum_logits": -1.4578431844711304, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.4578431844711304, "logits_per_char": -0.7289215922355652, "num_chars": 2}, {"sum_logits": -1.4990687370300293, "num_tokens": 1, "num_tokens_all": 286, "is_greedy": false, "logits_per_token": -1.4990687370300293, "logits_per_char": -0.7495343685150146, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 367, "native_id": "7-93", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5147591829299927, "incorrect_loss_raw": 1.3770571947097778, "correct_loss_per_char": 0.7573795914649963, "incorrect_loss_per_char": 0.6885285973548889, "correct_loss_per_token": 1.5147591829299927, "incorrect_loss_per_token": 1.3770571947097778, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.100767731666565, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": true, "logits_per_token": -1.100767731666565, "logits_per_char": -0.5503838658332825, "num_chars": 2}, {"sum_logits": -1.4541325569152832, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.4541325569152832, "logits_per_char": -0.7270662784576416, "num_chars": 2}, {"sum_logits": -1.5147591829299927, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.5147591829299927, "logits_per_char": -0.7573795914649963, "num_chars": 2}, {"sum_logits": -1.5762712955474854, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.5762712955474854, "logits_per_char": -0.7881356477737427, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 368, "native_id": "8-97", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4639286994934082, "incorrect_loss_raw": 1.3706812063852947, "correct_loss_per_char": 0.7319643497467041, "incorrect_loss_per_char": 0.6853406031926473, "correct_loss_per_token": 1.4639286994934082, "incorrect_loss_per_token": 1.3706812063852947, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.389924168586731, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.389924168586731, "logits_per_char": -0.6949620842933655, "num_chars": 2}, {"sum_logits": -1.4639286994934082, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4639286994934082, "logits_per_char": -0.7319643497467041, "num_chars": 2}, {"sum_logits": -1.395126461982727, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.395126461982727, "logits_per_char": -0.6975632309913635, "num_chars": 2}, {"sum_logits": -1.3269929885864258, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.3269929885864258, "logits_per_char": -0.6634964942932129, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 369, "native_id": "9-813", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5269008874893188, "incorrect_loss_raw": 1.35878590742747, "correct_loss_per_char": 0.7634504437446594, "incorrect_loss_per_char": 0.679392953713735, "correct_loss_per_token": 1.5269008874893188, "incorrect_loss_per_token": 1.35878590742747, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2045365571975708, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": true, "logits_per_token": -1.2045365571975708, "logits_per_char": -0.6022682785987854, "num_chars": 2}, {"sum_logits": -1.5269008874893188, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.5269008874893188, "logits_per_char": -0.7634504437446594, "num_chars": 2}, {"sum_logits": -1.4526923894882202, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.4526923894882202, "logits_per_char": -0.7263461947441101, "num_chars": 2}, {"sum_logits": -1.4191287755966187, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.4191287755966187, "logits_per_char": -0.7095643877983093, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 370, "native_id": "9-686", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.496337652206421, "incorrect_loss_raw": 1.3688405354817708, "correct_loss_per_char": 0.7481688261032104, "incorrect_loss_per_char": 0.6844202677408854, "correct_loss_per_token": 1.496337652206421, "incorrect_loss_per_token": 1.3688405354817708, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1992924213409424, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": true, "logits_per_token": -1.1992924213409424, "logits_per_char": -0.5996462106704712, "num_chars": 2}, {"sum_logits": -1.496337652206421, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.496337652206421, "logits_per_char": -0.7481688261032104, "num_chars": 2}, {"sum_logits": -1.3849451541900635, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.3849451541900635, "logits_per_char": -0.6924725770950317, "num_chars": 2}, {"sum_logits": -1.5222840309143066, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.5222840309143066, "logits_per_char": -0.7611420154571533, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 371, "native_id": "9-799", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4307653903961182, "incorrect_loss_raw": 1.3856453498204548, "correct_loss_per_char": 0.7153826951980591, "incorrect_loss_per_char": 0.6928226749102274, "correct_loss_per_token": 1.4307653903961182, "incorrect_loss_per_token": 1.3856453498204548, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2264295816421509, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": true, "logits_per_token": -1.2264295816421509, "logits_per_char": -0.6132147908210754, "num_chars": 2}, {"sum_logits": -1.4172981977462769, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.4172981977462769, "logits_per_char": -0.7086490988731384, "num_chars": 2}, {"sum_logits": -1.4307653903961182, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.4307653903961182, "logits_per_char": -0.7153826951980591, "num_chars": 2}, {"sum_logits": -1.513208270072937, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.513208270072937, "logits_per_char": -0.7566041350364685, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 372, "native_id": "1179", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5366061925888062, "incorrect_loss_raw": 1.3506266276041667, "correct_loss_per_char": 0.7683030962944031, "incorrect_loss_per_char": 0.6753133138020834, "correct_loss_per_token": 1.5366061925888062, "incorrect_loss_per_token": 1.3506266276041667, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.25077486038208, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": true, "logits_per_token": -1.25077486038208, "logits_per_char": -0.62538743019104, "num_chars": 2}, {"sum_logits": -1.5366061925888062, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.5366061925888062, "logits_per_char": -0.7683030962944031, "num_chars": 2}, {"sum_logits": -1.3580732345581055, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.3580732345581055, "logits_per_char": -0.6790366172790527, "num_chars": 2}, {"sum_logits": -1.4430317878723145, "num_tokens": 1, "num_tokens_all": 275, "is_greedy": false, "logits_per_token": -1.4430317878723145, "logits_per_char": -0.7215158939361572, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 373, "native_id": "1954", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1693460941314697, "incorrect_loss_raw": 1.4814964930216472, "correct_loss_per_char": 0.5846730470657349, "incorrect_loss_per_char": 0.7407482465108236, "correct_loss_per_token": 1.1693460941314697, "incorrect_loss_per_token": 1.4814964930216472, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1693460941314697, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": true, "logits_per_token": -1.1693460941314697, "logits_per_char": -0.5846730470657349, "num_chars": 2}, {"sum_logits": -1.579240083694458, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.579240083694458, "logits_per_char": -0.789620041847229, "num_chars": 2}, {"sum_logits": -1.4783456325531006, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.4783456325531006, "logits_per_char": -0.7391728162765503, "num_chars": 2}, {"sum_logits": -1.3869037628173828, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.3869037628173828, "logits_per_char": -0.6934518814086914, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 374, "native_id": "8-403", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.439225673675537, "incorrect_loss_raw": 1.3866290251413982, "correct_loss_per_char": 0.7196128368377686, "incorrect_loss_per_char": 0.6933145125706991, "correct_loss_per_token": 1.439225673675537, "incorrect_loss_per_token": 1.3866290251413982, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.288841724395752, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": true, "logits_per_token": -1.288841724395752, "logits_per_char": -0.644420862197876, "num_chars": 2}, {"sum_logits": -1.53578519821167, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.53578519821167, "logits_per_char": -0.767892599105835, "num_chars": 2}, {"sum_logits": -1.439225673675537, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.439225673675537, "logits_per_char": -0.7196128368377686, "num_chars": 2}, {"sum_logits": -1.3352601528167725, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.3352601528167725, "logits_per_char": -0.6676300764083862, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 375, "native_id": "9-576", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2693347930908203, "incorrect_loss_raw": 1.4364020427068074, "correct_loss_per_char": 0.6346673965454102, "incorrect_loss_per_char": 0.7182010213534037, "correct_loss_per_token": 1.2693347930908203, "incorrect_loss_per_token": 1.4364020427068074, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2693347930908203, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": true, "logits_per_token": -1.2693347930908203, "logits_per_char": -0.6346673965454102, "num_chars": 2}, {"sum_logits": -1.3844448328018188, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.3844448328018188, "logits_per_char": -0.6922224164009094, "num_chars": 2}, {"sum_logits": -1.4578207731246948, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.4578207731246948, "logits_per_char": -0.7289103865623474, "num_chars": 2}, {"sum_logits": -1.4669405221939087, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.4669405221939087, "logits_per_char": -0.7334702610969543, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 376, "native_id": "9-866", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4526917934417725, "incorrect_loss_raw": 1.4013232787450154, "correct_loss_per_char": 0.7263458967208862, "incorrect_loss_per_char": 0.7006616393725077, "correct_loss_per_token": 1.4526917934417725, "incorrect_loss_per_token": 1.4013232787450154, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0919660329818726, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": true, "logits_per_token": -1.0919660329818726, "logits_per_char": -0.5459830164909363, "num_chars": 2}, {"sum_logits": -1.4526917934417725, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.4526917934417725, "logits_per_char": -0.7263458967208862, "num_chars": 2}, {"sum_logits": -1.4992009401321411, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.4992009401321411, "logits_per_char": -0.7496004700660706, "num_chars": 2}, {"sum_logits": -1.6128028631210327, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.6128028631210327, "logits_per_char": -0.8064014315605164, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 377, "native_id": "7-208", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4976500272750854, "incorrect_loss_raw": 1.3656286398569744, "correct_loss_per_char": 0.7488250136375427, "incorrect_loss_per_char": 0.6828143199284872, "correct_loss_per_token": 1.4976500272750854, "incorrect_loss_per_token": 1.3656286398569744, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2141485214233398, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": true, "logits_per_token": -1.2141485214233398, "logits_per_char": -0.6070742607116699, "num_chars": 2}, {"sum_logits": -1.4622600078582764, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.4622600078582764, "logits_per_char": -0.7311300039291382, "num_chars": 2}, {"sum_logits": -1.4976500272750854, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.4976500272750854, "logits_per_char": -0.7488250136375427, "num_chars": 2}, {"sum_logits": -1.4204773902893066, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.4204773902893066, "logits_per_char": -0.7102386951446533, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 378, "native_id": "9-771", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4456981420516968, "incorrect_loss_raw": 1.3943424622217815, "correct_loss_per_char": 0.7228490710258484, "incorrect_loss_per_char": 0.6971712311108907, "correct_loss_per_token": 1.4456981420516968, "incorrect_loss_per_token": 1.3943424622217815, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.237066388130188, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": true, "logits_per_token": -1.237066388130188, "logits_per_char": -0.618533194065094, "num_chars": 2}, {"sum_logits": -1.366459608078003, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.366459608078003, "logits_per_char": -0.6832298040390015, "num_chars": 2}, {"sum_logits": -1.5795013904571533, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.5795013904571533, "logits_per_char": -0.7897506952285767, "num_chars": 2}, {"sum_logits": -1.4456981420516968, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.4456981420516968, "logits_per_char": -0.7228490710258484, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 379, "native_id": "998", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.428804636001587, "incorrect_loss_raw": 1.3829928239186604, "correct_loss_per_char": 0.7144023180007935, "incorrect_loss_per_char": 0.6914964119593302, "correct_loss_per_token": 1.428804636001587, "incorrect_loss_per_token": 1.3829928239186604, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3029597997665405, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": true, "logits_per_token": -1.3029597997665405, "logits_per_char": -0.6514798998832703, "num_chars": 2}, {"sum_logits": -1.4066818952560425, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.4066818952560425, "logits_per_char": -0.7033409476280212, "num_chars": 2}, {"sum_logits": -1.428804636001587, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.428804636001587, "logits_per_char": -0.7144023180007935, "num_chars": 2}, {"sum_logits": -1.4393367767333984, "num_tokens": 1, "num_tokens_all": 290, "is_greedy": false, "logits_per_token": -1.4393367767333984, "logits_per_char": -0.7196683883666992, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 380, "native_id": "433", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.473779559135437, "incorrect_loss_raw": 1.36829940478007, "correct_loss_per_char": 0.7368897795677185, "incorrect_loss_per_char": 0.684149702390035, "correct_loss_per_token": 1.473779559135437, "incorrect_loss_per_token": 1.36829940478007, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.263662338256836, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": true, "logits_per_token": -1.263662338256836, "logits_per_char": -0.631831169128418, "num_chars": 2}, {"sum_logits": -1.473779559135437, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.473779559135437, "logits_per_char": -0.7368897795677185, "num_chars": 2}, {"sum_logits": -1.3732985258102417, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.3732985258102417, "logits_per_char": -0.6866492629051208, "num_chars": 2}, {"sum_logits": -1.4679373502731323, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.4679373502731323, "logits_per_char": -0.7339686751365662, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 381, "native_id": "9-508", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2733477354049683, "incorrect_loss_raw": 1.4446123838424683, "correct_loss_per_char": 0.6366738677024841, "incorrect_loss_per_char": 0.7223061919212341, "correct_loss_per_token": 1.2733477354049683, "incorrect_loss_per_token": 1.4446123838424683, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2733477354049683, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": true, "logits_per_token": -1.2733477354049683, "logits_per_char": -0.6366738677024841, "num_chars": 2}, {"sum_logits": -1.601165771484375, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.601165771484375, "logits_per_char": -0.8005828857421875, "num_chars": 2}, {"sum_logits": -1.3862122297286987, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.3862122297286987, "logits_per_char": -0.6931061148643494, "num_chars": 2}, {"sum_logits": -1.346459150314331, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.346459150314331, "logits_per_char": -0.6732295751571655, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 382, "native_id": "7-561", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.477577805519104, "incorrect_loss_raw": 1.3664095401763916, "correct_loss_per_char": 0.738788902759552, "incorrect_loss_per_char": 0.6832047700881958, "correct_loss_per_token": 1.477577805519104, "incorrect_loss_per_token": 1.3664095401763916, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4073959589004517, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.4073959589004517, "logits_per_char": -0.7036979794502258, "num_chars": 2}, {"sum_logits": -1.3749836683273315, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.3749836683273315, "logits_per_char": -0.6874918341636658, "num_chars": 2}, {"sum_logits": -1.3168489933013916, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": true, "logits_per_token": -1.3168489933013916, "logits_per_char": -0.6584244966506958, "num_chars": 2}, {"sum_logits": -1.477577805519104, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.477577805519104, "logits_per_char": -0.738788902759552, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 383, "native_id": "7-976", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2462408542633057, "incorrect_loss_raw": 1.4467562039693196, "correct_loss_per_char": 0.6231204271316528, "incorrect_loss_per_char": 0.7233781019846598, "correct_loss_per_token": 1.2462408542633057, "incorrect_loss_per_token": 1.4467562039693196, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2462408542633057, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": true, "logits_per_token": -1.2462408542633057, "logits_per_char": -0.6231204271316528, "num_chars": 2}, {"sum_logits": -1.4532800912857056, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.4532800912857056, "logits_per_char": -0.7266400456428528, "num_chars": 2}, {"sum_logits": -1.4223393201828003, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.4223393201828003, "logits_per_char": -0.7111696600914001, "num_chars": 2}, {"sum_logits": -1.4646492004394531, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.4646492004394531, "logits_per_char": -0.7323246002197266, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 384, "native_id": "1635", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5264108180999756, "incorrect_loss_raw": 1.373448650042216, "correct_loss_per_char": 0.7632054090499878, "incorrect_loss_per_char": 0.686724325021108, "correct_loss_per_token": 1.5264108180999756, "incorrect_loss_per_token": 1.373448650042216, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1240993738174438, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": true, "logits_per_token": -1.1240993738174438, "logits_per_char": -0.5620496869087219, "num_chars": 2}, {"sum_logits": -1.3716387748718262, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.3716387748718262, "logits_per_char": -0.6858193874359131, "num_chars": 2}, {"sum_logits": -1.5264108180999756, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.5264108180999756, "logits_per_char": -0.7632054090499878, "num_chars": 2}, {"sum_logits": -1.624607801437378, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.624607801437378, "logits_per_char": -0.812303900718689, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 385, "native_id": "7-875", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1204016208648682, "incorrect_loss_raw": 1.5079460938771565, "correct_loss_per_char": 0.5602008104324341, "incorrect_loss_per_char": 0.7539730469385783, "correct_loss_per_token": 1.1204016208648682, "incorrect_loss_per_token": 1.5079460938771565, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1204016208648682, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": true, "logits_per_token": -1.1204016208648682, "logits_per_char": -0.5602008104324341, "num_chars": 2}, {"sum_logits": -1.436902403831482, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.436902403831482, "logits_per_char": -0.718451201915741, "num_chars": 2}, {"sum_logits": -1.4350171089172363, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.4350171089172363, "logits_per_char": -0.7175085544586182, "num_chars": 2}, {"sum_logits": -1.6519187688827515, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.6519187688827515, "logits_per_char": -0.8259593844413757, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 386, "native_id": "7-1053", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5791704654693604, "incorrect_loss_raw": 1.347388784090678, "correct_loss_per_char": 0.7895852327346802, "incorrect_loss_per_char": 0.673694392045339, "correct_loss_per_token": 1.5791704654693604, "incorrect_loss_per_token": 1.347388784090678, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1544790267944336, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": true, "logits_per_token": -1.1544790267944336, "logits_per_char": -0.5772395133972168, "num_chars": 2}, {"sum_logits": -1.5791704654693604, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.5791704654693604, "logits_per_char": -0.7895852327346802, "num_chars": 2}, {"sum_logits": -1.473074197769165, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.473074197769165, "logits_per_char": -0.7365370988845825, "num_chars": 2}, {"sum_logits": -1.414613127708435, "num_tokens": 1, "num_tokens_all": 264, "is_greedy": false, "logits_per_token": -1.414613127708435, "logits_per_char": -0.7073065638542175, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 387, "native_id": "9-957", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.547594666481018, "incorrect_loss_raw": 1.3572381734848022, "correct_loss_per_char": 0.773797333240509, "incorrect_loss_per_char": 0.6786190867424011, "correct_loss_per_token": 1.547594666481018, "incorrect_loss_per_token": 1.3572381734848022, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1654807329177856, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": true, "logits_per_token": -1.1654807329177856, "logits_per_char": -0.5827403664588928, "num_chars": 2}, {"sum_logits": -1.4285409450531006, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.4285409450531006, "logits_per_char": -0.7142704725265503, "num_chars": 2}, {"sum_logits": -1.4776928424835205, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.4776928424835205, "logits_per_char": -0.7388464212417603, "num_chars": 2}, {"sum_logits": -1.547594666481018, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.547594666481018, "logits_per_char": -0.773797333240509, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 388, "native_id": "1150", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4058771133422852, "incorrect_loss_raw": 1.3978379170099895, "correct_loss_per_char": 0.7029385566711426, "incorrect_loss_per_char": 0.6989189585049947, "correct_loss_per_token": 1.4058771133422852, "incorrect_loss_per_token": 1.3978379170099895, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2324564456939697, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": true, "logits_per_token": -1.2324564456939697, "logits_per_char": -0.6162282228469849, "num_chars": 2}, {"sum_logits": -1.4882913827896118, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.4882913827896118, "logits_per_char": -0.7441456913948059, "num_chars": 2}, {"sum_logits": -1.4058771133422852, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.4058771133422852, "logits_per_char": -0.7029385566711426, "num_chars": 2}, {"sum_logits": -1.4727659225463867, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.4727659225463867, "logits_per_char": -0.7363829612731934, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 389, "native_id": "8-240", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2473889589309692, "incorrect_loss_raw": 1.4497569004694622, "correct_loss_per_char": 0.6236944794654846, "incorrect_loss_per_char": 0.7248784502347311, "correct_loss_per_token": 1.2473889589309692, "incorrect_loss_per_token": 1.4497569004694622, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2473889589309692, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": true, "logits_per_token": -1.2473889589309692, "logits_per_char": -0.6236944794654846, "num_chars": 2}, {"sum_logits": -1.3537046909332275, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.3537046909332275, "logits_per_char": -0.6768523454666138, "num_chars": 2}, {"sum_logits": -1.476889967918396, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.476889967918396, "logits_per_char": -0.738444983959198, "num_chars": 2}, {"sum_logits": -1.5186760425567627, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.5186760425567627, "logits_per_char": -0.7593380212783813, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 390, "native_id": "9-554", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4650882482528687, "incorrect_loss_raw": 1.3790855407714844, "correct_loss_per_char": 0.7325441241264343, "incorrect_loss_per_char": 0.6895427703857422, "correct_loss_per_token": 1.4650882482528687, "incorrect_loss_per_token": 1.3790855407714844, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.185072898864746, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": true, "logits_per_token": -1.185072898864746, "logits_per_char": -0.592536449432373, "num_chars": 2}, {"sum_logits": -1.5423439741134644, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.5423439741134644, "logits_per_char": -0.7711719870567322, "num_chars": 2}, {"sum_logits": -1.4650882482528687, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.4650882482528687, "logits_per_char": -0.7325441241264343, "num_chars": 2}, {"sum_logits": -1.4098397493362427, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.4098397493362427, "logits_per_char": -0.7049198746681213, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 391, "native_id": "9-135", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4346922636032104, "incorrect_loss_raw": 1.3895653088887532, "correct_loss_per_char": 0.7173461318016052, "incorrect_loss_per_char": 0.6947826544443766, "correct_loss_per_token": 1.4346922636032104, "incorrect_loss_per_token": 1.3895653088887532, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2110228538513184, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": true, "logits_per_token": -1.2110228538513184, "logits_per_char": -0.6055114269256592, "num_chars": 2}, {"sum_logits": -1.4346922636032104, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.4346922636032104, "logits_per_char": -0.7173461318016052, "num_chars": 2}, {"sum_logits": -1.5265612602233887, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.5265612602233887, "logits_per_char": -0.7632806301116943, "num_chars": 2}, {"sum_logits": -1.4311118125915527, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.4311118125915527, "logits_per_char": -0.7155559062957764, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 392, "native_id": "7-1096", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4249755144119263, "incorrect_loss_raw": 1.3876893917719524, "correct_loss_per_char": 0.7124877572059631, "incorrect_loss_per_char": 0.6938446958859762, "correct_loss_per_token": 1.4249755144119263, "incorrect_loss_per_token": 1.3876893917719524, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2893948554992676, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": true, "logits_per_token": -1.2893948554992676, "logits_per_char": -0.6446974277496338, "num_chars": 2}, {"sum_logits": -1.4249755144119263, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.4249755144119263, "logits_per_char": -0.7124877572059631, "num_chars": 2}, {"sum_logits": -1.412363052368164, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.412363052368164, "logits_per_char": -0.706181526184082, "num_chars": 2}, {"sum_logits": -1.4613102674484253, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.4613102674484253, "logits_per_char": -0.7306551337242126, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 393, "native_id": "841", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4458816051483154, "incorrect_loss_raw": 1.3777916034062703, "correct_loss_per_char": 0.7229408025741577, "incorrect_loss_per_char": 0.6888958017031351, "correct_loss_per_token": 1.4458816051483154, "incorrect_loss_per_token": 1.3777916034062703, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4610834121704102, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.4610834121704102, "logits_per_char": -0.7305417060852051, "num_chars": 2}, {"sum_logits": -1.2712515592575073, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": true, "logits_per_token": -1.2712515592575073, "logits_per_char": -0.6356257796287537, "num_chars": 2}, {"sum_logits": -1.4458816051483154, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.4458816051483154, "logits_per_char": -0.7229408025741577, "num_chars": 2}, {"sum_logits": -1.4010398387908936, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.4010398387908936, "logits_per_char": -0.7005199193954468, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 394, "native_id": "7-146", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.140891432762146, "incorrect_loss_raw": 1.4947193463643391, "correct_loss_per_char": 0.570445716381073, "incorrect_loss_per_char": 0.7473596731821696, "correct_loss_per_token": 1.140891432762146, "incorrect_loss_per_token": 1.4947193463643391, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.140891432762146, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": true, "logits_per_token": -1.140891432762146, "logits_per_char": -0.570445716381073, "num_chars": 2}, {"sum_logits": -1.5570495128631592, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.5570495128631592, "logits_per_char": -0.7785247564315796, "num_chars": 2}, {"sum_logits": -1.37840735912323, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.37840735912323, "logits_per_char": -0.689203679561615, "num_chars": 2}, {"sum_logits": -1.5487011671066284, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.5487011671066284, "logits_per_char": -0.7743505835533142, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 395, "native_id": "1554", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3519196510314941, "incorrect_loss_raw": 1.407084862391154, "correct_loss_per_char": 0.6759598255157471, "incorrect_loss_per_char": 0.703542431195577, "correct_loss_per_token": 1.3519196510314941, "incorrect_loss_per_token": 1.407084862391154, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4428818225860596, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.4428818225860596, "logits_per_char": -0.7214409112930298, "num_chars": 2}, {"sum_logits": -1.3900415897369385, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.3900415897369385, "logits_per_char": -0.6950207948684692, "num_chars": 2}, {"sum_logits": -1.3883311748504639, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.3883311748504639, "logits_per_char": -0.6941655874252319, "num_chars": 2}, {"sum_logits": -1.3519196510314941, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": true, "logits_per_token": -1.3519196510314941, "logits_per_char": -0.6759598255157471, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 396, "native_id": "9-731", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.258938193321228, "incorrect_loss_raw": 1.4416958888371785, "correct_loss_per_char": 0.629469096660614, "incorrect_loss_per_char": 0.7208479444185892, "correct_loss_per_token": 1.258938193321228, "incorrect_loss_per_token": 1.4416958888371785, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.258938193321228, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": true, "logits_per_token": -1.258938193321228, "logits_per_char": -0.629469096660614, "num_chars": 2}, {"sum_logits": -1.403015375137329, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": false, "logits_per_token": -1.403015375137329, "logits_per_char": -0.7015076875686646, "num_chars": 2}, {"sum_logits": -1.4318050146102905, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": false, "logits_per_token": -1.4318050146102905, "logits_per_char": -0.7159025073051453, "num_chars": 2}, {"sum_logits": -1.490267276763916, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": false, "logits_per_token": -1.490267276763916, "logits_per_char": -0.745133638381958, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 397, "native_id": "1780", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2264699935913086, "incorrect_loss_raw": 1.469042976697286, "correct_loss_per_char": 0.6132349967956543, "incorrect_loss_per_char": 0.734521488348643, "correct_loss_per_token": 1.2264699935913086, "incorrect_loss_per_token": 1.469042976697286, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2264699935913086, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": true, "logits_per_token": -1.2264699935913086, "logits_per_char": -0.6132349967956543, "num_chars": 2}, {"sum_logits": -1.2804503440856934, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.2804503440856934, "logits_per_char": -0.6402251720428467, "num_chars": 2}, {"sum_logits": -1.5000907182693481, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.5000907182693481, "logits_per_char": -0.7500453591346741, "num_chars": 2}, {"sum_logits": -1.6265878677368164, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.6265878677368164, "logits_per_char": -0.8132939338684082, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 398, "native_id": "7-1077", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3977937698364258, "incorrect_loss_raw": 1.395306666692098, "correct_loss_per_char": 0.6988968849182129, "incorrect_loss_per_char": 0.697653333346049, "correct_loss_per_token": 1.3977937698364258, "incorrect_loss_per_token": 1.395306666692098, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.30152428150177, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": true, "logits_per_token": -1.30152428150177, "logits_per_char": -0.650762140750885, "num_chars": 2}, {"sum_logits": -1.5228683948516846, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.5228683948516846, "logits_per_char": -0.7614341974258423, "num_chars": 2}, {"sum_logits": -1.3615273237228394, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.3615273237228394, "logits_per_char": -0.6807636618614197, "num_chars": 2}, {"sum_logits": -1.3977937698364258, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.3977937698364258, "logits_per_char": -0.6988968849182129, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 399, "native_id": "8-494", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.458747386932373, "incorrect_loss_raw": 1.385615388552348, "correct_loss_per_char": 0.7293736934661865, "incorrect_loss_per_char": 0.692807694276174, "correct_loss_per_token": 1.458747386932373, "incorrect_loss_per_token": 1.385615388552348, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.25490403175354, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": true, "logits_per_token": -1.25490403175354, "logits_per_char": -0.62745201587677, "num_chars": 2}, {"sum_logits": -1.523945689201355, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.523945689201355, "logits_per_char": -0.7619728446006775, "num_chars": 2}, {"sum_logits": -1.458747386932373, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.458747386932373, "logits_per_char": -0.7293736934661865, "num_chars": 2}, {"sum_logits": -1.3779964447021484, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.3779964447021484, "logits_per_char": -0.6889982223510742, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 400, "native_id": "936", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.15109384059906, "incorrect_loss_raw": 1.4924116134643555, "correct_loss_per_char": 0.57554692029953, "incorrect_loss_per_char": 0.7462058067321777, "correct_loss_per_token": 1.15109384059906, "incorrect_loss_per_token": 1.4924116134643555, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.15109384059906, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": true, "logits_per_token": -1.15109384059906, "logits_per_char": -0.57554692029953, "num_chars": 2}, {"sum_logits": -1.4134408235549927, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.4134408235549927, "logits_per_char": -0.7067204117774963, "num_chars": 2}, {"sum_logits": -1.4488704204559326, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.4488704204559326, "logits_per_char": -0.7244352102279663, "num_chars": 2}, {"sum_logits": -1.6149235963821411, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.6149235963821411, "logits_per_char": -0.8074617981910706, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 401, "native_id": "8-478", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.52614164352417, "incorrect_loss_raw": 1.3705895344416301, "correct_loss_per_char": 0.763070821762085, "incorrect_loss_per_char": 0.6852947672208151, "correct_loss_per_token": 1.52614164352417, "incorrect_loss_per_token": 1.3705895344416301, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0992686748504639, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": true, "logits_per_token": -1.0992686748504639, "logits_per_char": -0.5496343374252319, "num_chars": 2}, {"sum_logits": -1.52614164352417, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.52614164352417, "logits_per_char": -0.763070821762085, "num_chars": 2}, {"sum_logits": -1.478312373161316, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.478312373161316, "logits_per_char": -0.739156186580658, "num_chars": 2}, {"sum_logits": -1.5341875553131104, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.5341875553131104, "logits_per_char": -0.7670937776565552, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 402, "native_id": "9-669", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4078378677368164, "incorrect_loss_raw": 1.4197088082631428, "correct_loss_per_char": 0.7039189338684082, "incorrect_loss_per_char": 0.7098544041315714, "correct_loss_per_token": 1.4078378677368164, "incorrect_loss_per_token": 1.4197088082631428, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1166616678237915, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": true, "logits_per_token": -1.1166616678237915, "logits_per_char": -0.5583308339118958, "num_chars": 2}, {"sum_logits": -1.6189137697219849, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.6189137697219849, "logits_per_char": -0.8094568848609924, "num_chars": 2}, {"sum_logits": -1.4078378677368164, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.4078378677368164, "logits_per_char": -0.7039189338684082, "num_chars": 2}, {"sum_logits": -1.5235509872436523, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.5235509872436523, "logits_per_char": -0.7617754936218262, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 403, "native_id": "7-732", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2590969800949097, "incorrect_loss_raw": 1.4435772895812988, "correct_loss_per_char": 0.6295484900474548, "incorrect_loss_per_char": 0.7217886447906494, "correct_loss_per_token": 1.2590969800949097, "incorrect_loss_per_token": 1.4435772895812988, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2590969800949097, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": true, "logits_per_token": -1.2590969800949097, "logits_per_char": -0.6295484900474548, "num_chars": 2}, {"sum_logits": -1.4410388469696045, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.4410388469696045, "logits_per_char": -0.7205194234848022, "num_chars": 2}, {"sum_logits": -1.4074033498764038, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.4074033498764038, "logits_per_char": -0.7037016749382019, "num_chars": 2}, {"sum_logits": -1.4822896718978882, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.4822896718978882, "logits_per_char": -0.7411448359489441, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 404, "native_id": "7-658", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4762966632843018, "incorrect_loss_raw": 1.379581610361735, "correct_loss_per_char": 0.7381483316421509, "incorrect_loss_per_char": 0.6897908051808676, "correct_loss_per_token": 1.4762966632843018, "incorrect_loss_per_token": 1.379581610361735, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.190172553062439, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": true, "logits_per_token": -1.190172553062439, "logits_per_char": -0.5950862765312195, "num_chars": 2}, {"sum_logits": -1.4141117334365845, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.4141117334365845, "logits_per_char": -0.7070558667182922, "num_chars": 2}, {"sum_logits": -1.5344605445861816, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.5344605445861816, "logits_per_char": -0.7672302722930908, "num_chars": 2}, {"sum_logits": -1.4762966632843018, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.4762966632843018, "logits_per_char": -0.7381483316421509, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 405, "native_id": "1003", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.480368733406067, "incorrect_loss_raw": 1.385005513827006, "correct_loss_per_char": 0.7401843667030334, "incorrect_loss_per_char": 0.692502756913503, "correct_loss_per_token": 1.480368733406067, "incorrect_loss_per_token": 1.385005513827006, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.156496286392212, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": true, "logits_per_token": -1.156496286392212, "logits_per_char": -0.578248143196106, "num_chars": 2}, {"sum_logits": -1.480368733406067, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.480368733406067, "logits_per_char": -0.7401843667030334, "num_chars": 2}, {"sum_logits": -1.3691149950027466, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.3691149950027466, "logits_per_char": -0.6845574975013733, "num_chars": 2}, {"sum_logits": -1.6294052600860596, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.6294052600860596, "logits_per_char": -0.8147026300430298, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 406, "native_id": "8-62", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2211718559265137, "incorrect_loss_raw": 1.4578288793563843, "correct_loss_per_char": 0.6105859279632568, "incorrect_loss_per_char": 0.7289144396781921, "correct_loss_per_token": 1.2211718559265137, "incorrect_loss_per_token": 1.4578288793563843, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2211718559265137, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": true, "logits_per_token": -1.2211718559265137, "logits_per_char": -0.6105859279632568, "num_chars": 2}, {"sum_logits": -1.3939069509506226, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.3939069509506226, "logits_per_char": -0.6969534754753113, "num_chars": 2}, {"sum_logits": -1.5052849054336548, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.5052849054336548, "logits_per_char": -0.7526424527168274, "num_chars": 2}, {"sum_logits": -1.4742947816848755, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.4742947816848755, "logits_per_char": -0.7371473908424377, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 407, "native_id": "7-386", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.50443434715271, "incorrect_loss_raw": 1.3702431519826253, "correct_loss_per_char": 0.752217173576355, "incorrect_loss_per_char": 0.6851215759913126, "correct_loss_per_token": 1.50443434715271, "incorrect_loss_per_token": 1.3702431519826253, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1984447240829468, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": true, "logits_per_token": -1.1984447240829468, "logits_per_char": -0.5992223620414734, "num_chars": 2}, {"sum_logits": -1.50443434715271, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.50443434715271, "logits_per_char": -0.752217173576355, "num_chars": 2}, {"sum_logits": -1.3304671049118042, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.3304671049118042, "logits_per_char": -0.6652335524559021, "num_chars": 2}, {"sum_logits": -1.581817626953125, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.581817626953125, "logits_per_char": -0.7909088134765625, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 408, "native_id": "257", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4173038005828857, "incorrect_loss_raw": 1.3857500950495403, "correct_loss_per_char": 0.7086519002914429, "incorrect_loss_per_char": 0.6928750475247701, "correct_loss_per_token": 1.4173038005828857, "incorrect_loss_per_token": 1.3857500950495403, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3509340286254883, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.3509340286254883, "logits_per_char": -0.6754670143127441, "num_chars": 2}, {"sum_logits": -1.45979642868042, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.45979642868042, "logits_per_char": -0.72989821434021, "num_chars": 2}, {"sum_logits": -1.3465198278427124, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": true, "logits_per_token": -1.3465198278427124, "logits_per_char": -0.6732599139213562, "num_chars": 2}, {"sum_logits": -1.4173038005828857, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.4173038005828857, "logits_per_char": -0.7086519002914429, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 409, "native_id": "147", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5829448699951172, "incorrect_loss_raw": 1.3442395528157551, "correct_loss_per_char": 0.7914724349975586, "incorrect_loss_per_char": 0.6721197764078776, "correct_loss_per_token": 1.5829448699951172, "incorrect_loss_per_token": 1.3442395528157551, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2066160440444946, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": true, "logits_per_token": -1.2066160440444946, "logits_per_char": -0.6033080220222473, "num_chars": 2}, {"sum_logits": -1.4463279247283936, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.4463279247283936, "logits_per_char": -0.7231639623641968, "num_chars": 2}, {"sum_logits": -1.3797746896743774, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.3797746896743774, "logits_per_char": -0.6898873448371887, "num_chars": 2}, {"sum_logits": -1.5829448699951172, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.5829448699951172, "logits_per_char": -0.7914724349975586, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 410, "native_id": "7-599", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5022575855255127, "incorrect_loss_raw": 1.3667619228363037, "correct_loss_per_char": 0.7511287927627563, "incorrect_loss_per_char": 0.6833809614181519, "correct_loss_per_token": 1.5022575855255127, "incorrect_loss_per_token": 1.3667619228363037, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.183741569519043, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": true, "logits_per_token": -1.183741569519043, "logits_per_char": -0.5918707847595215, "num_chars": 2}, {"sum_logits": -1.503446102142334, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.503446102142334, "logits_per_char": -0.751723051071167, "num_chars": 2}, {"sum_logits": -1.4130980968475342, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.4130980968475342, "logits_per_char": -0.7065490484237671, "num_chars": 2}, {"sum_logits": -1.5022575855255127, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.5022575855255127, "logits_per_char": -0.7511287927627563, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 411, "native_id": "8-92", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.448201298713684, "incorrect_loss_raw": 1.3787585496902466, "correct_loss_per_char": 0.724100649356842, "incorrect_loss_per_char": 0.6893792748451233, "correct_loss_per_token": 1.448201298713684, "incorrect_loss_per_token": 1.3787585496902466, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2269330024719238, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": true, "logits_per_token": -1.2269330024719238, "logits_per_char": -0.6134665012359619, "num_chars": 2}, {"sum_logits": -1.448201298713684, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.448201298713684, "logits_per_char": -0.724100649356842, "num_chars": 2}, {"sum_logits": -1.445340871810913, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.445340871810913, "logits_per_char": -0.7226704359054565, "num_chars": 2}, {"sum_logits": -1.4640017747879028, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.4640017747879028, "logits_per_char": -0.7320008873939514, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 412, "native_id": "354", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1735961437225342, "incorrect_loss_raw": 1.4818555911382039, "correct_loss_per_char": 0.5867980718612671, "incorrect_loss_per_char": 0.7409277955691019, "correct_loss_per_token": 1.1735961437225342, "incorrect_loss_per_token": 1.4818555911382039, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1735961437225342, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": true, "logits_per_token": -1.1735961437225342, "logits_per_char": -0.5867980718612671, "num_chars": 2}, {"sum_logits": -1.4393776655197144, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.4393776655197144, "logits_per_char": -0.7196888327598572, "num_chars": 2}, {"sum_logits": -1.5901014804840088, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.5901014804840088, "logits_per_char": -0.7950507402420044, "num_chars": 2}, {"sum_logits": -1.4160876274108887, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.4160876274108887, "logits_per_char": -0.7080438137054443, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 413, "native_id": "9-966", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4630075693130493, "incorrect_loss_raw": 1.3813717762629192, "correct_loss_per_char": 0.7315037846565247, "incorrect_loss_per_char": 0.6906858881314596, "correct_loss_per_token": 1.4630075693130493, "incorrect_loss_per_token": 1.3813717762629192, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2153360843658447, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": true, "logits_per_token": -1.2153360843658447, "logits_per_char": -0.6076680421829224, "num_chars": 2}, {"sum_logits": -1.4630075693130493, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.4630075693130493, "logits_per_char": -0.7315037846565247, "num_chars": 2}, {"sum_logits": -1.3933483362197876, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.3933483362197876, "logits_per_char": -0.6966741681098938, "num_chars": 2}, {"sum_logits": -1.535430908203125, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.535430908203125, "logits_per_char": -0.7677154541015625, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 414, "native_id": "9-612", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3407135009765625, "incorrect_loss_raw": 1.4134337107340496, "correct_loss_per_char": 0.6703567504882812, "incorrect_loss_per_char": 0.7067168553670248, "correct_loss_per_token": 1.3407135009765625, "incorrect_loss_per_token": 1.4134337107340496, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3688113689422607, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.3688113689422607, "logits_per_char": -0.6844056844711304, "num_chars": 2}, {"sum_logits": -1.3407135009765625, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": true, "logits_per_token": -1.3407135009765625, "logits_per_char": -0.6703567504882812, "num_chars": 2}, {"sum_logits": -1.367978572845459, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.367978572845459, "logits_per_char": -0.6839892864227295, "num_chars": 2}, {"sum_logits": -1.5035111904144287, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.5035111904144287, "logits_per_char": -0.7517555952072144, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 415, "native_id": "9-548", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1914334297180176, "incorrect_loss_raw": 1.547366778055827, "correct_loss_per_char": 0.5957167148590088, "incorrect_loss_per_char": 0.7736833890279134, "correct_loss_per_token": 1.1914334297180176, "incorrect_loss_per_token": 1.547366778055827, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1914334297180176, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.1914334297180176, "logits_per_char": -0.5957167148590088, "num_chars": 2}, {"sum_logits": -1.9375478029251099, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.9375478029251099, "logits_per_char": -0.9687739014625549, "num_chars": 2}, {"sum_logits": -1.5435930490493774, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.5435930490493774, "logits_per_char": -0.7717965245246887, "num_chars": 2}, {"sum_logits": -1.1609594821929932, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": true, "logits_per_token": -1.1609594821929932, "logits_per_char": -0.5804797410964966, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 416, "native_id": "9-429", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.160907506942749, "incorrect_loss_raw": 1.489459713300069, "correct_loss_per_char": 0.5804537534713745, "incorrect_loss_per_char": 0.7447298566500345, "correct_loss_per_token": 1.160907506942749, "incorrect_loss_per_token": 1.489459713300069, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.160907506942749, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": true, "logits_per_token": -1.160907506942749, "logits_per_char": -0.5804537534713745, "num_chars": 2}, {"sum_logits": -1.4334248304367065, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": false, "logits_per_token": -1.4334248304367065, "logits_per_char": -0.7167124152183533, "num_chars": 2}, {"sum_logits": -1.371795415878296, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": false, "logits_per_token": -1.371795415878296, "logits_per_char": -0.685897707939148, "num_chars": 2}, {"sum_logits": -1.663158893585205, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": false, "logits_per_token": -1.663158893585205, "logits_per_char": -0.8315794467926025, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 417, "native_id": "7-95", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4422246217727661, "incorrect_loss_raw": 1.3895847002665203, "correct_loss_per_char": 0.7211123108863831, "incorrect_loss_per_char": 0.6947923501332601, "correct_loss_per_token": 1.4422246217727661, "incorrect_loss_per_token": 1.3895847002665203, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.153490424156189, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": true, "logits_per_token": -1.153490424156189, "logits_per_char": -0.5767452120780945, "num_chars": 2}, {"sum_logits": -1.5142532587051392, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.5142532587051392, "logits_per_char": -0.7571266293525696, "num_chars": 2}, {"sum_logits": -1.4422246217727661, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.4422246217727661, "logits_per_char": -0.7211123108863831, "num_chars": 2}, {"sum_logits": -1.5010104179382324, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.5010104179382324, "logits_per_char": -0.7505052089691162, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 418, "native_id": "1560", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.147584080696106, "incorrect_loss_raw": 1.492014726003011, "correct_loss_per_char": 0.573792040348053, "incorrect_loss_per_char": 0.7460073630015055, "correct_loss_per_token": 1.147584080696106, "incorrect_loss_per_token": 1.492014726003011, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.147584080696106, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": true, "logits_per_token": -1.147584080696106, "logits_per_char": -0.573792040348053, "num_chars": 2}, {"sum_logits": -1.3896914720535278, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.3896914720535278, "logits_per_char": -0.6948457360267639, "num_chars": 2}, {"sum_logits": -1.5077528953552246, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.5077528953552246, "logits_per_char": -0.7538764476776123, "num_chars": 2}, {"sum_logits": -1.5785998106002808, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.5785998106002808, "logits_per_char": -0.7892999053001404, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 419, "native_id": "9-461", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5264233350753784, "incorrect_loss_raw": 1.3512459595998128, "correct_loss_per_char": 0.7632116675376892, "incorrect_loss_per_char": 0.6756229797999064, "correct_loss_per_token": 1.5264233350753784, "incorrect_loss_per_token": 1.3512459595998128, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3973783254623413, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.3973783254623413, "logits_per_char": -0.6986891627311707, "num_chars": 2}, {"sum_logits": -1.3797394037246704, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.3797394037246704, "logits_per_char": -0.6898697018623352, "num_chars": 2}, {"sum_logits": -1.5264233350753784, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": false, "logits_per_token": -1.5264233350753784, "logits_per_char": -0.7632116675376892, "num_chars": 2}, {"sum_logits": -1.2766201496124268, "num_tokens": 1, "num_tokens_all": 298, "is_greedy": true, "logits_per_token": -1.2766201496124268, "logits_per_char": -0.6383100748062134, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 420, "native_id": "9-490", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4123671054840088, "incorrect_loss_raw": 1.3990471760431926, "correct_loss_per_char": 0.7061835527420044, "incorrect_loss_per_char": 0.6995235880215963, "correct_loss_per_token": 1.4123671054840088, "incorrect_loss_per_token": 1.3990471760431926, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2080228328704834, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": true, "logits_per_token": -1.2080228328704834, "logits_per_char": -0.6040114164352417, "num_chars": 2}, {"sum_logits": -1.4230868816375732, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.4230868816375732, "logits_per_char": -0.7115434408187866, "num_chars": 2}, {"sum_logits": -1.4123671054840088, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.4123671054840088, "logits_per_char": -0.7061835527420044, "num_chars": 2}, {"sum_logits": -1.566031813621521, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.566031813621521, "logits_per_char": -0.7830159068107605, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 421, "native_id": "9-301", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.495266079902649, "incorrect_loss_raw": 1.3641548951466878, "correct_loss_per_char": 0.7476330399513245, "incorrect_loss_per_char": 0.6820774475733439, "correct_loss_per_token": 1.495266079902649, "incorrect_loss_per_token": 1.3641548951466878, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2310175895690918, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": true, "logits_per_token": -1.2310175895690918, "logits_per_char": -0.6155087947845459, "num_chars": 2}, {"sum_logits": -1.406461477279663, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.406461477279663, "logits_per_char": -0.7032307386398315, "num_chars": 2}, {"sum_logits": -1.495266079902649, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.495266079902649, "logits_per_char": -0.7476330399513245, "num_chars": 2}, {"sum_logits": -1.4549856185913086, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.4549856185913086, "logits_per_char": -0.7274928092956543, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 422, "native_id": "60", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2784528732299805, "incorrect_loss_raw": 1.4413565397262573, "correct_loss_per_char": 0.6392264366149902, "incorrect_loss_per_char": 0.7206782698631287, "correct_loss_per_token": 1.2784528732299805, "incorrect_loss_per_token": 1.4413565397262573, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.405134916305542, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.405134916305542, "logits_per_char": -0.702567458152771, "num_chars": 2}, {"sum_logits": -1.578639030456543, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.578639030456543, "logits_per_char": -0.7893195152282715, "num_chars": 2}, {"sum_logits": -1.2784528732299805, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": true, "logits_per_token": -1.2784528732299805, "logits_per_char": -0.6392264366149902, "num_chars": 2}, {"sum_logits": -1.340295672416687, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.340295672416687, "logits_per_char": -0.6701478362083435, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 423, "native_id": "9-894", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.331273078918457, "incorrect_loss_raw": 1.4442191918690999, "correct_loss_per_char": 0.6656365394592285, "incorrect_loss_per_char": 0.7221095959345499, "correct_loss_per_token": 1.331273078918457, "incorrect_loss_per_token": 1.4442191918690999, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1084237098693848, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": true, "logits_per_token": -1.1084237098693848, "logits_per_char": -0.5542118549346924, "num_chars": 2}, {"sum_logits": -1.6369138956069946, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.6369138956069946, "logits_per_char": -0.8184569478034973, "num_chars": 2}, {"sum_logits": -1.331273078918457, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.331273078918457, "logits_per_char": -0.6656365394592285, "num_chars": 2}, {"sum_logits": -1.5873199701309204, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.5873199701309204, "logits_per_char": -0.7936599850654602, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 424, "native_id": "9-895", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3173363208770752, "incorrect_loss_raw": 1.421394149462382, "correct_loss_per_char": 0.6586681604385376, "incorrect_loss_per_char": 0.710697074731191, "correct_loss_per_token": 1.3173363208770752, "incorrect_loss_per_token": 1.421394149462382, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3173363208770752, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.3173363208770752, "logits_per_char": -0.6586681604385376, "num_chars": 2}, {"sum_logits": -1.4860384464263916, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4860384464263916, "logits_per_char": -0.7430192232131958, "num_chars": 2}, {"sum_logits": -1.469868779182434, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.469868779182434, "logits_per_char": -0.734934389591217, "num_chars": 2}, {"sum_logits": -1.3082752227783203, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.3082752227783203, "logits_per_char": -0.6541376113891602, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 425, "native_id": "9-281", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.252081036567688, "incorrect_loss_raw": 1.447900931040446, "correct_loss_per_char": 0.626040518283844, "incorrect_loss_per_char": 0.723950465520223, "correct_loss_per_token": 1.252081036567688, "incorrect_loss_per_token": 1.447900931040446, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.252081036567688, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": true, "logits_per_token": -1.252081036567688, "logits_per_char": -0.626040518283844, "num_chars": 2}, {"sum_logits": -1.559586763381958, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.559586763381958, "logits_per_char": -0.779793381690979, "num_chars": 2}, {"sum_logits": -1.4485268592834473, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.4485268592834473, "logits_per_char": -0.7242634296417236, "num_chars": 2}, {"sum_logits": -1.3355891704559326, "num_tokens": 1, "num_tokens_all": 278, "is_greedy": false, "logits_per_token": -1.3355891704559326, "logits_per_char": -0.6677945852279663, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 426, "native_id": "202", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5291528701782227, "incorrect_loss_raw": 1.3798142274220784, "correct_loss_per_char": 0.7645764350891113, "incorrect_loss_per_char": 0.6899071137110392, "correct_loss_per_token": 1.5291528701782227, "incorrect_loss_per_token": 1.3798142274220784, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0513687133789062, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": true, "logits_per_token": -1.0513687133789062, "logits_per_char": -0.5256843566894531, "num_chars": 2}, {"sum_logits": -1.5045191049575806, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.5045191049575806, "logits_per_char": -0.7522595524787903, "num_chars": 2}, {"sum_logits": -1.5291528701782227, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.5291528701782227, "logits_per_char": -0.7645764350891113, "num_chars": 2}, {"sum_logits": -1.5835548639297485, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.5835548639297485, "logits_per_char": -0.7917774319648743, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 427, "native_id": "1937", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4818286895751953, "incorrect_loss_raw": 1.3950558106104534, "correct_loss_per_char": 0.7409143447875977, "incorrect_loss_per_char": 0.6975279053052267, "correct_loss_per_token": 1.4818286895751953, "incorrect_loss_per_token": 1.3950558106104534, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0967721939086914, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": true, "logits_per_token": -1.0967721939086914, "logits_per_char": -0.5483860969543457, "num_chars": 2}, {"sum_logits": -1.38097083568573, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.38097083568573, "logits_per_char": -0.690485417842865, "num_chars": 2}, {"sum_logits": -1.4818286895751953, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.4818286895751953, "logits_per_char": -0.7409143447875977, "num_chars": 2}, {"sum_logits": -1.7074244022369385, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.7074244022369385, "logits_per_char": -0.8537122011184692, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 428, "native_id": "620", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1083264350891113, "incorrect_loss_raw": 1.5101659695307414, "correct_loss_per_char": 0.5541632175445557, "incorrect_loss_per_char": 0.7550829847653707, "correct_loss_per_token": 1.1083264350891113, "incorrect_loss_per_token": 1.5101659695307414, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1083264350891113, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": true, "logits_per_token": -1.1083264350891113, "logits_per_char": -0.5541632175445557, "num_chars": 2}, {"sum_logits": -1.5213700532913208, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.5213700532913208, "logits_per_char": -0.7606850266456604, "num_chars": 2}, {"sum_logits": -1.456937313079834, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.456937313079834, "logits_per_char": -0.728468656539917, "num_chars": 2}, {"sum_logits": -1.5521905422210693, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.5521905422210693, "logits_per_char": -0.7760952711105347, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 429, "native_id": "8-142", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5047861337661743, "incorrect_loss_raw": 1.356881817181905, "correct_loss_per_char": 0.7523930668830872, "incorrect_loss_per_char": 0.6784409085909525, "correct_loss_per_token": 1.5047861337661743, "incorrect_loss_per_token": 1.356881817181905, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2846705913543701, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": true, "logits_per_token": -1.2846705913543701, "logits_per_char": -0.6423352956771851, "num_chars": 2}, {"sum_logits": -1.3728001117706299, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.3728001117706299, "logits_per_char": -0.6864000558853149, "num_chars": 2}, {"sum_logits": -1.5047861337661743, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.5047861337661743, "logits_per_char": -0.7523930668830872, "num_chars": 2}, {"sum_logits": -1.4131747484207153, "num_tokens": 1, "num_tokens_all": 294, "is_greedy": false, "logits_per_token": -1.4131747484207153, "logits_per_char": -0.7065873742103577, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 430, "native_id": "7-1138", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.208956003189087, "incorrect_loss_raw": 1.4667609532674153, "correct_loss_per_char": 0.6044780015945435, "incorrect_loss_per_char": 0.7333804766337076, "correct_loss_per_token": 1.208956003189087, "incorrect_loss_per_token": 1.4667609532674153, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.208956003189087, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": true, "logits_per_token": -1.208956003189087, "logits_per_char": -0.6044780015945435, "num_chars": 2}, {"sum_logits": -1.4519522190093994, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.4519522190093994, "logits_per_char": -0.7259761095046997, "num_chars": 2}, {"sum_logits": -1.3364330530166626, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.3364330530166626, "logits_per_char": -0.6682165265083313, "num_chars": 2}, {"sum_logits": -1.611897587776184, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.611897587776184, "logits_per_char": -0.805948793888092, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 431, "native_id": "8-471", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5098708868026733, "incorrect_loss_raw": 1.3682146072387695, "correct_loss_per_char": 0.7549354434013367, "incorrect_loss_per_char": 0.6841073036193848, "correct_loss_per_token": 1.5098708868026733, "incorrect_loss_per_token": 1.3682146072387695, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1725566387176514, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": true, "logits_per_token": -1.1725566387176514, "logits_per_char": -0.5862783193588257, "num_chars": 2}, {"sum_logits": -1.5098708868026733, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.5098708868026733, "logits_per_char": -0.7549354434013367, "num_chars": 2}, {"sum_logits": -1.4206352233886719, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.4206352233886719, "logits_per_char": -0.7103176116943359, "num_chars": 2}, {"sum_logits": -1.5114519596099854, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.5114519596099854, "logits_per_char": -0.7557259798049927, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 432, "native_id": "9-433", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5157525539398193, "incorrect_loss_raw": 1.374068816502889, "correct_loss_per_char": 0.7578762769699097, "incorrect_loss_per_char": 0.6870344082514445, "correct_loss_per_token": 1.5157525539398193, "incorrect_loss_per_token": 1.374068816502889, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4707709550857544, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.4707709550857544, "logits_per_char": -0.7353854775428772, "num_chars": 2}, {"sum_logits": -1.5157525539398193, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.5157525539398193, "logits_per_char": -0.7578762769699097, "num_chars": 2}, {"sum_logits": -1.3065145015716553, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": true, "logits_per_token": -1.3065145015716553, "logits_per_char": -0.6532572507858276, "num_chars": 2}, {"sum_logits": -1.3449209928512573, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.3449209928512573, "logits_per_char": -0.6724604964256287, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 433, "native_id": "1458", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.459627389907837, "incorrect_loss_raw": 1.3977164427439372, "correct_loss_per_char": 0.7298136949539185, "incorrect_loss_per_char": 0.6988582213719686, "correct_loss_per_token": 1.459627389907837, "incorrect_loss_per_token": 1.3977164427439372, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0747331380844116, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": true, "logits_per_token": -1.0747331380844116, "logits_per_char": -0.5373665690422058, "num_chars": 2}, {"sum_logits": -1.5351574420928955, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.5351574420928955, "logits_per_char": -0.7675787210464478, "num_chars": 2}, {"sum_logits": -1.459627389907837, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.459627389907837, "logits_per_char": -0.7298136949539185, "num_chars": 2}, {"sum_logits": -1.5832587480545044, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.5832587480545044, "logits_per_char": -0.7916293740272522, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 434, "native_id": "57", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.552246332168579, "incorrect_loss_raw": 1.3526577552159627, "correct_loss_per_char": 0.7761231660842896, "incorrect_loss_per_char": 0.6763288776079813, "correct_loss_per_token": 1.552246332168579, "incorrect_loss_per_token": 1.3526577552159627, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2317532300949097, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": true, "logits_per_token": -1.2317532300949097, "logits_per_char": -0.6158766150474548, "num_chars": 2}, {"sum_logits": -1.552246332168579, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.552246332168579, "logits_per_char": -0.7761231660842896, "num_chars": 2}, {"sum_logits": -1.4617919921875, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.4617919921875, "logits_per_char": -0.73089599609375, "num_chars": 2}, {"sum_logits": -1.3644280433654785, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.3644280433654785, "logits_per_char": -0.6822140216827393, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 435, "native_id": "605", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.332920789718628, "incorrect_loss_raw": 1.4204320112864177, "correct_loss_per_char": 0.666460394859314, "incorrect_loss_per_char": 0.7102160056432089, "correct_loss_per_token": 1.332920789718628, "incorrect_loss_per_token": 1.4204320112864177, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2838337421417236, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": true, "logits_per_token": -1.2838337421417236, "logits_per_char": -0.6419168710708618, "num_chars": 2}, {"sum_logits": -1.4162214994430542, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.4162214994430542, "logits_per_char": -0.7081107497215271, "num_chars": 2}, {"sum_logits": -1.332920789718628, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.332920789718628, "logits_per_char": -0.666460394859314, "num_chars": 2}, {"sum_logits": -1.561240792274475, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.561240792274475, "logits_per_char": -0.7806203961372375, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 436, "native_id": "9-889", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2324347496032715, "incorrect_loss_raw": 1.4526703755060832, "correct_loss_per_char": 0.6162173748016357, "incorrect_loss_per_char": 0.7263351877530416, "correct_loss_per_token": 1.2324347496032715, "incorrect_loss_per_token": 1.4526703755060832, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2324347496032715, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": true, "logits_per_token": -1.2324347496032715, "logits_per_char": -0.6162173748016357, "num_chars": 2}, {"sum_logits": -1.4621198177337646, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.4621198177337646, "logits_per_char": -0.7310599088668823, "num_chars": 2}, {"sum_logits": -1.4356659650802612, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.4356659650802612, "logits_per_char": -0.7178329825401306, "num_chars": 2}, {"sum_logits": -1.4602253437042236, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.4602253437042236, "logits_per_char": -0.7301126718521118, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 437, "native_id": "1890", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1488635540008545, "incorrect_loss_raw": 1.4888174931208293, "correct_loss_per_char": 0.5744317770004272, "incorrect_loss_per_char": 0.7444087465604147, "correct_loss_per_token": 1.1488635540008545, "incorrect_loss_per_token": 1.4888174931208293, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1488635540008545, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": true, "logits_per_token": -1.1488635540008545, "logits_per_char": -0.5744317770004272, "num_chars": 2}, {"sum_logits": -1.4260624647140503, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.4260624647140503, "logits_per_char": -0.7130312323570251, "num_chars": 2}, {"sum_logits": -1.5127110481262207, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.5127110481262207, "logits_per_char": -0.7563555240631104, "num_chars": 2}, {"sum_logits": -1.5276789665222168, "num_tokens": 1, "num_tokens_all": 271, "is_greedy": false, "logits_per_token": -1.5276789665222168, "logits_per_char": -0.7638394832611084, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 438, "native_id": "9-618", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4030178785324097, "incorrect_loss_raw": 1.3904738823572795, "correct_loss_per_char": 0.7015089392662048, "incorrect_loss_per_char": 0.6952369411786398, "correct_loss_per_token": 1.4030178785324097, "incorrect_loss_per_token": 1.3904738823572795, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4030178785324097, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.4030178785324097, "logits_per_char": -0.7015089392662048, "num_chars": 2}, {"sum_logits": -1.4347964525222778, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.4347964525222778, "logits_per_char": -0.7173982262611389, "num_chars": 2}, {"sum_logits": -1.3911428451538086, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": false, "logits_per_token": -1.3911428451538086, "logits_per_char": -0.6955714225769043, "num_chars": 2}, {"sum_logits": -1.345482349395752, "num_tokens": 1, "num_tokens_all": 259, "is_greedy": true, "logits_per_token": -1.345482349395752, "logits_per_char": -0.672741174697876, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 439, "native_id": "9-523", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0511257648468018, "incorrect_loss_raw": 1.543772300084432, "correct_loss_per_char": 0.5255628824234009, "incorrect_loss_per_char": 0.771886150042216, "correct_loss_per_token": 1.0511257648468018, "incorrect_loss_per_token": 1.543772300084432, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0511257648468018, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": true, "logits_per_token": -1.0511257648468018, "logits_per_char": -0.5255628824234009, "num_chars": 2}, {"sum_logits": -1.5677052736282349, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.5677052736282349, "logits_per_char": -0.7838526368141174, "num_chars": 2}, {"sum_logits": -1.420933723449707, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.420933723449707, "logits_per_char": -0.7104668617248535, "num_chars": 2}, {"sum_logits": -1.642677903175354, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.642677903175354, "logits_per_char": -0.821338951587677, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 440, "native_id": "1126", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4918694496154785, "incorrect_loss_raw": 1.372144341468811, "correct_loss_per_char": 0.7459347248077393, "incorrect_loss_per_char": 0.6860721707344055, "correct_loss_per_token": 1.4918694496154785, "incorrect_loss_per_token": 1.372144341468811, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.161730408668518, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": true, "logits_per_token": -1.161730408668518, "logits_per_char": -0.580865204334259, "num_chars": 2}, {"sum_logits": -1.4736801385879517, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.4736801385879517, "logits_per_char": -0.7368400692939758, "num_chars": 2}, {"sum_logits": -1.4918694496154785, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.4918694496154785, "logits_per_char": -0.7459347248077393, "num_chars": 2}, {"sum_logits": -1.4810224771499634, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.4810224771499634, "logits_per_char": -0.7405112385749817, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 441, "native_id": "644", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.386992335319519, "incorrect_loss_raw": 1.4081042607625325, "correct_loss_per_char": 0.6934961676597595, "incorrect_loss_per_char": 0.7040521303812662, "correct_loss_per_token": 1.386992335319519, "incorrect_loss_per_token": 1.4081042607625325, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2031974792480469, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": true, "logits_per_token": -1.2031974792480469, "logits_per_char": -0.6015987396240234, "num_chars": 2}, {"sum_logits": -1.4172276258468628, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.4172276258468628, "logits_per_char": -0.7086138129234314, "num_chars": 2}, {"sum_logits": -1.386992335319519, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.386992335319519, "logits_per_char": -0.6934961676597595, "num_chars": 2}, {"sum_logits": -1.603887677192688, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.603887677192688, "logits_per_char": -0.801943838596344, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 442, "native_id": "8-365", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2744512557983398, "incorrect_loss_raw": 1.440656582514445, "correct_loss_per_char": 0.6372256278991699, "incorrect_loss_per_char": 0.7203282912572225, "correct_loss_per_token": 1.2744512557983398, "incorrect_loss_per_token": 1.440656582514445, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2744512557983398, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": true, "logits_per_token": -1.2744512557983398, "logits_per_char": -0.6372256278991699, "num_chars": 2}, {"sum_logits": -1.312980055809021, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.312980055809021, "logits_per_char": -0.6564900279045105, "num_chars": 2}, {"sum_logits": -1.5453165769577026, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.5453165769577026, "logits_per_char": -0.7726582884788513, "num_chars": 2}, {"sum_logits": -1.4636731147766113, "num_tokens": 1, "num_tokens_all": 283, "is_greedy": false, "logits_per_token": -1.4636731147766113, "logits_per_char": -0.7318365573883057, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 443, "native_id": "9-727", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.552451252937317, "incorrect_loss_raw": 1.358443816502889, "correct_loss_per_char": 0.7762256264686584, "incorrect_loss_per_char": 0.6792219082514445, "correct_loss_per_token": 1.552451252937317, "incorrect_loss_per_token": 1.358443816502889, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.134385585784912, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": true, "logits_per_token": -1.134385585784912, "logits_per_char": -0.567192792892456, "num_chars": 2}, {"sum_logits": -1.4712532758712769, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.4712532758712769, "logits_per_char": -0.7356266379356384, "num_chars": 2}, {"sum_logits": -1.469692587852478, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.469692587852478, "logits_per_char": -0.734846293926239, "num_chars": 2}, {"sum_logits": -1.552451252937317, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.552451252937317, "logits_per_char": -0.7762256264686584, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 444, "native_id": "7-461", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.528589129447937, "incorrect_loss_raw": 1.3549874226252239, "correct_loss_per_char": 0.7642945647239685, "incorrect_loss_per_char": 0.6774937113126119, "correct_loss_per_token": 1.528589129447937, "incorrect_loss_per_token": 1.3549874226252239, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2867250442504883, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": true, "logits_per_token": -1.2867250442504883, "logits_per_char": -0.6433625221252441, "num_chars": 2}, {"sum_logits": -1.4461359977722168, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": false, "logits_per_token": -1.4461359977722168, "logits_per_char": -0.7230679988861084, "num_chars": 2}, {"sum_logits": -1.3321012258529663, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": false, "logits_per_token": -1.3321012258529663, "logits_per_char": -0.6660506129264832, "num_chars": 2}, {"sum_logits": -1.528589129447937, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": false, "logits_per_token": -1.528589129447937, "logits_per_char": -0.7642945647239685, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 445, "native_id": "9-1071", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4784141778945923, "incorrect_loss_raw": 1.3689762353897095, "correct_loss_per_char": 0.7392070889472961, "incorrect_loss_per_char": 0.6844881176948547, "correct_loss_per_token": 1.4784141778945923, "incorrect_loss_per_token": 1.3689762353897095, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2922359704971313, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": true, "logits_per_token": -1.2922359704971313, "logits_per_char": -0.6461179852485657, "num_chars": 2}, {"sum_logits": -1.4784141778945923, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.4784141778945923, "logits_per_char": -0.7392070889472961, "num_chars": 2}, {"sum_logits": -1.4336096048355103, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.4336096048355103, "logits_per_char": -0.7168048024177551, "num_chars": 2}, {"sum_logits": -1.3810831308364868, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.3810831308364868, "logits_per_char": -0.6905415654182434, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 446, "native_id": "1918", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3895363807678223, "incorrect_loss_raw": 1.409959316253662, "correct_loss_per_char": 0.6947681903839111, "incorrect_loss_per_char": 0.704979658126831, "correct_loss_per_token": 1.3895363807678223, "incorrect_loss_per_token": 1.409959316253662, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1797313690185547, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": true, "logits_per_token": -1.1797313690185547, "logits_per_char": -0.5898656845092773, "num_chars": 2}, {"sum_logits": -1.3895363807678223, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.3895363807678223, "logits_per_char": -0.6947681903839111, "num_chars": 2}, {"sum_logits": -1.4157633781433105, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.4157633781433105, "logits_per_char": -0.7078816890716553, "num_chars": 2}, {"sum_logits": -1.634383201599121, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.634383201599121, "logits_per_char": -0.8171916007995605, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 447, "native_id": "1038", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6300245523452759, "incorrect_loss_raw": 1.3333319425582886, "correct_loss_per_char": 0.8150122761726379, "incorrect_loss_per_char": 0.6666659712791443, "correct_loss_per_token": 1.6300245523452759, "incorrect_loss_per_token": 1.3333319425582886, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1289184093475342, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": true, "logits_per_token": -1.1289184093475342, "logits_per_char": -0.5644592046737671, "num_chars": 2}, {"sum_logits": -1.4198236465454102, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.4198236465454102, "logits_per_char": -0.7099118232727051, "num_chars": 2}, {"sum_logits": -1.4512537717819214, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.4512537717819214, "logits_per_char": -0.7256268858909607, "num_chars": 2}, {"sum_logits": -1.6300245523452759, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.6300245523452759, "logits_per_char": -0.8150122761726379, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 448, "native_id": "9-197", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4020785093307495, "incorrect_loss_raw": 1.3946171601613362, "correct_loss_per_char": 0.7010392546653748, "incorrect_loss_per_char": 0.6973085800806681, "correct_loss_per_token": 1.4020785093307495, "incorrect_loss_per_token": 1.3946171601613362, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3000737428665161, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": true, "logits_per_token": -1.3000737428665161, "logits_per_char": -0.6500368714332581, "num_chars": 2}, {"sum_logits": -1.46429443359375, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.46429443359375, "logits_per_char": -0.732147216796875, "num_chars": 2}, {"sum_logits": -1.4194833040237427, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.4194833040237427, "logits_per_char": -0.7097416520118713, "num_chars": 2}, {"sum_logits": -1.4020785093307495, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.4020785093307495, "logits_per_char": -0.7010392546653748, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 449, "native_id": "1393", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5467160940170288, "incorrect_loss_raw": 1.3458525737126668, "correct_loss_per_char": 0.7733580470085144, "incorrect_loss_per_char": 0.6729262868563334, "correct_loss_per_token": 1.5467160940170288, "incorrect_loss_per_token": 1.3458525737126668, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2719682455062866, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": true, "logits_per_token": -1.2719682455062866, "logits_per_char": -0.6359841227531433, "num_chars": 2}, {"sum_logits": -1.5467160940170288, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.5467160940170288, "logits_per_char": -0.7733580470085144, "num_chars": 2}, {"sum_logits": -1.3858468532562256, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.3858468532562256, "logits_per_char": -0.6929234266281128, "num_chars": 2}, {"sum_logits": -1.3797426223754883, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.3797426223754883, "logits_per_char": -0.6898713111877441, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 450, "native_id": "7-244", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.175149917602539, "incorrect_loss_raw": 1.48818043867747, "correct_loss_per_char": 0.5875749588012695, "incorrect_loss_per_char": 0.744090219338735, "correct_loss_per_token": 1.175149917602539, "incorrect_loss_per_token": 1.48818043867747, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.175149917602539, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": true, "logits_per_token": -1.175149917602539, "logits_per_char": -0.5875749588012695, "num_chars": 2}, {"sum_logits": -1.3909807205200195, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.3909807205200195, "logits_per_char": -0.6954903602600098, "num_chars": 2}, {"sum_logits": -1.359611988067627, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.359611988067627, "logits_per_char": -0.6798059940338135, "num_chars": 2}, {"sum_logits": -1.7139486074447632, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.7139486074447632, "logits_per_char": -0.8569743037223816, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 451, "native_id": "9-916", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4490302801132202, "incorrect_loss_raw": 1.3877615928649902, "correct_loss_per_char": 0.7245151400566101, "incorrect_loss_per_char": 0.6938807964324951, "correct_loss_per_token": 1.4490302801132202, "incorrect_loss_per_token": 1.3877615928649902, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.160628318786621, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": true, "logits_per_token": -1.160628318786621, "logits_per_char": -0.5803141593933105, "num_chars": 2}, {"sum_logits": -1.4490302801132202, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.4490302801132202, "logits_per_char": -0.7245151400566101, "num_chars": 2}, {"sum_logits": -1.49138605594635, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.49138605594635, "logits_per_char": -0.745693027973175, "num_chars": 2}, {"sum_logits": -1.5112704038619995, "num_tokens": 1, "num_tokens_all": 277, "is_greedy": false, "logits_per_token": -1.5112704038619995, "logits_per_char": -0.7556352019309998, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 452, "native_id": "9-1046", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3014521598815918, "incorrect_loss_raw": 1.4310905933380127, "correct_loss_per_char": 0.6507260799407959, "incorrect_loss_per_char": 0.7155452966690063, "correct_loss_per_token": 1.3014521598815918, "incorrect_loss_per_token": 1.4310905933380127, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3014521598815918, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": true, "logits_per_token": -1.3014521598815918, "logits_per_char": -0.6507260799407959, "num_chars": 2}, {"sum_logits": -1.349339485168457, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.349339485168457, "logits_per_char": -0.6746697425842285, "num_chars": 2}, {"sum_logits": -1.4669175148010254, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.4669175148010254, "logits_per_char": -0.7334587574005127, "num_chars": 2}, {"sum_logits": -1.4770147800445557, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.4770147800445557, "logits_per_char": -0.7385073900222778, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 453, "native_id": "167", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1872284412384033, "incorrect_loss_raw": 1.4765249093373616, "correct_loss_per_char": 0.5936142206192017, "incorrect_loss_per_char": 0.7382624546686808, "correct_loss_per_token": 1.1872284412384033, "incorrect_loss_per_token": 1.4765249093373616, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1872284412384033, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": true, "logits_per_token": -1.1872284412384033, "logits_per_char": -0.5936142206192017, "num_chars": 2}, {"sum_logits": -1.4887638092041016, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.4887638092041016, "logits_per_char": -0.7443819046020508, "num_chars": 2}, {"sum_logits": -1.385856032371521, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.385856032371521, "logits_per_char": -0.6929280161857605, "num_chars": 2}, {"sum_logits": -1.5549548864364624, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.5549548864364624, "logits_per_char": -0.7774774432182312, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 454, "native_id": "9-566", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4886181354522705, "incorrect_loss_raw": 1.3789668877919514, "correct_loss_per_char": 0.7443090677261353, "incorrect_loss_per_char": 0.6894834438959757, "correct_loss_per_token": 1.4886181354522705, "incorrect_loss_per_token": 1.3789668877919514, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1576560735702515, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": true, "logits_per_token": -1.1576560735702515, "logits_per_char": -0.5788280367851257, "num_chars": 2}, {"sum_logits": -1.4886181354522705, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.4886181354522705, "logits_per_char": -0.7443090677261353, "num_chars": 2}, {"sum_logits": -1.3950103521347046, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.3950103521347046, "logits_per_char": -0.6975051760673523, "num_chars": 2}, {"sum_logits": -1.5842342376708984, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.5842342376708984, "logits_per_char": -0.7921171188354492, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 455, "native_id": "8-28", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5147862434387207, "incorrect_loss_raw": 1.35321843624115, "correct_loss_per_char": 0.7573931217193604, "incorrect_loss_per_char": 0.676609218120575, "correct_loss_per_token": 1.5147862434387207, "incorrect_loss_per_token": 1.35321843624115, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.345818281173706, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.345818281173706, "logits_per_char": -0.672909140586853, "num_chars": 2}, {"sum_logits": -1.3696953058242798, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.3696953058242798, "logits_per_char": -0.6848476529121399, "num_chars": 2}, {"sum_logits": -1.3441417217254639, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": true, "logits_per_token": -1.3441417217254639, "logits_per_char": -0.6720708608627319, "num_chars": 2}, {"sum_logits": -1.5147862434387207, "num_tokens": 1, "num_tokens_all": 280, "is_greedy": false, "logits_per_token": -1.5147862434387207, "logits_per_char": -0.7573931217193604, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 456, "native_id": "7-179", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.452783226966858, "incorrect_loss_raw": 1.3864561716715496, "correct_loss_per_char": 0.726391613483429, "incorrect_loss_per_char": 0.6932280858357748, "correct_loss_per_token": 1.452783226966858, "incorrect_loss_per_token": 1.3864561716715496, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2314529418945312, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": true, "logits_per_token": -1.2314529418945312, "logits_per_char": -0.6157264709472656, "num_chars": 2}, {"sum_logits": -1.452783226966858, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.452783226966858, "logits_per_char": -0.726391613483429, "num_chars": 2}, {"sum_logits": -1.3775079250335693, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.3775079250335693, "logits_per_char": -0.6887539625167847, "num_chars": 2}, {"sum_logits": -1.5504076480865479, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.5504076480865479, "logits_per_char": -0.7752038240432739, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 457, "native_id": "389", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4685516357421875, "incorrect_loss_raw": 1.3849302132924397, "correct_loss_per_char": 0.7342758178710938, "incorrect_loss_per_char": 0.6924651066462199, "correct_loss_per_token": 1.4685516357421875, "incorrect_loss_per_token": 1.3849302132924397, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1640055179595947, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": true, "logits_per_token": -1.1640055179595947, "logits_per_char": -0.5820027589797974, "num_chars": 2}, {"sum_logits": -1.4685516357421875, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.4685516357421875, "logits_per_char": -0.7342758178710938, "num_chars": 2}, {"sum_logits": -1.414268970489502, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.414268970489502, "logits_per_char": -0.707134485244751, "num_chars": 2}, {"sum_logits": -1.5765161514282227, "num_tokens": 1, "num_tokens_all": 250, "is_greedy": false, "logits_per_token": -1.5765161514282227, "logits_per_char": -0.7882580757141113, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 458, "native_id": "1528", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.368513822555542, "incorrect_loss_raw": 1.4090869426727295, "correct_loss_per_char": 0.684256911277771, "incorrect_loss_per_char": 0.7045434713363647, "correct_loss_per_token": 1.368513822555542, "incorrect_loss_per_token": 1.4090869426727295, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.222118854522705, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": true, "logits_per_token": -1.222118854522705, "logits_per_char": -0.6110594272613525, "num_chars": 2}, {"sum_logits": -1.5138721466064453, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.5138721466064453, "logits_per_char": -0.7569360733032227, "num_chars": 2}, {"sum_logits": -1.368513822555542, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.368513822555542, "logits_per_char": -0.684256911277771, "num_chars": 2}, {"sum_logits": -1.491269826889038, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.491269826889038, "logits_per_char": -0.745634913444519, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 459, "native_id": "1457", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6162580251693726, "incorrect_loss_raw": 1.3557482560475667, "correct_loss_per_char": 0.8081290125846863, "incorrect_loss_per_char": 0.6778741280237833, "correct_loss_per_token": 1.6162580251693726, "incorrect_loss_per_token": 1.3557482560475667, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0247523784637451, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": true, "logits_per_token": -1.0247523784637451, "logits_per_char": -0.5123761892318726, "num_chars": 2}, {"sum_logits": -1.5555312633514404, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.5555312633514404, "logits_per_char": -0.7777656316757202, "num_chars": 2}, {"sum_logits": -1.4869611263275146, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.4869611263275146, "logits_per_char": -0.7434805631637573, "num_chars": 2}, {"sum_logits": -1.6162580251693726, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.6162580251693726, "logits_per_char": -0.8081290125846863, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 460, "native_id": "1208", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4389464855194092, "incorrect_loss_raw": 1.3840765555699666, "correct_loss_per_char": 0.7194732427597046, "incorrect_loss_per_char": 0.6920382777849833, "correct_loss_per_token": 1.4389464855194092, "incorrect_loss_per_token": 1.3840765555699666, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2370548248291016, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": true, "logits_per_token": -1.2370548248291016, "logits_per_char": -0.6185274124145508, "num_chars": 2}, {"sum_logits": -1.4389464855194092, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.4389464855194092, "logits_per_char": -0.7194732427597046, "num_chars": 2}, {"sum_logits": -1.3731029033660889, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.3731029033660889, "logits_per_char": -0.6865514516830444, "num_chars": 2}, {"sum_logits": -1.5420719385147095, "num_tokens": 1, "num_tokens_all": 282, "is_greedy": false, "logits_per_token": -1.5420719385147095, "logits_per_char": -0.7710359692573547, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 461, "native_id": "1170", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3932756185531616, "incorrect_loss_raw": 1.4085253477096558, "correct_loss_per_char": 0.6966378092765808, "incorrect_loss_per_char": 0.7042626738548279, "correct_loss_per_token": 1.3932756185531616, "incorrect_loss_per_token": 1.4085253477096558, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1642802953720093, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": true, "logits_per_token": -1.1642802953720093, "logits_per_char": -0.5821401476860046, "num_chars": 2}, {"sum_logits": -1.55446195602417, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.55446195602417, "logits_per_char": -0.777230978012085, "num_chars": 2}, {"sum_logits": -1.3932756185531616, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.3932756185531616, "logits_per_char": -0.6966378092765808, "num_chars": 2}, {"sum_logits": -1.506833791732788, "num_tokens": 1, "num_tokens_all": 251, "is_greedy": false, "logits_per_token": -1.506833791732788, "logits_per_char": -0.753416895866394, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 462, "native_id": "8-409", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4442667961120605, "incorrect_loss_raw": 1.3950064579645793, "correct_loss_per_char": 0.7221333980560303, "incorrect_loss_per_char": 0.6975032289822897, "correct_loss_per_token": 1.4442667961120605, "incorrect_loss_per_token": 1.3950064579645793, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1409955024719238, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": true, "logits_per_token": -1.1409955024719238, "logits_per_char": -0.5704977512359619, "num_chars": 2}, {"sum_logits": -1.4365804195404053, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.4365804195404053, "logits_per_char": -0.7182902097702026, "num_chars": 2}, {"sum_logits": -1.4442667961120605, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.4442667961120605, "logits_per_char": -0.7221333980560303, "num_chars": 2}, {"sum_logits": -1.6074434518814087, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.6074434518814087, "logits_per_char": -0.8037217259407043, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 463, "native_id": "8-307", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0298877954483032, "incorrect_loss_raw": 1.554234266281128, "correct_loss_per_char": 0.5149438977241516, "incorrect_loss_per_char": 0.777117133140564, "correct_loss_per_token": 1.0298877954483032, "incorrect_loss_per_token": 1.554234266281128, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0298877954483032, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": true, "logits_per_token": -1.0298877954483032, "logits_per_char": -0.5149438977241516, "num_chars": 2}, {"sum_logits": -1.5007790327072144, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.5007790327072144, "logits_per_char": -0.7503895163536072, "num_chars": 2}, {"sum_logits": -1.4622399806976318, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.4622399806976318, "logits_per_char": -0.7311199903488159, "num_chars": 2}, {"sum_logits": -1.6996837854385376, "num_tokens": 1, "num_tokens_all": 257, "is_greedy": false, "logits_per_token": -1.6996837854385376, "logits_per_char": -0.8498418927192688, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 464, "native_id": "1948", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.468333125114441, "incorrect_loss_raw": 1.378515362739563, "correct_loss_per_char": 0.7341665625572205, "incorrect_loss_per_char": 0.6892576813697815, "correct_loss_per_token": 1.468333125114441, "incorrect_loss_per_token": 1.378515362739563, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2236610651016235, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": true, "logits_per_token": -1.2236610651016235, "logits_per_char": -0.6118305325508118, "num_chars": 2}, {"sum_logits": -1.351430892944336, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": false, "logits_per_token": -1.351430892944336, "logits_per_char": -0.675715446472168, "num_chars": 2}, {"sum_logits": -1.468333125114441, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": false, "logits_per_token": -1.468333125114441, "logits_per_char": -0.7341665625572205, "num_chars": 2}, {"sum_logits": -1.5604541301727295, "num_tokens": 1, "num_tokens_all": 260, "is_greedy": false, "logits_per_token": -1.5604541301727295, "logits_per_char": -0.7802270650863647, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 465, "native_id": "661", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4735801219940186, "incorrect_loss_raw": 1.3873918056488037, "correct_loss_per_char": 0.7367900609970093, "incorrect_loss_per_char": 0.6936959028244019, "correct_loss_per_token": 1.4735801219940186, "incorrect_loss_per_token": 1.3873918056488037, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1008336544036865, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": true, "logits_per_token": -1.1008336544036865, "logits_per_char": -0.5504168272018433, "num_chars": 2}, {"sum_logits": -1.5734930038452148, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.5734930038452148, "logits_per_char": -0.7867465019226074, "num_chars": 2}, {"sum_logits": -1.4735801219940186, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.4735801219940186, "logits_per_char": -0.7367900609970093, "num_chars": 2}, {"sum_logits": -1.4878487586975098, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.4878487586975098, "logits_per_char": -0.7439243793487549, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 466, "native_id": "7-435", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4799275398254395, "incorrect_loss_raw": 1.4022492965062459, "correct_loss_per_char": 0.7399637699127197, "incorrect_loss_per_char": 0.7011246482531229, "correct_loss_per_token": 1.4799275398254395, "incorrect_loss_per_token": 1.4022492965062459, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.054225206375122, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": true, "logits_per_token": -1.054225206375122, "logits_per_char": -0.527112603187561, "num_chars": 2}, {"sum_logits": -1.4248090982437134, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.4248090982437134, "logits_per_char": -0.7124045491218567, "num_chars": 2}, {"sum_logits": -1.4799275398254395, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.4799275398254395, "logits_per_char": -0.7399637699127197, "num_chars": 2}, {"sum_logits": -1.7277135848999023, "num_tokens": 1, "num_tokens_all": 248, "is_greedy": false, "logits_per_token": -1.7277135848999023, "logits_per_char": -0.8638567924499512, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 467, "native_id": "8-332", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3479877710342407, "incorrect_loss_raw": 1.4211223125457764, "correct_loss_per_char": 0.6739938855171204, "incorrect_loss_per_char": 0.7105611562728882, "correct_loss_per_token": 1.3479877710342407, "incorrect_loss_per_token": 1.4211223125457764, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2048338651657104, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": true, "logits_per_token": -1.2048338651657104, "logits_per_char": -0.6024169325828552, "num_chars": 2}, {"sum_logits": -1.5292134284973145, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.5292134284973145, "logits_per_char": -0.7646067142486572, "num_chars": 2}, {"sum_logits": -1.3479877710342407, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.3479877710342407, "logits_per_char": -0.6739938855171204, "num_chars": 2}, {"sum_logits": -1.5293196439743042, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.5293196439743042, "logits_per_char": -0.7646598219871521, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 468, "native_id": "948", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.507293462753296, "incorrect_loss_raw": 1.3595877488454182, "correct_loss_per_char": 0.753646731376648, "incorrect_loss_per_char": 0.6797938744227091, "correct_loss_per_token": 1.507293462753296, "incorrect_loss_per_token": 1.3595877488454182, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.267885446548462, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": true, "logits_per_token": -1.267885446548462, "logits_per_char": -0.633942723274231, "num_chars": 2}, {"sum_logits": -1.507293462753296, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": false, "logits_per_token": -1.507293462753296, "logits_per_char": -0.753646731376648, "num_chars": 2}, {"sum_logits": -1.4577362537384033, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": false, "logits_per_token": -1.4577362537384033, "logits_per_char": -0.7288681268692017, "num_chars": 2}, {"sum_logits": -1.3531415462493896, "num_tokens": 1, "num_tokens_all": 272, "is_greedy": false, "logits_per_token": -1.3531415462493896, "logits_per_char": -0.6765707731246948, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 469, "native_id": "381", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2593129873275757, "incorrect_loss_raw": 1.444184144337972, "correct_loss_per_char": 0.6296564936637878, "incorrect_loss_per_char": 0.722092072168986, "correct_loss_per_token": 1.2593129873275757, "incorrect_loss_per_token": 1.444184144337972, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2593129873275757, "num_tokens": 1, "num_tokens_all": 245, "is_greedy": true, "logits_per_token": -1.2593129873275757, "logits_per_char": -0.6296564936637878, "num_chars": 2}, {"sum_logits": -1.4400252103805542, "num_tokens": 1, "num_tokens_all": 245, "is_greedy": false, "logits_per_token": -1.4400252103805542, "logits_per_char": -0.7200126051902771, "num_chars": 2}, {"sum_logits": -1.3752459287643433, "num_tokens": 1, "num_tokens_all": 245, "is_greedy": false, "logits_per_token": -1.3752459287643433, "logits_per_char": -0.6876229643821716, "num_chars": 2}, {"sum_logits": -1.5172812938690186, "num_tokens": 1, "num_tokens_all": 245, "is_greedy": false, "logits_per_token": -1.5172812938690186, "logits_per_char": -0.7586406469345093, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 470, "native_id": "9-759", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5980846881866455, "incorrect_loss_raw": 1.3354010979334514, "correct_loss_per_char": 0.7990423440933228, "incorrect_loss_per_char": 0.6677005489667257, "correct_loss_per_token": 1.5980846881866455, "incorrect_loss_per_token": 1.3354010979334514, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2127275466918945, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": true, "logits_per_token": -1.2127275466918945, "logits_per_char": -0.6063637733459473, "num_chars": 2}, {"sum_logits": -1.5980846881866455, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.5980846881866455, "logits_per_char": -0.7990423440933228, "num_chars": 2}, {"sum_logits": -1.3937712907791138, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.3937712907791138, "logits_per_char": -0.6968856453895569, "num_chars": 2}, {"sum_logits": -1.3997044563293457, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.3997044563293457, "logits_per_char": -0.6998522281646729, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 471, "native_id": "8-350", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5312550067901611, "incorrect_loss_raw": 1.3652999798456829, "correct_loss_per_char": 0.7656275033950806, "incorrect_loss_per_char": 0.6826499899228414, "correct_loss_per_token": 1.5312550067901611, "incorrect_loss_per_token": 1.3652999798456829, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2193082571029663, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": true, "logits_per_token": -1.2193082571029663, "logits_per_char": -0.6096541285514832, "num_chars": 2}, {"sum_logits": -1.5312550067901611, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.5312550067901611, "logits_per_char": -0.7656275033950806, "num_chars": 2}, {"sum_logits": -1.5618499517440796, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.5618499517440796, "logits_per_char": -0.7809249758720398, "num_chars": 2}, {"sum_logits": -1.3147417306900024, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.3147417306900024, "logits_per_char": -0.6573708653450012, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 472, "native_id": "7-727", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.471932053565979, "incorrect_loss_raw": 1.3790837526321411, "correct_loss_per_char": 0.7359660267829895, "incorrect_loss_per_char": 0.6895418763160706, "correct_loss_per_token": 1.471932053565979, "incorrect_loss_per_token": 1.3790837526321411, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2034391164779663, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": true, "logits_per_token": -1.2034391164779663, "logits_per_char": -0.6017195582389832, "num_chars": 2}, {"sum_logits": -1.381333351135254, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.381333351135254, "logits_per_char": -0.690666675567627, "num_chars": 2}, {"sum_logits": -1.471932053565979, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.471932053565979, "logits_per_char": -0.7359660267829895, "num_chars": 2}, {"sum_logits": -1.5524787902832031, "num_tokens": 1, "num_tokens_all": 258, "is_greedy": false, "logits_per_token": -1.5524787902832031, "logits_per_char": -0.7762393951416016, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 473, "native_id": "850", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4690028429031372, "incorrect_loss_raw": 1.372381289800008, "correct_loss_per_char": 0.7345014214515686, "incorrect_loss_per_char": 0.686190644900004, "correct_loss_per_token": 1.4690028429031372, "incorrect_loss_per_token": 1.372381289800008, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3301684856414795, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.3301684856414795, "logits_per_char": -0.6650842428207397, "num_chars": 2}, {"sum_logits": -1.4690028429031372, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.4690028429031372, "logits_per_char": -0.7345014214515686, "num_chars": 2}, {"sum_logits": -1.4903578758239746, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": false, "logits_per_token": -1.4903578758239746, "logits_per_char": -0.7451789379119873, "num_chars": 2}, {"sum_logits": -1.2966175079345703, "num_tokens": 1, "num_tokens_all": 274, "is_greedy": true, "logits_per_token": -1.2966175079345703, "logits_per_char": -0.6483087539672852, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 474, "native_id": "970", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.528412103652954, "incorrect_loss_raw": 1.361490527788798, "correct_loss_per_char": 0.764206051826477, "incorrect_loss_per_char": 0.680745263894399, "correct_loss_per_token": 1.528412103652954, "incorrect_loss_per_token": 1.361490527788798, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.156898856163025, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": true, "logits_per_token": -1.156898856163025, "logits_per_char": -0.5784494280815125, "num_chars": 2}, {"sum_logits": -1.4516024589538574, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": false, "logits_per_token": -1.4516024589538574, "logits_per_char": -0.7258012294769287, "num_chars": 2}, {"sum_logits": -1.4759702682495117, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": false, "logits_per_token": -1.4759702682495117, "logits_per_char": -0.7379851341247559, "num_chars": 2}, {"sum_logits": -1.528412103652954, "num_tokens": 1, "num_tokens_all": 261, "is_greedy": false, "logits_per_token": -1.528412103652954, "logits_per_char": -0.764206051826477, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 475, "native_id": "7-381", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6790457963943481, "incorrect_loss_raw": 1.3325653473536174, "correct_loss_per_char": 0.8395228981971741, "incorrect_loss_per_char": 0.6662826736768087, "correct_loss_per_token": 1.6790457963943481, "incorrect_loss_per_token": 1.3325653473536174, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.07132887840271, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": true, "logits_per_token": -1.07132887840271, "logits_per_char": -0.535664439201355, "num_chars": 2}, {"sum_logits": -1.476006269454956, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.476006269454956, "logits_per_char": -0.738003134727478, "num_chars": 2}, {"sum_logits": -1.450360894203186, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.450360894203186, "logits_per_char": -0.725180447101593, "num_chars": 2}, {"sum_logits": -1.6790457963943481, "num_tokens": 1, "num_tokens_all": 249, "is_greedy": false, "logits_per_token": -1.6790457963943481, "logits_per_char": -0.8395228981971741, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 476, "native_id": "9-436", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.505592942237854, "incorrect_loss_raw": 1.4064033031463623, "correct_loss_per_char": 0.752796471118927, "incorrect_loss_per_char": 0.7032016515731812, "correct_loss_per_token": 1.505592942237854, "incorrect_loss_per_token": 1.4064033031463623, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0115264654159546, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": true, "logits_per_token": -1.0115264654159546, "logits_per_char": -0.5057632327079773, "num_chars": 2}, {"sum_logits": -1.4352171421051025, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.4352171421051025, "logits_per_char": -0.7176085710525513, "num_chars": 2}, {"sum_logits": -1.505592942237854, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.505592942237854, "logits_per_char": -0.752796471118927, "num_chars": 2}, {"sum_logits": -1.7724663019180298, "num_tokens": 1, "num_tokens_all": 247, "is_greedy": false, "logits_per_token": -1.7724663019180298, "logits_per_char": -0.8862331509590149, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 477, "native_id": "9-411", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3468037843704224, "incorrect_loss_raw": 1.4167101383209229, "correct_loss_per_char": 0.6734018921852112, "incorrect_loss_per_char": 0.7083550691604614, "correct_loss_per_token": 1.3468037843704224, "incorrect_loss_per_token": 1.4167101383209229, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.23646879196167, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": true, "logits_per_token": -1.23646879196167, "logits_per_char": -0.618234395980835, "num_chars": 2}, {"sum_logits": -1.5372467041015625, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.5372467041015625, "logits_per_char": -0.7686233520507812, "num_chars": 2}, {"sum_logits": -1.3468037843704224, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.3468037843704224, "logits_per_char": -0.6734018921852112, "num_chars": 2}, {"sum_logits": -1.4764149188995361, "num_tokens": 1, "num_tokens_all": 262, "is_greedy": false, "logits_per_token": -1.4764149188995361, "logits_per_char": -0.7382074594497681, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 478, "native_id": "9-692", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5397790670394897, "incorrect_loss_raw": 1.3543989658355713, "correct_loss_per_char": 0.7698895335197449, "incorrect_loss_per_char": 0.6771994829177856, "correct_loss_per_token": 1.5397790670394897, "incorrect_loss_per_token": 1.3543989658355713, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2525092363357544, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": true, "logits_per_token": -1.2525092363357544, "logits_per_char": -0.6262546181678772, "num_chars": 2}, {"sum_logits": -1.5397790670394897, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.5397790670394897, "logits_per_char": -0.7698895335197449, "num_chars": 2}, {"sum_logits": -1.364639163017273, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.364639163017273, "logits_per_char": -0.6823195815086365, "num_chars": 2}, {"sum_logits": -1.4460484981536865, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.4460484981536865, "logits_per_char": -0.7230242490768433, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 479, "native_id": "1334", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.343956470489502, "incorrect_loss_raw": 1.407622257868449, "correct_loss_per_char": 0.671978235244751, "incorrect_loss_per_char": 0.7038111289342245, "correct_loss_per_token": 1.343956470489502, "incorrect_loss_per_token": 1.407622257868449, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.343956470489502, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.343956470489502, "logits_per_char": -0.671978235244751, "num_chars": 2}, {"sum_logits": -1.4485074281692505, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.4485074281692505, "logits_per_char": -0.7242537140846252, "num_chars": 2}, {"sum_logits": -1.3892899751663208, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.3892899751663208, "logits_per_char": -0.6946449875831604, "num_chars": 2}, {"sum_logits": -1.3850693702697754, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.3850693702697754, "logits_per_char": -0.6925346851348877, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 480, "native_id": "9-1160", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.468855857849121, "incorrect_loss_raw": 1.3792244990666707, "correct_loss_per_char": 0.7344279289245605, "incorrect_loss_per_char": 0.6896122495333353, "correct_loss_per_token": 1.468855857849121, "incorrect_loss_per_token": 1.3792244990666707, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.219549536705017, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": true, "logits_per_token": -1.219549536705017, "logits_per_char": -0.6097747683525085, "num_chars": 2}, {"sum_logits": -1.4519124031066895, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.4519124031066895, "logits_per_char": -0.7259562015533447, "num_chars": 2}, {"sum_logits": -1.4662115573883057, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.4662115573883057, "logits_per_char": -0.7331057786941528, "num_chars": 2}, {"sum_logits": -1.468855857849121, "num_tokens": 1, "num_tokens_all": 276, "is_greedy": false, "logits_per_token": -1.468855857849121, "logits_per_char": -0.7344279289245605, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 481, "native_id": "9-89", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2312507629394531, "incorrect_loss_raw": 1.4518581628799438, "correct_loss_per_char": 0.6156253814697266, "incorrect_loss_per_char": 0.7259290814399719, "correct_loss_per_token": 1.2312507629394531, "incorrect_loss_per_token": 1.4518581628799438, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2312507629394531, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": true, "logits_per_token": -1.2312507629394531, "logits_per_char": -0.6156253814697266, "num_chars": 2}, {"sum_logits": -1.4143781661987305, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.4143781661987305, "logits_per_char": -0.7071890830993652, "num_chars": 2}, {"sum_logits": -1.4776066541671753, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.4776066541671753, "logits_per_char": -0.7388033270835876, "num_chars": 2}, {"sum_logits": -1.4635896682739258, "num_tokens": 1, "num_tokens_all": 268, "is_greedy": false, "logits_per_token": -1.4635896682739258, "logits_per_char": -0.7317948341369629, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 482, "native_id": "9-1034", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3977774381637573, "incorrect_loss_raw": 1.396561861038208, "correct_loss_per_char": 0.6988887190818787, "incorrect_loss_per_char": 0.698280930519104, "correct_loss_per_token": 1.3977774381637573, "incorrect_loss_per_token": 1.396561861038208, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3266209363937378, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": true, "logits_per_token": -1.3266209363937378, "logits_per_char": -0.6633104681968689, "num_chars": 2}, {"sum_logits": -1.4753034114837646, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.4753034114837646, "logits_per_char": -0.7376517057418823, "num_chars": 2}, {"sum_logits": -1.3877612352371216, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.3877612352371216, "logits_per_char": -0.6938806176185608, "num_chars": 2}, {"sum_logits": -1.3977774381637573, "num_tokens": 1, "num_tokens_all": 269, "is_greedy": false, "logits_per_token": -1.3977774381637573, "logits_per_char": -0.6988887190818787, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 483, "native_id": "8-293", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3592945337295532, "incorrect_loss_raw": 1.4023882150650024, "correct_loss_per_char": 0.6796472668647766, "incorrect_loss_per_char": 0.7011941075325012, "correct_loss_per_token": 1.3592945337295532, "incorrect_loss_per_token": 1.4023882150650024, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3577320575714111, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": true, "logits_per_token": -1.3577320575714111, "logits_per_char": -0.6788660287857056, "num_chars": 2}, {"sum_logits": -1.3592945337295532, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.3592945337295532, "logits_per_char": -0.6796472668647766, "num_chars": 2}, {"sum_logits": -1.4592519998550415, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.4592519998550415, "logits_per_char": -0.7296259999275208, "num_chars": 2}, {"sum_logits": -1.3901805877685547, "num_tokens": 1, "num_tokens_all": 295, "is_greedy": false, "logits_per_token": -1.3901805877685547, "logits_per_char": -0.6950902938842773, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 484, "native_id": "9-652", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.461535096168518, "incorrect_loss_raw": 1.378747860590617, "correct_loss_per_char": 0.730767548084259, "incorrect_loss_per_char": 0.6893739302953085, "correct_loss_per_token": 1.461535096168518, "incorrect_loss_per_token": 1.378747860590617, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2032153606414795, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": true, "logits_per_token": -1.2032153606414795, "logits_per_char": -0.6016076803207397, "num_chars": 2}, {"sum_logits": -1.3940485715866089, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": false, "logits_per_token": -1.3940485715866089, "logits_per_char": -0.6970242857933044, "num_chars": 2}, {"sum_logits": -1.461535096168518, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": false, "logits_per_token": -1.461535096168518, "logits_per_char": -0.730767548084259, "num_chars": 2}, {"sum_logits": -1.5389796495437622, "num_tokens": 1, "num_tokens_all": 263, "is_greedy": false, "logits_per_token": -1.5389796495437622, "logits_per_char": -0.7694898247718811, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 485, "native_id": "1391", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5259108543395996, "incorrect_loss_raw": 1.3652854363123577, "correct_loss_per_char": 0.7629554271697998, "incorrect_loss_per_char": 0.6826427181561788, "correct_loss_per_token": 1.5259108543395996, "incorrect_loss_per_token": 1.3652854363123577, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2332866191864014, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": true, "logits_per_token": -1.2332866191864014, "logits_per_char": -0.6166433095932007, "num_chars": 2}, {"sum_logits": -1.4312078952789307, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.4312078952789307, "logits_per_char": -0.7156039476394653, "num_chars": 2}, {"sum_logits": -1.5259108543395996, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.5259108543395996, "logits_per_char": -0.7629554271697998, "num_chars": 2}, {"sum_logits": -1.4313617944717407, "num_tokens": 1, "num_tokens_all": 254, "is_greedy": false, "logits_per_token": -1.4313617944717407, "logits_per_char": -0.7156808972358704, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 486, "native_id": "9-948", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4347410202026367, "incorrect_loss_raw": 1.391456921895345, "correct_loss_per_char": 0.7173705101013184, "incorrect_loss_per_char": 0.6957284609476725, "correct_loss_per_token": 1.4347410202026367, "incorrect_loss_per_token": 1.391456921895345, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1905027627944946, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": true, "logits_per_token": -1.1905027627944946, "logits_per_char": -0.5952513813972473, "num_chars": 2}, {"sum_logits": -1.4347410202026367, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.4347410202026367, "logits_per_char": -0.7173705101013184, "num_chars": 2}, {"sum_logits": -1.4430301189422607, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.4430301189422607, "logits_per_char": -0.7215150594711304, "num_chars": 2}, {"sum_logits": -1.5408378839492798, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.5408378839492798, "logits_per_char": -0.7704189419746399, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 487, "native_id": "8-213", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5072505474090576, "incorrect_loss_raw": 1.3637172778447468, "correct_loss_per_char": 0.7536252737045288, "incorrect_loss_per_char": 0.6818586389223734, "correct_loss_per_token": 1.5072505474090576, "incorrect_loss_per_token": 1.3637172778447468, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2168405055999756, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": true, "logits_per_token": -1.2168405055999756, "logits_per_char": -0.6084202527999878, "num_chars": 2}, {"sum_logits": -1.4620471000671387, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.4620471000671387, "logits_per_char": -0.7310235500335693, "num_chars": 2}, {"sum_logits": -1.4122642278671265, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.4122642278671265, "logits_per_char": -0.7061321139335632, "num_chars": 2}, {"sum_logits": -1.5072505474090576, "num_tokens": 1, "num_tokens_all": 256, "is_greedy": false, "logits_per_token": -1.5072505474090576, "logits_per_char": -0.7536252737045288, "num_chars": 2}], "label": 3, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 488, "native_id": "162", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5788475275039673, "incorrect_loss_raw": 1.3575125535329182, "correct_loss_per_char": 0.7894237637519836, "incorrect_loss_per_char": 0.6787562767664591, "correct_loss_per_token": 1.5788475275039673, "incorrect_loss_per_token": 1.3575125535329182, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1166114807128906, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": true, "logits_per_token": -1.1166114807128906, "logits_per_char": -0.5583057403564453, "num_chars": 2}, {"sum_logits": -1.5788475275039673, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.5788475275039673, "logits_per_char": -0.7894237637519836, "num_chars": 2}, {"sum_logits": -1.3791840076446533, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.3791840076446533, "logits_per_char": -0.6895920038223267, "num_chars": 2}, {"sum_logits": -1.576742172241211, "num_tokens": 1, "num_tokens_all": 253, "is_greedy": false, "logits_per_token": -1.576742172241211, "logits_per_char": -0.7883710861206055, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 489, "native_id": "1359", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4405450820922852, "incorrect_loss_raw": 1.3807088136672974, "correct_loss_per_char": 0.7202725410461426, "incorrect_loss_per_char": 0.6903544068336487, "correct_loss_per_token": 1.4405450820922852, "incorrect_loss_per_token": 1.3807088136672974, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.290155291557312, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.290155291557312, "logits_per_char": -0.645077645778656, "num_chars": 2}, {"sum_logits": -1.4405450820922852, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.4405450820922852, "logits_per_char": -0.7202725410461426, "num_chars": 2}, {"sum_logits": -1.5078240633010864, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5078240633010864, "logits_per_char": -0.7539120316505432, "num_chars": 2}, {"sum_logits": -1.3441470861434937, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.3441470861434937, "logits_per_char": -0.6720735430717468, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 490, "native_id": "9-743", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.195495843887329, "incorrect_loss_raw": 1.473975658416748, "correct_loss_per_char": 0.5977479219436646, "incorrect_loss_per_char": 0.736987829208374, "correct_loss_per_token": 1.195495843887329, "incorrect_loss_per_token": 1.473975658416748, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.195495843887329, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": true, "logits_per_token": -1.195495843887329, "logits_per_char": -0.5977479219436646, "num_chars": 2}, {"sum_logits": -1.4971243143081665, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.4971243143081665, "logits_per_char": -0.7485621571540833, "num_chars": 2}, {"sum_logits": -1.3686631917953491, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.3686631917953491, "logits_per_char": -0.6843315958976746, "num_chars": 2}, {"sum_logits": -1.5561394691467285, "num_tokens": 1, "num_tokens_all": 255, "is_greedy": false, "logits_per_token": -1.5561394691467285, "logits_per_char": -0.7780697345733643, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 491, "native_id": "9-645", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2068119049072266, "incorrect_loss_raw": 1.4631776809692383, "correct_loss_per_char": 0.6034059524536133, "incorrect_loss_per_char": 0.7315888404846191, "correct_loss_per_token": 1.2068119049072266, "incorrect_loss_per_token": 1.4631776809692383, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2068119049072266, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": true, "logits_per_token": -1.2068119049072266, "logits_per_char": -0.6034059524536133, "num_chars": 2}, {"sum_logits": -1.4039294719696045, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "logits_per_token": -1.4039294719696045, "logits_per_char": -0.7019647359848022, "num_chars": 2}, {"sum_logits": -1.4410295486450195, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "logits_per_token": -1.4410295486450195, "logits_per_char": -0.7205147743225098, "num_chars": 2}, {"sum_logits": -1.5445740222930908, "num_tokens": 1, "num_tokens_all": 267, "is_greedy": false, "logits_per_token": -1.5445740222930908, "logits_per_char": -0.7722870111465454, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 492, "native_id": "8-250", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.446921467781067, "incorrect_loss_raw": 1.3838454087575276, "correct_loss_per_char": 0.7234607338905334, "incorrect_loss_per_char": 0.6919227043787638, "correct_loss_per_token": 1.446921467781067, "incorrect_loss_per_token": 1.3838454087575276, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4934161901474, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4934161901474, "logits_per_char": -0.7467080950737, "num_chars": 2}, {"sum_logits": -1.4499013423919678, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.4499013423919678, "logits_per_char": -0.7249506711959839, "num_chars": 2}, {"sum_logits": -1.446921467781067, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": false, "logits_per_token": -1.446921467781067, "logits_per_char": -0.7234607338905334, "num_chars": 2}, {"sum_logits": -1.2082186937332153, "num_tokens": 1, "num_tokens_all": 281, "is_greedy": true, "logits_per_token": -1.2082186937332153, "logits_per_char": -0.6041093468666077, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 493, "native_id": "283", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5440802574157715, "incorrect_loss_raw": 1.3569796880086262, "correct_loss_per_char": 0.7720401287078857, "incorrect_loss_per_char": 0.6784898440043131, "correct_loss_per_token": 1.5440802574157715, "incorrect_loss_per_token": 1.3569796880086262, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1742215156555176, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": true, "logits_per_token": -1.1742215156555176, "logits_per_char": -0.5871107578277588, "num_chars": 2}, {"sum_logits": -1.5215733051300049, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5215733051300049, "logits_per_char": -0.7607866525650024, "num_chars": 2}, {"sum_logits": -1.5440802574157715, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.5440802574157715, "logits_per_char": -0.7720401287078857, "num_chars": 2}, {"sum_logits": -1.3751442432403564, "num_tokens": 1, "num_tokens_all": 284, "is_greedy": false, "logits_per_token": -1.3751442432403564, "logits_per_char": -0.6875721216201782, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 494, "native_id": "8-183", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3173773288726807, "incorrect_loss_raw": 1.4197583595911663, "correct_loss_per_char": 0.6586886644363403, "incorrect_loss_per_char": 0.7098791797955831, "correct_loss_per_token": 1.3173773288726807, "incorrect_loss_per_token": 1.4197583595911663, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3173773288726807, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.3173773288726807, "logits_per_char": -0.6586886644363403, "num_chars": 2}, {"sum_logits": -1.3157373666763306, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": true, "logits_per_token": -1.3157373666763306, "logits_per_char": -0.6578686833381653, "num_chars": 2}, {"sum_logits": -1.4804565906524658, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.4804565906524658, "logits_per_char": -0.7402282953262329, "num_chars": 2}, {"sum_logits": -1.4630811214447021, "num_tokens": 1, "num_tokens_all": 315, "is_greedy": false, "logits_per_token": -1.4630811214447021, "logits_per_char": -0.7315405607223511, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 495, "native_id": "9-284", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2832094430923462, "incorrect_loss_raw": 1.4324691692988079, "correct_loss_per_char": 0.6416047215461731, "incorrect_loss_per_char": 0.7162345846494039, "correct_loss_per_token": 1.2832094430923462, "incorrect_loss_per_token": 1.4324691692988079, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2832094430923462, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": true, "logits_per_token": -1.2832094430923462, "logits_per_char": -0.6416047215461731, "num_chars": 2}, {"sum_logits": -1.3703570365905762, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.3703570365905762, "logits_per_char": -0.6851785182952881, "num_chars": 2}, {"sum_logits": -1.512622356414795, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.512622356414795, "logits_per_char": -0.7563111782073975, "num_chars": 2}, {"sum_logits": -1.4144281148910522, "num_tokens": 1, "num_tokens_all": 270, "is_greedy": false, "logits_per_token": -1.4144281148910522, "logits_per_char": -0.7072140574455261, "num_chars": 2}], "label": 0, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 496, "native_id": "7-1186", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5093717575073242, "incorrect_loss_raw": 1.366945505142212, "correct_loss_per_char": 0.7546858787536621, "incorrect_loss_per_char": 0.683472752571106, "correct_loss_per_token": 1.5093717575073242, "incorrect_loss_per_token": 1.366945505142212, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1651090383529663, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": true, "logits_per_token": -1.1651090383529663, "logits_per_char": -0.5825545191764832, "num_chars": 2}, {"sum_logits": -1.4959197044372559, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.4959197044372559, "logits_per_char": -0.7479598522186279, "num_chars": 2}, {"sum_logits": -1.5093717575073242, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.5093717575073242, "logits_per_char": -0.7546858787536621, "num_chars": 2}, {"sum_logits": -1.4398077726364136, "num_tokens": 1, "num_tokens_all": 266, "is_greedy": false, "logits_per_token": -1.4398077726364136, "logits_per_char": -0.7199038863182068, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 497, "native_id": "926", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5263367891311646, "incorrect_loss_raw": 1.3526829878489177, "correct_loss_per_char": 0.7631683945655823, "incorrect_loss_per_char": 0.6763414939244589, "correct_loss_per_token": 1.5263367891311646, "incorrect_loss_per_token": 1.3526829878489177, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2832492589950562, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": true, "logits_per_token": -1.2832492589950562, "logits_per_char": -0.6416246294975281, "num_chars": 2}, {"sum_logits": -1.3810484409332275, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.3810484409332275, "logits_per_char": -0.6905242204666138, "num_chars": 2}, {"sum_logits": -1.5263367891311646, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.5263367891311646, "logits_per_char": -0.7631683945655823, "num_chars": 2}, {"sum_logits": -1.3937512636184692, "num_tokens": 1, "num_tokens_all": 265, "is_greedy": false, "logits_per_token": -1.3937512636184692, "logits_per_char": -0.6968756318092346, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 498, "native_id": "7-519", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.426411509513855, "incorrect_loss_raw": 1.3910472393035889, "correct_loss_per_char": 0.7132057547569275, "incorrect_loss_per_char": 0.6955236196517944, "correct_loss_per_token": 1.426411509513855, "incorrect_loss_per_token": 1.3910472393035889, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2105824947357178, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": true, "logits_per_token": -1.2105824947357178, "logits_per_char": -0.6052912473678589, "num_chars": 2}, {"sum_logits": -1.426411509513855, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.426411509513855, "logits_per_char": -0.7132057547569275, "num_chars": 2}, {"sum_logits": -1.539154291152954, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.539154291152954, "logits_per_char": -0.769577145576477, "num_chars": 2}, {"sum_logits": -1.4234049320220947, "num_tokens": 1, "num_tokens_all": 273, "is_greedy": false, "logits_per_token": -1.4234049320220947, "logits_per_char": -0.7117024660110474, "num_chars": 2}], "label": 1, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 499, "native_id": "7-7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4347442388534546, "incorrect_loss_raw": 1.393375833829244, "correct_loss_per_char": 0.7173721194267273, "incorrect_loss_per_char": 0.696687916914622, "correct_loss_per_token": 1.4347442388534546, "incorrect_loss_per_token": 1.393375833829244, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.191692590713501, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": true, "logits_per_token": -1.191692590713501, "logits_per_char": -0.5958462953567505, "num_chars": 2}, {"sum_logits": -1.374725103378296, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.374725103378296, "logits_per_char": -0.687362551689148, "num_chars": 2}, {"sum_logits": -1.4347442388534546, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.4347442388534546, "logits_per_char": -0.7173721194267273, "num_chars": 2}, {"sum_logits": -1.613709807395935, "num_tokens": 1, "num_tokens_all": 252, "is_greedy": false, "logits_per_token": -1.613709807395935, "logits_per_char": -0.8068549036979675, "num_chars": 2}], "label": 2, "task_hash": "aec5918df9c1126cd5bd8e2000fae9f7", "model_hash": "03418cf8091a9882619950ffb07429a5"}