{"doc_id": 0, "native_id": 0, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2791002988815308, "incorrect_loss_raw": 1.4296733538309734, "correct_loss_per_char": 0.6395501494407654, "incorrect_loss_per_char": 0.7148366769154867, "correct_loss_per_token": 1.2791002988815308, "incorrect_loss_per_token": 1.4296733538309734, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4071874618530273, "num_tokens": 1, "num_tokens_all": 504, "is_greedy": false, "logits_per_token": -1.4071874618530273, "logits_per_char": -0.7035937309265137, "num_chars": 2}, {"sum_logits": -1.2791002988815308, "num_tokens": 1, "num_tokens_all": 504, "is_greedy": true, "logits_per_token": -1.2791002988815308, "logits_per_char": -0.6395501494407654, "num_chars": 2}, {"sum_logits": -1.4789854288101196, "num_tokens": 1, "num_tokens_all": 504, "is_greedy": false, "logits_per_token": -1.4789854288101196, "logits_per_char": -0.7394927144050598, "num_chars": 2}, {"sum_logits": -1.402847170829773, "num_tokens": 1, "num_tokens_all": 504, "is_greedy": false, "logits_per_token": -1.402847170829773, "logits_per_char": -0.7014235854148865, "num_chars": 2}], "label": 1, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 1, "native_id": 1, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5454140901565552, "incorrect_loss_raw": 1.3480912446975708, "correct_loss_per_char": 0.7727070450782776, "incorrect_loss_per_char": 0.6740456223487854, "correct_loss_per_token": 1.5454140901565552, "incorrect_loss_per_token": 1.3480912446975708, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4646297693252563, "num_tokens": 1, "num_tokens_all": 480, "is_greedy": false, "logits_per_token": -1.4646297693252563, "logits_per_char": -0.7323148846626282, "num_chars": 2}, {"sum_logits": -1.3460662364959717, "num_tokens": 1, "num_tokens_all": 480, "is_greedy": false, "logits_per_token": -1.3460662364959717, "logits_per_char": -0.6730331182479858, "num_chars": 2}, {"sum_logits": -1.5454140901565552, "num_tokens": 1, "num_tokens_all": 480, "is_greedy": false, "logits_per_token": -1.5454140901565552, "logits_per_char": -0.7727070450782776, "num_chars": 2}, {"sum_logits": -1.2335777282714844, "num_tokens": 1, "num_tokens_all": 480, "is_greedy": true, "logits_per_token": -1.2335777282714844, "logits_per_char": -0.6167888641357422, "num_chars": 2}], "label": 2, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 2, "native_id": 2, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4875355958938599, "incorrect_loss_raw": 1.3638638655344646, "correct_loss_per_char": 0.7437677979469299, "incorrect_loss_per_char": 0.6819319327672323, "correct_loss_per_token": 1.4875355958938599, "incorrect_loss_per_token": 1.3638638655344646, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3286280632019043, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": false, "logits_per_token": -1.3286280632019043, "logits_per_char": -0.6643140316009521, "num_chars": 2}, {"sum_logits": -1.3081437349319458, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": true, "logits_per_token": -1.3081437349319458, "logits_per_char": -0.6540718674659729, "num_chars": 2}, {"sum_logits": -1.4875355958938599, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": false, "logits_per_token": -1.4875355958938599, "logits_per_char": -0.7437677979469299, "num_chars": 2}, {"sum_logits": -1.4548197984695435, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": false, "logits_per_token": -1.4548197984695435, "logits_per_char": -0.7274098992347717, "num_chars": 2}], "label": 2, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 3, "native_id": 3, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3903602361679077, "incorrect_loss_raw": 1.395260214805603, "correct_loss_per_char": 0.6951801180839539, "incorrect_loss_per_char": 0.6976301074028015, "correct_loss_per_token": 1.3903602361679077, "incorrect_loss_per_token": 1.395260214805603, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3903602361679077, "num_tokens": 1, "num_tokens_all": 448, "is_greedy": false, "logits_per_token": -1.3903602361679077, "logits_per_char": -0.6951801180839539, "num_chars": 2}, {"sum_logits": -1.307786226272583, "num_tokens": 1, "num_tokens_all": 448, "is_greedy": true, "logits_per_token": -1.307786226272583, "logits_per_char": -0.6538931131362915, "num_chars": 2}, {"sum_logits": -1.5322271585464478, "num_tokens": 1, "num_tokens_all": 448, "is_greedy": false, "logits_per_token": -1.5322271585464478, "logits_per_char": -0.7661135792732239, "num_chars": 2}, {"sum_logits": -1.3457672595977783, "num_tokens": 1, "num_tokens_all": 448, "is_greedy": false, "logits_per_token": -1.3457672595977783, "logits_per_char": -0.6728836297988892, "num_chars": 2}], "label": 0, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 4, "native_id": 4, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2953962087631226, "incorrect_loss_raw": 1.4345641533533733, "correct_loss_per_char": 0.6476981043815613, "incorrect_loss_per_char": 0.7172820766766866, "correct_loss_per_token": 1.2953962087631226, "incorrect_loss_per_token": 1.4345641533533733, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5721828937530518, "num_tokens": 1, "num_tokens_all": 493, "is_greedy": false, "logits_per_token": -1.5721828937530518, "logits_per_char": -0.7860914468765259, "num_chars": 2}, {"sum_logits": -1.2953962087631226, "num_tokens": 1, "num_tokens_all": 493, "is_greedy": false, "logits_per_token": -1.2953962087631226, "logits_per_char": -0.6476981043815613, "num_chars": 2}, {"sum_logits": -1.480890154838562, "num_tokens": 1, "num_tokens_all": 493, "is_greedy": false, "logits_per_token": -1.480890154838562, "logits_per_char": -0.740445077419281, "num_chars": 2}, {"sum_logits": -1.2506194114685059, "num_tokens": 1, "num_tokens_all": 493, "is_greedy": true, "logits_per_token": -1.2506194114685059, "logits_per_char": -0.6253097057342529, "num_chars": 2}], "label": 1, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 5, "native_id": 5, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4279288053512573, "incorrect_loss_raw": 1.384389877319336, "correct_loss_per_char": 0.7139644026756287, "incorrect_loss_per_char": 0.692194938659668, "correct_loss_per_token": 1.4279288053512573, "incorrect_loss_per_token": 1.384389877319336, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5313677787780762, "num_tokens": 1, "num_tokens_all": 450, "is_greedy": false, "logits_per_token": -1.5313677787780762, "logits_per_char": -0.7656838893890381, "num_chars": 2}, {"sum_logits": -1.3337334394454956, "num_tokens": 1, "num_tokens_all": 450, "is_greedy": false, "logits_per_token": -1.3337334394454956, "logits_per_char": -0.6668667197227478, "num_chars": 2}, {"sum_logits": -1.4279288053512573, "num_tokens": 1, "num_tokens_all": 450, "is_greedy": false, "logits_per_token": -1.4279288053512573, "logits_per_char": -0.7139644026756287, "num_chars": 2}, {"sum_logits": -1.288068413734436, "num_tokens": 1, "num_tokens_all": 450, "is_greedy": true, "logits_per_token": -1.288068413734436, "logits_per_char": -0.644034206867218, "num_chars": 2}], "label": 2, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 6, "native_id": 6, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.268392562866211, "incorrect_loss_raw": 1.4366327126820881, "correct_loss_per_char": 0.6341962814331055, "incorrect_loss_per_char": 0.7183163563410441, "correct_loss_per_token": 1.268392562866211, "incorrect_loss_per_token": 1.4366327126820881, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5164686441421509, "num_tokens": 1, "num_tokens_all": 489, "is_greedy": false, "logits_per_token": -1.5164686441421509, "logits_per_char": -0.7582343220710754, "num_chars": 2}, {"sum_logits": -1.344135046005249, "num_tokens": 1, "num_tokens_all": 489, "is_greedy": false, "logits_per_token": -1.344135046005249, "logits_per_char": -0.6720675230026245, "num_chars": 2}, {"sum_logits": -1.4492944478988647, "num_tokens": 1, "num_tokens_all": 489, "is_greedy": false, "logits_per_token": -1.4492944478988647, "logits_per_char": -0.7246472239494324, "num_chars": 2}, {"sum_logits": -1.268392562866211, "num_tokens": 1, "num_tokens_all": 489, "is_greedy": true, "logits_per_token": -1.268392562866211, "logits_per_char": -0.6341962814331055, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 7, "native_id": 7, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5935741662979126, "incorrect_loss_raw": 1.336811939875285, "correct_loss_per_char": 0.7967870831489563, "incorrect_loss_per_char": 0.6684059699376425, "correct_loss_per_token": 1.5935741662979126, "incorrect_loss_per_token": 1.336811939875285, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5935741662979126, "num_tokens": 1, "num_tokens_all": 482, "is_greedy": false, "logits_per_token": -1.5935741662979126, "logits_per_char": -0.7967870831489563, "num_chars": 2}, {"sum_logits": -1.2870465517044067, "num_tokens": 1, "num_tokens_all": 482, "is_greedy": false, "logits_per_token": -1.2870465517044067, "logits_per_char": -0.6435232758522034, "num_chars": 2}, {"sum_logits": -1.4888203144073486, "num_tokens": 1, "num_tokens_all": 482, "is_greedy": false, "logits_per_token": -1.4888203144073486, "logits_per_char": -0.7444101572036743, "num_chars": 2}, {"sum_logits": -1.2345689535140991, "num_tokens": 1, "num_tokens_all": 482, "is_greedy": true, "logits_per_token": -1.2345689535140991, "logits_per_char": -0.6172844767570496, "num_chars": 2}], "label": 0, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 8, "native_id": 8, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4873530864715576, "incorrect_loss_raw": 1.3678174416224163, "correct_loss_per_char": 0.7436765432357788, "incorrect_loss_per_char": 0.6839087208112081, "correct_loss_per_token": 1.4873530864715576, "incorrect_loss_per_token": 1.3678174416224163, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4873530864715576, "num_tokens": 1, "num_tokens_all": 457, "is_greedy": false, "logits_per_token": -1.4873530864715576, "logits_per_char": -0.7436765432357788, "num_chars": 2}, {"sum_logits": -1.3198423385620117, "num_tokens": 1, "num_tokens_all": 457, "is_greedy": false, "logits_per_token": -1.3198423385620117, "logits_per_char": -0.6599211692810059, "num_chars": 2}, {"sum_logits": -1.5206279754638672, "num_tokens": 1, "num_tokens_all": 457, "is_greedy": false, "logits_per_token": -1.5206279754638672, "logits_per_char": -0.7603139877319336, "num_chars": 2}, {"sum_logits": -1.2629820108413696, "num_tokens": 1, "num_tokens_all": 457, "is_greedy": true, "logits_per_token": -1.2629820108413696, "logits_per_char": -0.6314910054206848, "num_chars": 2}], "label": 0, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 9, "native_id": 9, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2760896682739258, "incorrect_loss_raw": 1.4396508534749348, "correct_loss_per_char": 0.6380448341369629, "incorrect_loss_per_char": 0.7198254267374674, "correct_loss_per_token": 1.2760896682739258, "incorrect_loss_per_token": 1.4396508534749348, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5205377340316772, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.5205377340316772, "logits_per_char": -0.7602688670158386, "num_chars": 2}, {"sum_logits": -1.2746567726135254, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": true, "logits_per_token": -1.2746567726135254, "logits_per_char": -0.6373283863067627, "num_chars": 2}, {"sum_logits": -1.523758053779602, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.523758053779602, "logits_per_char": -0.761879026889801, "num_chars": 2}, {"sum_logits": -1.2760896682739258, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.2760896682739258, "logits_per_char": -0.6380448341369629, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 10, "native_id": 10, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3453590869903564, "incorrect_loss_raw": 1.4090227683385212, "correct_loss_per_char": 0.6726795434951782, "incorrect_loss_per_char": 0.7045113841692606, "correct_loss_per_token": 1.3453590869903564, "incorrect_loss_per_token": 1.4090227683385212, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.416566014289856, "num_tokens": 1, "num_tokens_all": 442, "is_greedy": false, "logits_per_token": -1.416566014289856, "logits_per_char": -0.708283007144928, "num_chars": 2}, {"sum_logits": -1.3298816680908203, "num_tokens": 1, "num_tokens_all": 442, "is_greedy": true, "logits_per_token": -1.3298816680908203, "logits_per_char": -0.6649408340454102, "num_chars": 2}, {"sum_logits": -1.4806206226348877, "num_tokens": 1, "num_tokens_all": 442, "is_greedy": false, "logits_per_token": -1.4806206226348877, "logits_per_char": -0.7403103113174438, "num_chars": 2}, {"sum_logits": -1.3453590869903564, "num_tokens": 1, "num_tokens_all": 442, "is_greedy": false, "logits_per_token": -1.3453590869903564, "logits_per_char": -0.6726795434951782, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 11, "native_id": 11, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.262722373008728, "incorrect_loss_raw": 1.4388370513916016, "correct_loss_per_char": 0.631361186504364, "incorrect_loss_per_char": 0.7194185256958008, "correct_loss_per_token": 1.262722373008728, "incorrect_loss_per_token": 1.4388370513916016, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5428391695022583, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": false, "logits_per_token": -1.5428391695022583, "logits_per_char": -0.7714195847511292, "num_chars": 2}, {"sum_logits": -1.342387080192566, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": false, "logits_per_token": -1.342387080192566, "logits_per_char": -0.671193540096283, "num_chars": 2}, {"sum_logits": -1.4312849044799805, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": false, "logits_per_token": -1.4312849044799805, "logits_per_char": -0.7156424522399902, "num_chars": 2}, {"sum_logits": -1.262722373008728, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": true, "logits_per_token": -1.262722373008728, "logits_per_char": -0.631361186504364, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 12, "native_id": 12, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3690965175628662, "incorrect_loss_raw": 1.4060137669245403, "correct_loss_per_char": 0.6845482587814331, "incorrect_loss_per_char": 0.7030068834622701, "correct_loss_per_token": 1.3690965175628662, "incorrect_loss_per_token": 1.4060137669245403, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.417660117149353, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.417660117149353, "logits_per_char": -0.7088300585746765, "num_chars": 2}, {"sum_logits": -1.272972822189331, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": true, "logits_per_token": -1.272972822189331, "logits_per_char": -0.6364864110946655, "num_chars": 2}, {"sum_logits": -1.5274083614349365, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.5274083614349365, "logits_per_char": -0.7637041807174683, "num_chars": 2}, {"sum_logits": -1.3690965175628662, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.3690965175628662, "logits_per_char": -0.6845482587814331, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 13, "native_id": 13, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.238163948059082, "incorrect_loss_raw": 1.4541021188100178, "correct_loss_per_char": 0.619081974029541, "incorrect_loss_per_char": 0.7270510594050089, "correct_loss_per_token": 1.238163948059082, "incorrect_loss_per_token": 1.4541021188100178, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5210652351379395, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": false, "logits_per_token": -1.5210652351379395, "logits_per_char": -0.7605326175689697, "num_chars": 2}, {"sum_logits": -1.238163948059082, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": true, "logits_per_token": -1.238163948059082, "logits_per_char": -0.619081974029541, "num_chars": 2}, {"sum_logits": -1.54954195022583, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": false, "logits_per_token": -1.54954195022583, "logits_per_char": -0.774770975112915, "num_chars": 2}, {"sum_logits": -1.2916991710662842, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": false, "logits_per_token": -1.2916991710662842, "logits_per_char": -0.6458495855331421, "num_chars": 2}], "label": 1, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 14, "native_id": 14, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2896043062210083, "incorrect_loss_raw": 1.435489575068156, "correct_loss_per_char": 0.6448021531105042, "incorrect_loss_per_char": 0.717744787534078, "correct_loss_per_token": 1.2896043062210083, "incorrect_loss_per_token": 1.435489575068156, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2896043062210083, "num_tokens": 1, "num_tokens_all": 511, "is_greedy": false, "logits_per_token": -1.2896043062210083, "logits_per_char": -0.6448021531105042, "num_chars": 2}, {"sum_logits": -1.2830166816711426, "num_tokens": 1, "num_tokens_all": 511, "is_greedy": true, "logits_per_token": -1.2830166816711426, "logits_per_char": -0.6415083408355713, "num_chars": 2}, {"sum_logits": -1.5211220979690552, "num_tokens": 1, "num_tokens_all": 511, "is_greedy": false, "logits_per_token": -1.5211220979690552, "logits_per_char": -0.7605610489845276, "num_chars": 2}, {"sum_logits": -1.50232994556427, "num_tokens": 1, "num_tokens_all": 511, "is_greedy": false, "logits_per_token": -1.50232994556427, "logits_per_char": -0.751164972782135, "num_chars": 2}], "label": 0, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 15, "native_id": 15, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4196279048919678, "incorrect_loss_raw": 1.38624107837677, "correct_loss_per_char": 0.7098139524459839, "incorrect_loss_per_char": 0.693120539188385, "correct_loss_per_token": 1.4196279048919678, "incorrect_loss_per_token": 1.38624107837677, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3589565753936768, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": false, "logits_per_token": -1.3589565753936768, "logits_per_char": -0.6794782876968384, "num_chars": 2}, {"sum_logits": -1.3582451343536377, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": true, "logits_per_token": -1.3582451343536377, "logits_per_char": -0.6791225671768188, "num_chars": 2}, {"sum_logits": -1.4415215253829956, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": false, "logits_per_token": -1.4415215253829956, "logits_per_char": -0.7207607626914978, "num_chars": 2}, {"sum_logits": -1.4196279048919678, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": false, "logits_per_token": -1.4196279048919678, "logits_per_char": -0.7098139524459839, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 16, "native_id": 16, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2953962087631226, "incorrect_loss_raw": 1.4345641533533733, "correct_loss_per_char": 0.6476981043815613, "incorrect_loss_per_char": 0.7172820766766866, "correct_loss_per_token": 1.2953962087631226, "incorrect_loss_per_token": 1.4345641533533733, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5721828937530518, "num_tokens": 1, "num_tokens_all": 493, "is_greedy": false, "logits_per_token": -1.5721828937530518, "logits_per_char": -0.7860914468765259, "num_chars": 2}, {"sum_logits": -1.2953962087631226, "num_tokens": 1, "num_tokens_all": 493, "is_greedy": false, "logits_per_token": -1.2953962087631226, "logits_per_char": -0.6476981043815613, "num_chars": 2}, {"sum_logits": -1.480890154838562, "num_tokens": 1, "num_tokens_all": 493, "is_greedy": false, "logits_per_token": -1.480890154838562, "logits_per_char": -0.740445077419281, "num_chars": 2}, {"sum_logits": -1.2506194114685059, "num_tokens": 1, "num_tokens_all": 493, "is_greedy": true, "logits_per_token": -1.2506194114685059, "logits_per_char": -0.6253097057342529, "num_chars": 2}], "label": 1, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 17, "native_id": 17, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6254867315292358, "incorrect_loss_raw": 1.331621249516805, "correct_loss_per_char": 0.8127433657646179, "incorrect_loss_per_char": 0.6658106247584025, "correct_loss_per_token": 1.6254867315292358, "incorrect_loss_per_token": 1.331621249516805, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6254867315292358, "num_tokens": 1, "num_tokens_all": 521, "is_greedy": false, "logits_per_token": -1.6254867315292358, "logits_per_char": -0.8127433657646179, "num_chars": 2}, {"sum_logits": -1.2938421964645386, "num_tokens": 1, "num_tokens_all": 521, "is_greedy": false, "logits_per_token": -1.2938421964645386, "logits_per_char": -0.6469210982322693, "num_chars": 2}, {"sum_logits": -1.525632619857788, "num_tokens": 1, "num_tokens_all": 521, "is_greedy": false, "logits_per_token": -1.525632619857788, "logits_per_char": -0.762816309928894, "num_chars": 2}, {"sum_logits": -1.1753889322280884, "num_tokens": 1, "num_tokens_all": 521, "is_greedy": true, "logits_per_token": -1.1753889322280884, "logits_per_char": -0.5876944661140442, "num_chars": 2}], "label": 0, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 18, "native_id": 18, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2809048891067505, "incorrect_loss_raw": 1.4395376841227214, "correct_loss_per_char": 0.6404524445533752, "incorrect_loss_per_char": 0.7197688420613607, "correct_loss_per_token": 1.2809048891067505, "incorrect_loss_per_token": 1.4395376841227214, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.58017897605896, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.58017897605896, "logits_per_char": -0.79008948802948, "num_chars": 2}, {"sum_logits": -1.2809048891067505, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.2809048891067505, "logits_per_char": -0.6404524445533752, "num_chars": 2}, {"sum_logits": -1.4850050210952759, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.4850050210952759, "logits_per_char": -0.7425025105476379, "num_chars": 2}, {"sum_logits": -1.2534290552139282, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": true, "logits_per_token": -1.2534290552139282, "logits_per_char": -0.6267145276069641, "num_chars": 2}], "label": 1, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 19, "native_id": 19, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5995903015136719, "incorrect_loss_raw": 1.335210641225179, "correct_loss_per_char": 0.7997951507568359, "incorrect_loss_per_char": 0.6676053206125895, "correct_loss_per_token": 1.5995903015136719, "incorrect_loss_per_token": 1.335210641225179, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5995903015136719, "num_tokens": 1, "num_tokens_all": 482, "is_greedy": false, "logits_per_token": -1.5995903015136719, "logits_per_char": -0.7997951507568359, "num_chars": 2}, {"sum_logits": -1.2887976169586182, "num_tokens": 1, "num_tokens_all": 482, "is_greedy": false, "logits_per_token": -1.2887976169586182, "logits_per_char": -0.6443988084793091, "num_chars": 2}, {"sum_logits": -1.4818167686462402, "num_tokens": 1, "num_tokens_all": 482, "is_greedy": false, "logits_per_token": -1.4818167686462402, "logits_per_char": -0.7409083843231201, "num_chars": 2}, {"sum_logits": -1.2350175380706787, "num_tokens": 1, "num_tokens_all": 482, "is_greedy": true, "logits_per_token": -1.2350175380706787, "logits_per_char": -0.6175087690353394, "num_chars": 2}], "label": 0, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 20, "native_id": 20, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4470157623291016, "incorrect_loss_raw": 1.3812738259633381, "correct_loss_per_char": 0.7235078811645508, "incorrect_loss_per_char": 0.6906369129816691, "correct_loss_per_token": 1.4470157623291016, "incorrect_loss_per_token": 1.3812738259633381, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5687581300735474, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": false, "logits_per_token": -1.5687581300735474, "logits_per_char": -0.7843790650367737, "num_chars": 2}, {"sum_logits": -1.2957125902175903, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": false, "logits_per_token": -1.2957125902175903, "logits_per_char": -0.6478562951087952, "num_chars": 2}, {"sum_logits": -1.4470157623291016, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": false, "logits_per_token": -1.4470157623291016, "logits_per_char": -0.7235078811645508, "num_chars": 2}, {"sum_logits": -1.279350757598877, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": true, "logits_per_token": -1.279350757598877, "logits_per_char": -0.6396753787994385, "num_chars": 2}], "label": 2, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 21, "native_id": 21, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3179996013641357, "incorrect_loss_raw": 1.4200855096181233, "correct_loss_per_char": 0.6589998006820679, "incorrect_loss_per_char": 0.7100427548090616, "correct_loss_per_token": 1.3179996013641357, "incorrect_loss_per_token": 1.4200855096181233, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.538313388824463, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": false, "logits_per_token": -1.538313388824463, "logits_per_char": -0.7691566944122314, "num_chars": 2}, {"sum_logits": -1.370119571685791, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": false, "logits_per_token": -1.370119571685791, "logits_per_char": -0.6850597858428955, "num_chars": 2}, {"sum_logits": -1.3518235683441162, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": false, "logits_per_token": -1.3518235683441162, "logits_per_char": -0.6759117841720581, "num_chars": 2}, {"sum_logits": -1.3179996013641357, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": true, "logits_per_token": -1.3179996013641357, "logits_per_char": -0.6589998006820679, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 22, "native_id": 22, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3294919729232788, "incorrect_loss_raw": 1.4205259084701538, "correct_loss_per_char": 0.6647459864616394, "incorrect_loss_per_char": 0.7102629542350769, "correct_loss_per_token": 1.3294919729232788, "incorrect_loss_per_token": 1.4205259084701538, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5112956762313843, "num_tokens": 1, "num_tokens_all": 491, "is_greedy": false, "logits_per_token": -1.5112956762313843, "logits_per_char": -0.7556478381156921, "num_chars": 2}, {"sum_logits": -1.3294919729232788, "num_tokens": 1, "num_tokens_all": 491, "is_greedy": false, "logits_per_token": -1.3294919729232788, "logits_per_char": -0.6647459864616394, "num_chars": 2}, {"sum_logits": -1.5278668403625488, "num_tokens": 1, "num_tokens_all": 491, "is_greedy": false, "logits_per_token": -1.5278668403625488, "logits_per_char": -0.7639334201812744, "num_chars": 2}, {"sum_logits": -1.2224152088165283, "num_tokens": 1, "num_tokens_all": 491, "is_greedy": true, "logits_per_token": -1.2224152088165283, "logits_per_char": -0.6112076044082642, "num_chars": 2}], "label": 1, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 23, "native_id": 23, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4754561185836792, "incorrect_loss_raw": 1.3733639319737752, "correct_loss_per_char": 0.7377280592918396, "incorrect_loss_per_char": 0.6866819659868876, "correct_loss_per_token": 1.4754561185836792, "incorrect_loss_per_token": 1.3733639319737752, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5741939544677734, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": false, "logits_per_token": -1.5741939544677734, "logits_per_char": -0.7870969772338867, "num_chars": 2}, {"sum_logits": -1.3303979635238647, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": false, "logits_per_token": -1.3303979635238647, "logits_per_char": -0.6651989817619324, "num_chars": 2}, {"sum_logits": -1.4754561185836792, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": false, "logits_per_token": -1.4754561185836792, "logits_per_char": -0.7377280592918396, "num_chars": 2}, {"sum_logits": -1.2154998779296875, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": true, "logits_per_token": -1.2154998779296875, "logits_per_char": -0.6077499389648438, "num_chars": 2}], "label": 2, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 24, "native_id": 24, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.264153003692627, "incorrect_loss_raw": 1.4417688846588135, "correct_loss_per_char": 0.6320765018463135, "incorrect_loss_per_char": 0.7208844423294067, "correct_loss_per_token": 1.264153003692627, "incorrect_loss_per_token": 1.4417688846588135, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4144494533538818, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.4144494533538818, "logits_per_char": -0.7072247266769409, "num_chars": 2}, {"sum_logits": -1.264153003692627, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": true, "logits_per_token": -1.264153003692627, "logits_per_char": -0.6320765018463135, "num_chars": 2}, {"sum_logits": -1.5378637313842773, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.5378637313842773, "logits_per_char": -0.7689318656921387, "num_chars": 2}, {"sum_logits": -1.3729934692382812, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.3729934692382812, "logits_per_char": -0.6864967346191406, "num_chars": 2}], "label": 1, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 25, "native_id": 25, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.122913122177124, "incorrect_loss_raw": 1.5052194992701213, "correct_loss_per_char": 0.561456561088562, "incorrect_loss_per_char": 0.7526097496350607, "correct_loss_per_token": 1.122913122177124, "incorrect_loss_per_token": 1.5052194992701213, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5763286352157593, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.5763286352157593, "logits_per_char": -0.7881643176078796, "num_chars": 2}, {"sum_logits": -1.5416052341461182, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.5416052341461182, "logits_per_char": -0.7708026170730591, "num_chars": 2}, {"sum_logits": -1.3977246284484863, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.3977246284484863, "logits_per_char": -0.6988623142242432, "num_chars": 2}, {"sum_logits": -1.122913122177124, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": true, "logits_per_token": -1.122913122177124, "logits_per_char": -0.561456561088562, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 26, "native_id": 26, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.464856505393982, "incorrect_loss_raw": 1.3705037037531536, "correct_loss_per_char": 0.732428252696991, "incorrect_loss_per_char": 0.6852518518765768, "correct_loss_per_token": 1.464856505393982, "incorrect_loss_per_token": 1.3705037037531536, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.464856505393982, "num_tokens": 1, "num_tokens_all": 482, "is_greedy": false, "logits_per_token": -1.464856505393982, "logits_per_char": -0.732428252696991, "num_chars": 2}, {"sum_logits": -1.289872646331787, "num_tokens": 1, "num_tokens_all": 482, "is_greedy": true, "logits_per_token": -1.289872646331787, "logits_per_char": -0.6449363231658936, "num_chars": 2}, {"sum_logits": -1.4611852169036865, "num_tokens": 1, "num_tokens_all": 482, "is_greedy": false, "logits_per_token": -1.4611852169036865, "logits_per_char": -0.7305926084518433, "num_chars": 2}, {"sum_logits": -1.3604532480239868, "num_tokens": 1, "num_tokens_all": 482, "is_greedy": false, "logits_per_token": -1.3604532480239868, "logits_per_char": -0.6802266240119934, "num_chars": 2}], "label": 0, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 27, "native_id": 27, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3784514665603638, "incorrect_loss_raw": 1.399299422899882, "correct_loss_per_char": 0.6892257332801819, "incorrect_loss_per_char": 0.699649711449941, "correct_loss_per_token": 1.3784514665603638, "incorrect_loss_per_token": 1.399299422899882, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5173033475875854, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": false, "logits_per_token": -1.5173033475875854, "logits_per_char": -0.7586516737937927, "num_chars": 2}, {"sum_logits": -1.3784514665603638, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": false, "logits_per_token": -1.3784514665603638, "logits_per_char": -0.6892257332801819, "num_chars": 2}, {"sum_logits": -1.4134305715560913, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": false, "logits_per_token": -1.4134305715560913, "logits_per_char": -0.7067152857780457, "num_chars": 2}, {"sum_logits": -1.2671643495559692, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": true, "logits_per_token": -1.2671643495559692, "logits_per_char": -0.6335821747779846, "num_chars": 2}], "label": 1, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 28, "native_id": 28, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3201764822006226, "incorrect_loss_raw": 1.432762861251831, "correct_loss_per_char": 0.6600882411003113, "incorrect_loss_per_char": 0.7163814306259155, "correct_loss_per_token": 1.3201764822006226, "incorrect_loss_per_token": 1.432762861251831, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5675263404846191, "num_tokens": 1, "num_tokens_all": 527, "is_greedy": false, "logits_per_token": -1.5675263404846191, "logits_per_char": -0.7837631702423096, "num_chars": 2}, {"sum_logits": -1.3201764822006226, "num_tokens": 1, "num_tokens_all": 527, "is_greedy": false, "logits_per_token": -1.3201764822006226, "logits_per_char": -0.6600882411003113, "num_chars": 2}, {"sum_logits": -1.5701802968978882, "num_tokens": 1, "num_tokens_all": 527, "is_greedy": false, "logits_per_token": -1.5701802968978882, "logits_per_char": -0.7850901484489441, "num_chars": 2}, {"sum_logits": -1.1605819463729858, "num_tokens": 1, "num_tokens_all": 527, "is_greedy": true, "logits_per_token": -1.1605819463729858, "logits_per_char": -0.5802909731864929, "num_chars": 2}], "label": 1, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 29, "native_id": 29, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.383171796798706, "incorrect_loss_raw": 1.4005184570948284, "correct_loss_per_char": 0.691585898399353, "incorrect_loss_per_char": 0.7002592285474142, "correct_loss_per_token": 1.383171796798706, "incorrect_loss_per_token": 1.4005184570948284, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4681737422943115, "num_tokens": 1, "num_tokens_all": 489, "is_greedy": false, "logits_per_token": -1.4681737422943115, "logits_per_char": -0.7340868711471558, "num_chars": 2}, {"sum_logits": -1.2092362642288208, "num_tokens": 1, "num_tokens_all": 489, "is_greedy": true, "logits_per_token": -1.2092362642288208, "logits_per_char": -0.6046181321144104, "num_chars": 2}, {"sum_logits": -1.5241453647613525, "num_tokens": 1, "num_tokens_all": 489, "is_greedy": false, "logits_per_token": -1.5241453647613525, "logits_per_char": -0.7620726823806763, "num_chars": 2}, {"sum_logits": -1.383171796798706, "num_tokens": 1, "num_tokens_all": 489, "is_greedy": false, "logits_per_token": -1.383171796798706, "logits_per_char": -0.691585898399353, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 30, "native_id": 30, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3046305179595947, "incorrect_loss_raw": 1.4283267656962078, "correct_loss_per_char": 0.6523152589797974, "incorrect_loss_per_char": 0.7141633828481039, "correct_loss_per_token": 1.3046305179595947, "incorrect_loss_per_token": 1.4283267656962078, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.535867691040039, "num_tokens": 1, "num_tokens_all": 493, "is_greedy": false, "logits_per_token": -1.535867691040039, "logits_per_char": -0.7679338455200195, "num_chars": 2}, {"sum_logits": -1.2726435661315918, "num_tokens": 1, "num_tokens_all": 493, "is_greedy": true, "logits_per_token": -1.2726435661315918, "logits_per_char": -0.6363217830657959, "num_chars": 2}, {"sum_logits": -1.4764690399169922, "num_tokens": 1, "num_tokens_all": 493, "is_greedy": false, "logits_per_token": -1.4764690399169922, "logits_per_char": -0.7382345199584961, "num_chars": 2}, {"sum_logits": -1.3046305179595947, "num_tokens": 1, "num_tokens_all": 493, "is_greedy": false, "logits_per_token": -1.3046305179595947, "logits_per_char": -0.6523152589797974, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 31, "native_id": 31, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1073541641235352, "incorrect_loss_raw": 1.5099453926086426, "correct_loss_per_char": 0.5536770820617676, "incorrect_loss_per_char": 0.7549726963043213, "correct_loss_per_token": 1.1073541641235352, "incorrect_loss_per_token": 1.5099453926086426, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.647695779800415, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": false, "logits_per_token": -1.647695779800415, "logits_per_char": -0.8238478899002075, "num_chars": 2}, {"sum_logits": -1.4323585033416748, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": false, "logits_per_token": -1.4323585033416748, "logits_per_char": -0.7161792516708374, "num_chars": 2}, {"sum_logits": -1.449781894683838, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": false, "logits_per_token": -1.449781894683838, "logits_per_char": -0.724890947341919, "num_chars": 2}, {"sum_logits": -1.1073541641235352, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": true, "logits_per_token": -1.1073541641235352, "logits_per_char": -0.5536770820617676, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 32, "native_id": 32, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4750478267669678, "incorrect_loss_raw": 1.37689475218455, "correct_loss_per_char": 0.7375239133834839, "incorrect_loss_per_char": 0.688447376092275, "correct_loss_per_token": 1.4750478267669678, "incorrect_loss_per_token": 1.37689475218455, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5585932731628418, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": false, "logits_per_token": -1.5585932731628418, "logits_per_char": -0.7792966365814209, "num_chars": 2}, {"sum_logits": -1.4750478267669678, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": false, "logits_per_token": -1.4750478267669678, "logits_per_char": -0.7375239133834839, "num_chars": 2}, {"sum_logits": -1.4123740196228027, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": false, "logits_per_token": -1.4123740196228027, "logits_per_char": -0.7061870098114014, "num_chars": 2}, {"sum_logits": -1.1597169637680054, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": true, "logits_per_token": -1.1597169637680054, "logits_per_char": -0.5798584818840027, "num_chars": 2}], "label": 1, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 33, "native_id": 33, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3543298244476318, "incorrect_loss_raw": 1.4043703476587932, "correct_loss_per_char": 0.6771649122238159, "incorrect_loss_per_char": 0.7021851738293966, "correct_loss_per_token": 1.3543298244476318, "incorrect_loss_per_token": 1.4043703476587932, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4602563381195068, "num_tokens": 1, "num_tokens_all": 493, "is_greedy": false, "logits_per_token": -1.4602563381195068, "logits_per_char": -0.7301281690597534, "num_chars": 2}, {"sum_logits": -1.2990975379943848, "num_tokens": 1, "num_tokens_all": 493, "is_greedy": true, "logits_per_token": -1.2990975379943848, "logits_per_char": -0.6495487689971924, "num_chars": 2}, {"sum_logits": -1.4537571668624878, "num_tokens": 1, "num_tokens_all": 493, "is_greedy": false, "logits_per_token": -1.4537571668624878, "logits_per_char": -0.7268785834312439, "num_chars": 2}, {"sum_logits": -1.3543298244476318, "num_tokens": 1, "num_tokens_all": 493, "is_greedy": false, "logits_per_token": -1.3543298244476318, "logits_per_char": -0.6771649122238159, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 34, "native_id": 34, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2725162506103516, "incorrect_loss_raw": 1.4484004974365234, "correct_loss_per_char": 0.6362581253051758, "incorrect_loss_per_char": 0.7242002487182617, "correct_loss_per_token": 1.2725162506103516, "incorrect_loss_per_token": 1.4484004974365234, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5752317905426025, "num_tokens": 1, "num_tokens_all": 460, "is_greedy": false, "logits_per_token": -1.5752317905426025, "logits_per_char": -0.7876158952713013, "num_chars": 2}, {"sum_logits": -1.1988962888717651, "num_tokens": 1, "num_tokens_all": 460, "is_greedy": true, "logits_per_token": -1.1988962888717651, "logits_per_char": -0.5994481444358826, "num_chars": 2}, {"sum_logits": -1.5710734128952026, "num_tokens": 1, "num_tokens_all": 460, "is_greedy": false, "logits_per_token": -1.5710734128952026, "logits_per_char": -0.7855367064476013, "num_chars": 2}, {"sum_logits": -1.2725162506103516, "num_tokens": 1, "num_tokens_all": 460, "is_greedy": false, "logits_per_token": -1.2725162506103516, "logits_per_char": -0.6362581253051758, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 35, "native_id": 35, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2174698114395142, "incorrect_loss_raw": 1.4616393248240154, "correct_loss_per_char": 0.6087349057197571, "incorrect_loss_per_char": 0.7308196624120077, "correct_loss_per_token": 1.2174698114395142, "incorrect_loss_per_token": 1.4616393248240154, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5331987142562866, "num_tokens": 1, "num_tokens_all": 492, "is_greedy": false, "logits_per_token": -1.5331987142562866, "logits_per_char": -0.7665993571281433, "num_chars": 2}, {"sum_logits": -1.303675651550293, "num_tokens": 1, "num_tokens_all": 492, "is_greedy": false, "logits_per_token": -1.303675651550293, "logits_per_char": -0.6518378257751465, "num_chars": 2}, {"sum_logits": -1.5480436086654663, "num_tokens": 1, "num_tokens_all": 492, "is_greedy": false, "logits_per_token": -1.5480436086654663, "logits_per_char": -0.7740218043327332, "num_chars": 2}, {"sum_logits": -1.2174698114395142, "num_tokens": 1, "num_tokens_all": 492, "is_greedy": true, "logits_per_token": -1.2174698114395142, "logits_per_char": -0.6087349057197571, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 36, "native_id": 36, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2888389825820923, "incorrect_loss_raw": 1.435696283976237, "correct_loss_per_char": 0.6444194912910461, "incorrect_loss_per_char": 0.7178481419881185, "correct_loss_per_token": 1.2888389825820923, "incorrect_loss_per_token": 1.435696283976237, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5402731895446777, "num_tokens": 1, "num_tokens_all": 460, "is_greedy": false, "logits_per_token": -1.5402731895446777, "logits_per_char": -0.7701365947723389, "num_chars": 2}, {"sum_logits": -1.2462284564971924, "num_tokens": 1, "num_tokens_all": 460, "is_greedy": true, "logits_per_token": -1.2462284564971924, "logits_per_char": -0.6231142282485962, "num_chars": 2}, {"sum_logits": -1.5205872058868408, "num_tokens": 1, "num_tokens_all": 460, "is_greedy": false, "logits_per_token": -1.5205872058868408, "logits_per_char": -0.7602936029434204, "num_chars": 2}, {"sum_logits": -1.2888389825820923, "num_tokens": 1, "num_tokens_all": 460, "is_greedy": false, "logits_per_token": -1.2888389825820923, "logits_per_char": -0.6444194912910461, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 37, "native_id": 37, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4745656251907349, "incorrect_loss_raw": 1.3700645764668782, "correct_loss_per_char": 0.7372828125953674, "incorrect_loss_per_char": 0.6850322882334391, "correct_loss_per_token": 1.4745656251907349, "incorrect_loss_per_token": 1.3700645764668782, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4745656251907349, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": false, "logits_per_token": -1.4745656251907349, "logits_per_char": -0.7372828125953674, "num_chars": 2}, {"sum_logits": -1.39313805103302, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": false, "logits_per_token": -1.39313805103302, "logits_per_char": -0.69656902551651, "num_chars": 2}, {"sum_logits": -1.4029147624969482, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": false, "logits_per_token": -1.4029147624969482, "logits_per_char": -0.7014573812484741, "num_chars": 2}, {"sum_logits": -1.3141409158706665, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": true, "logits_per_token": -1.3141409158706665, "logits_per_char": -0.6570704579353333, "num_chars": 2}], "label": 0, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 38, "native_id": 38, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5129646062850952, "incorrect_loss_raw": 1.3577250242233276, "correct_loss_per_char": 0.7564823031425476, "incorrect_loss_per_char": 0.6788625121116638, "correct_loss_per_token": 1.5129646062850952, "incorrect_loss_per_token": 1.3577250242233276, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5201302766799927, "num_tokens": 1, "num_tokens_all": 475, "is_greedy": false, "logits_per_token": -1.5201302766799927, "logits_per_char": -0.7600651383399963, "num_chars": 2}, {"sum_logits": -1.2774033546447754, "num_tokens": 1, "num_tokens_all": 475, "is_greedy": false, "logits_per_token": -1.2774033546447754, "logits_per_char": -0.6387016773223877, "num_chars": 2}, {"sum_logits": -1.5129646062850952, "num_tokens": 1, "num_tokens_all": 475, "is_greedy": false, "logits_per_token": -1.5129646062850952, "logits_per_char": -0.7564823031425476, "num_chars": 2}, {"sum_logits": -1.2756414413452148, "num_tokens": 1, "num_tokens_all": 475, "is_greedy": true, "logits_per_token": -1.2756414413452148, "logits_per_char": -0.6378207206726074, "num_chars": 2}], "label": 2, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 39, "native_id": 39, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3682260513305664, "incorrect_loss_raw": 1.4084193309148152, "correct_loss_per_char": 0.6841130256652832, "incorrect_loss_per_char": 0.7042096654574076, "correct_loss_per_token": 1.3682260513305664, "incorrect_loss_per_token": 1.4084193309148152, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.579162836074829, "num_tokens": 1, "num_tokens_all": 471, "is_greedy": false, "logits_per_token": -1.579162836074829, "logits_per_char": -0.7895814180374146, "num_chars": 2}, {"sum_logits": -1.2176036834716797, "num_tokens": 1, "num_tokens_all": 471, "is_greedy": true, "logits_per_token": -1.2176036834716797, "logits_per_char": -0.6088018417358398, "num_chars": 2}, {"sum_logits": -1.428491473197937, "num_tokens": 1, "num_tokens_all": 471, "is_greedy": false, "logits_per_token": -1.428491473197937, "logits_per_char": -0.7142457365989685, "num_chars": 2}, {"sum_logits": -1.3682260513305664, "num_tokens": 1, "num_tokens_all": 471, "is_greedy": false, "logits_per_token": -1.3682260513305664, "logits_per_char": -0.6841130256652832, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 40, "native_id": 40, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2678961753845215, "incorrect_loss_raw": 1.4354066054026287, "correct_loss_per_char": 0.6339480876922607, "incorrect_loss_per_char": 0.7177033027013143, "correct_loss_per_token": 1.2678961753845215, "incorrect_loss_per_token": 1.4354066054026287, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.525328516960144, "num_tokens": 1, "num_tokens_all": 461, "is_greedy": false, "logits_per_token": -1.525328516960144, "logits_per_char": -0.762664258480072, "num_chars": 2}, {"sum_logits": -1.3797773122787476, "num_tokens": 1, "num_tokens_all": 461, "is_greedy": false, "logits_per_token": -1.3797773122787476, "logits_per_char": -0.6898886561393738, "num_chars": 2}, {"sum_logits": -1.4011139869689941, "num_tokens": 1, "num_tokens_all": 461, "is_greedy": false, "logits_per_token": -1.4011139869689941, "logits_per_char": -0.7005569934844971, "num_chars": 2}, {"sum_logits": -1.2678961753845215, "num_tokens": 1, "num_tokens_all": 461, "is_greedy": true, "logits_per_token": -1.2678961753845215, "logits_per_char": -0.6339480876922607, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 41, "native_id": 41, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.383196473121643, "incorrect_loss_raw": 1.4132013320922852, "correct_loss_per_char": 0.6915982365608215, "incorrect_loss_per_char": 0.7066006660461426, "correct_loss_per_token": 1.383196473121643, "incorrect_loss_per_token": 1.4132013320922852, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6124827861785889, "num_tokens": 1, "num_tokens_all": 498, "is_greedy": false, "logits_per_token": -1.6124827861785889, "logits_per_char": -0.8062413930892944, "num_chars": 2}, {"sum_logits": -1.383196473121643, "num_tokens": 1, "num_tokens_all": 498, "is_greedy": false, "logits_per_token": -1.383196473121643, "logits_per_char": -0.6915982365608215, "num_chars": 2}, {"sum_logits": -1.4923280477523804, "num_tokens": 1, "num_tokens_all": 498, "is_greedy": false, "logits_per_token": -1.4923280477523804, "logits_per_char": -0.7461640238761902, "num_chars": 2}, {"sum_logits": -1.1347931623458862, "num_tokens": 1, "num_tokens_all": 498, "is_greedy": true, "logits_per_token": -1.1347931623458862, "logits_per_char": -0.5673965811729431, "num_chars": 2}], "label": 1, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 42, "native_id": 42, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3983062505722046, "incorrect_loss_raw": 1.3955403963724773, "correct_loss_per_char": 0.6991531252861023, "incorrect_loss_per_char": 0.6977701981862386, "correct_loss_per_token": 1.3983062505722046, "incorrect_loss_per_token": 1.3955403963724773, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5679186582565308, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": false, "logits_per_token": -1.5679186582565308, "logits_per_char": -0.7839593291282654, "num_chars": 2}, {"sum_logits": -1.3428828716278076, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": false, "logits_per_token": -1.3428828716278076, "logits_per_char": -0.6714414358139038, "num_chars": 2}, {"sum_logits": -1.3983062505722046, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": false, "logits_per_token": -1.3983062505722046, "logits_per_char": -0.6991531252861023, "num_chars": 2}, {"sum_logits": -1.2758196592330933, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": true, "logits_per_token": -1.2758196592330933, "logits_per_char": -0.6379098296165466, "num_chars": 2}], "label": 2, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 43, "native_id": 43, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3111064434051514, "incorrect_loss_raw": 1.4305883646011353, "correct_loss_per_char": 0.6555532217025757, "incorrect_loss_per_char": 0.7152941823005676, "correct_loss_per_token": 1.3111064434051514, "incorrect_loss_per_token": 1.4305883646011353, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6154577732086182, "num_tokens": 1, "num_tokens_all": 462, "is_greedy": false, "logits_per_token": -1.6154577732086182, "logits_per_char": -0.8077288866043091, "num_chars": 2}, {"sum_logits": -1.3111064434051514, "num_tokens": 1, "num_tokens_all": 462, "is_greedy": false, "logits_per_token": -1.3111064434051514, "logits_per_char": -0.6555532217025757, "num_chars": 2}, {"sum_logits": -1.443202018737793, "num_tokens": 1, "num_tokens_all": 462, "is_greedy": false, "logits_per_token": -1.443202018737793, "logits_per_char": -0.7216010093688965, "num_chars": 2}, {"sum_logits": -1.2331053018569946, "num_tokens": 1, "num_tokens_all": 462, "is_greedy": true, "logits_per_token": -1.2331053018569946, "logits_per_char": -0.6165526509284973, "num_chars": 2}], "label": 1, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 44, "native_id": 44, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3627967834472656, "incorrect_loss_raw": 1.420264999071757, "correct_loss_per_char": 0.6813983917236328, "incorrect_loss_per_char": 0.7101324995358785, "correct_loss_per_token": 1.3627967834472656, "incorrect_loss_per_token": 1.420264999071757, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6540756225585938, "num_tokens": 1, "num_tokens_all": 489, "is_greedy": false, "logits_per_token": -1.6540756225585938, "logits_per_char": -0.8270378112792969, "num_chars": 2}, {"sum_logits": -1.3627967834472656, "num_tokens": 1, "num_tokens_all": 489, "is_greedy": false, "logits_per_token": -1.3627967834472656, "logits_per_char": -0.6813983917236328, "num_chars": 2}, {"sum_logits": -1.4476412534713745, "num_tokens": 1, "num_tokens_all": 489, "is_greedy": false, "logits_per_token": -1.4476412534713745, "logits_per_char": -0.7238206267356873, "num_chars": 2}, {"sum_logits": -1.1590781211853027, "num_tokens": 1, "num_tokens_all": 489, "is_greedy": true, "logits_per_token": -1.1590781211853027, "logits_per_char": -0.5795390605926514, "num_chars": 2}], "label": 1, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 45, "native_id": 45, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2670693397521973, "incorrect_loss_raw": 1.4383463462193806, "correct_loss_per_char": 0.6335346698760986, "incorrect_loss_per_char": 0.7191731731096903, "correct_loss_per_token": 1.2670693397521973, "incorrect_loss_per_token": 1.4383463462193806, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4303056001663208, "num_tokens": 1, "num_tokens_all": 480, "is_greedy": false, "logits_per_token": -1.4303056001663208, "logits_per_char": -0.7151528000831604, "num_chars": 2}, {"sum_logits": -1.2670693397521973, "num_tokens": 1, "num_tokens_all": 480, "is_greedy": true, "logits_per_token": -1.2670693397521973, "logits_per_char": -0.6335346698760986, "num_chars": 2}, {"sum_logits": -1.5532023906707764, "num_tokens": 1, "num_tokens_all": 480, "is_greedy": false, "logits_per_token": -1.5532023906707764, "logits_per_char": -0.7766011953353882, "num_chars": 2}, {"sum_logits": -1.331531047821045, "num_tokens": 1, "num_tokens_all": 480, "is_greedy": false, "logits_per_token": -1.331531047821045, "logits_per_char": -0.6657655239105225, "num_chars": 2}], "label": 1, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 46, "native_id": 46, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5601327419281006, "incorrect_loss_raw": 1.3483258883158367, "correct_loss_per_char": 0.7800663709640503, "incorrect_loss_per_char": 0.6741629441579183, "correct_loss_per_token": 1.5601327419281006, "incorrect_loss_per_token": 1.3483258883158367, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5150563716888428, "num_tokens": 1, "num_tokens_all": 495, "is_greedy": false, "logits_per_token": -1.5150563716888428, "logits_per_char": -0.7575281858444214, "num_chars": 2}, {"sum_logits": -1.309926152229309, "num_tokens": 1, "num_tokens_all": 495, "is_greedy": false, "logits_per_token": -1.309926152229309, "logits_per_char": -0.6549630761146545, "num_chars": 2}, {"sum_logits": -1.5601327419281006, "num_tokens": 1, "num_tokens_all": 495, "is_greedy": false, "logits_per_token": -1.5601327419281006, "logits_per_char": -0.7800663709640503, "num_chars": 2}, {"sum_logits": -1.219995141029358, "num_tokens": 1, "num_tokens_all": 495, "is_greedy": true, "logits_per_token": -1.219995141029358, "logits_per_char": -0.609997570514679, "num_chars": 2}], "label": 2, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 47, "native_id": 47, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.339284896850586, "incorrect_loss_raw": 1.409336527188619, "correct_loss_per_char": 0.669642448425293, "incorrect_loss_per_char": 0.7046682635943095, "correct_loss_per_token": 1.339284896850586, "incorrect_loss_per_token": 1.409336527188619, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4697487354278564, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": false, "logits_per_token": -1.4697487354278564, "logits_per_char": -0.7348743677139282, "num_chars": 2}, {"sum_logits": -1.3285880088806152, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": true, "logits_per_token": -1.3285880088806152, "logits_per_char": -0.6642940044403076, "num_chars": 2}, {"sum_logits": -1.4296728372573853, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": false, "logits_per_token": -1.4296728372573853, "logits_per_char": -0.7148364186286926, "num_chars": 2}, {"sum_logits": -1.339284896850586, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": false, "logits_per_token": -1.339284896850586, "logits_per_char": -0.669642448425293, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 48, "native_id": 48, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.178096055984497, "incorrect_loss_raw": 1.4763800303141277, "correct_loss_per_char": 0.5890480279922485, "incorrect_loss_per_char": 0.7381900151570638, "correct_loss_per_token": 1.178096055984497, "incorrect_loss_per_token": 1.4763800303141277, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5931293964385986, "num_tokens": 1, "num_tokens_all": 491, "is_greedy": false, "logits_per_token": -1.5931293964385986, "logits_per_char": -0.7965646982192993, "num_chars": 2}, {"sum_logits": -1.3781684637069702, "num_tokens": 1, "num_tokens_all": 491, "is_greedy": false, "logits_per_token": -1.3781684637069702, "logits_per_char": -0.6890842318534851, "num_chars": 2}, {"sum_logits": -1.457842230796814, "num_tokens": 1, "num_tokens_all": 491, "is_greedy": false, "logits_per_token": -1.457842230796814, "logits_per_char": -0.728921115398407, "num_chars": 2}, {"sum_logits": -1.178096055984497, "num_tokens": 1, "num_tokens_all": 491, "is_greedy": true, "logits_per_token": -1.178096055984497, "logits_per_char": -0.5890480279922485, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 49, "native_id": 49, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2018120288848877, "incorrect_loss_raw": 1.4693981011708577, "correct_loss_per_char": 0.6009060144424438, "incorrect_loss_per_char": 0.7346990505854288, "correct_loss_per_token": 1.2018120288848877, "incorrect_loss_per_token": 1.4693981011708577, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5616827011108398, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": false, "logits_per_token": -1.5616827011108398, "logits_per_char": -0.7808413505554199, "num_chars": 2}, {"sum_logits": -1.2891285419464111, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": false, "logits_per_token": -1.2891285419464111, "logits_per_char": -0.6445642709732056, "num_chars": 2}, {"sum_logits": -1.5573830604553223, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": false, "logits_per_token": -1.5573830604553223, "logits_per_char": -0.7786915302276611, "num_chars": 2}, {"sum_logits": -1.2018120288848877, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": true, "logits_per_token": -1.2018120288848877, "logits_per_char": -0.6009060144424438, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 50, "native_id": 50, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3172591924667358, "incorrect_loss_raw": 1.4189612468083699, "correct_loss_per_char": 0.6586295962333679, "incorrect_loss_per_char": 0.7094806234041849, "correct_loss_per_token": 1.3172591924667358, "incorrect_loss_per_token": 1.4189612468083699, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4223051071166992, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.4223051071166992, "logits_per_char": -0.7111525535583496, "num_chars": 2}, {"sum_logits": -1.3172591924667358, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": true, "logits_per_token": -1.3172591924667358, "logits_per_char": -0.6586295962333679, "num_chars": 2}, {"sum_logits": -1.4979082345962524, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.4979082345962524, "logits_per_char": -0.7489541172981262, "num_chars": 2}, {"sum_logits": -1.3366703987121582, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.3366703987121582, "logits_per_char": -0.6683351993560791, "num_chars": 2}], "label": 1, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 51, "native_id": 51, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3903602361679077, "incorrect_loss_raw": 1.395260214805603, "correct_loss_per_char": 0.6951801180839539, "incorrect_loss_per_char": 0.6976301074028015, "correct_loss_per_token": 1.3903602361679077, "incorrect_loss_per_token": 1.395260214805603, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3903602361679077, "num_tokens": 1, "num_tokens_all": 448, "is_greedy": false, "logits_per_token": -1.3903602361679077, "logits_per_char": -0.6951801180839539, "num_chars": 2}, {"sum_logits": -1.307786226272583, "num_tokens": 1, "num_tokens_all": 448, "is_greedy": true, "logits_per_token": -1.307786226272583, "logits_per_char": -0.6538931131362915, "num_chars": 2}, {"sum_logits": -1.5322271585464478, "num_tokens": 1, "num_tokens_all": 448, "is_greedy": false, "logits_per_token": -1.5322271585464478, "logits_per_char": -0.7661135792732239, "num_chars": 2}, {"sum_logits": -1.3457672595977783, "num_tokens": 1, "num_tokens_all": 448, "is_greedy": false, "logits_per_token": -1.3457672595977783, "logits_per_char": -0.6728836297988892, "num_chars": 2}], "label": 0, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 52, "native_id": 52, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3359532356262207, "incorrect_loss_raw": 1.4146610895792644, "correct_loss_per_char": 0.6679766178131104, "incorrect_loss_per_char": 0.7073305447896322, "correct_loss_per_token": 1.3359532356262207, "incorrect_loss_per_token": 1.4146610895792644, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3359532356262207, "num_tokens": 1, "num_tokens_all": 428, "is_greedy": false, "logits_per_token": -1.3359532356262207, "logits_per_char": -0.6679766178131104, "num_chars": 2}, {"sum_logits": -1.3656820058822632, "num_tokens": 1, "num_tokens_all": 428, "is_greedy": false, "logits_per_token": -1.3656820058822632, "logits_per_char": -0.6828410029411316, "num_chars": 2}, {"sum_logits": -1.5470534563064575, "num_tokens": 1, "num_tokens_all": 428, "is_greedy": false, "logits_per_token": -1.5470534563064575, "logits_per_char": -0.7735267281532288, "num_chars": 2}, {"sum_logits": -1.3312478065490723, "num_tokens": 1, "num_tokens_all": 428, "is_greedy": true, "logits_per_token": -1.3312478065490723, "logits_per_char": -0.6656239032745361, "num_chars": 2}], "label": 0, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 53, "native_id": 53, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0379786491394043, "incorrect_loss_raw": 1.5590505599975586, "correct_loss_per_char": 0.5189893245697021, "incorrect_loss_per_char": 0.7795252799987793, "correct_loss_per_token": 1.0379786491394043, "incorrect_loss_per_token": 1.5590505599975586, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.8335405588150024, "num_tokens": 1, "num_tokens_all": 516, "is_greedy": false, "logits_per_token": -1.8335405588150024, "logits_per_char": -0.9167702794075012, "num_chars": 2}, {"sum_logits": -1.3940712213516235, "num_tokens": 1, "num_tokens_all": 516, "is_greedy": false, "logits_per_token": -1.3940712213516235, "logits_per_char": -0.6970356106758118, "num_chars": 2}, {"sum_logits": -1.4495398998260498, "num_tokens": 1, "num_tokens_all": 516, "is_greedy": false, "logits_per_token": -1.4495398998260498, "logits_per_char": -0.7247699499130249, "num_chars": 2}, {"sum_logits": -1.0379786491394043, "num_tokens": 1, "num_tokens_all": 516, "is_greedy": true, "logits_per_token": -1.0379786491394043, "logits_per_char": -0.5189893245697021, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 54, "native_id": 54, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4193519353866577, "incorrect_loss_raw": 1.402406096458435, "correct_loss_per_char": 0.7096759676933289, "incorrect_loss_per_char": 0.7012030482292175, "correct_loss_per_token": 1.4193519353866577, "incorrect_loss_per_token": 1.402406096458435, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5015214681625366, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": false, "logits_per_token": -1.5015214681625366, "logits_per_char": -0.7507607340812683, "num_chars": 2}, {"sum_logits": -1.5843746662139893, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": false, "logits_per_token": -1.5843746662139893, "logits_per_char": -0.7921873331069946, "num_chars": 2}, {"sum_logits": -1.4193519353866577, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": false, "logits_per_token": -1.4193519353866577, "logits_per_char": -0.7096759676933289, "num_chars": 2}, {"sum_logits": -1.1213221549987793, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": true, "logits_per_token": -1.1213221549987793, "logits_per_char": -0.5606610774993896, "num_chars": 2}], "label": 2, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 55, "native_id": 55, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1656503677368164, "incorrect_loss_raw": 1.4909249544143677, "correct_loss_per_char": 0.5828251838684082, "incorrect_loss_per_char": 0.7454624772071838, "correct_loss_per_token": 1.1656503677368164, "incorrect_loss_per_token": 1.4909249544143677, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7210685014724731, "num_tokens": 1, "num_tokens_all": 508, "is_greedy": false, "logits_per_token": -1.7210685014724731, "logits_per_char": -0.8605342507362366, "num_chars": 2}, {"sum_logits": -1.3114445209503174, "num_tokens": 1, "num_tokens_all": 508, "is_greedy": false, "logits_per_token": -1.3114445209503174, "logits_per_char": -0.6557222604751587, "num_chars": 2}, {"sum_logits": -1.4402618408203125, "num_tokens": 1, "num_tokens_all": 508, "is_greedy": false, "logits_per_token": -1.4402618408203125, "logits_per_char": -0.7201309204101562, "num_chars": 2}, {"sum_logits": -1.1656503677368164, "num_tokens": 1, "num_tokens_all": 508, "is_greedy": true, "logits_per_token": -1.1656503677368164, "logits_per_char": -0.5828251838684082, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 56, "native_id": 56, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5683095455169678, "incorrect_loss_raw": 1.337177077929179, "correct_loss_per_char": 0.7841547727584839, "incorrect_loss_per_char": 0.6685885389645895, "correct_loss_per_token": 1.5683095455169678, "incorrect_loss_per_token": 1.337177077929179, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3968372344970703, "num_tokens": 1, "num_tokens_all": 478, "is_greedy": false, "logits_per_token": -1.3968372344970703, "logits_per_char": -0.6984186172485352, "num_chars": 2}, {"sum_logits": -1.336538314819336, "num_tokens": 1, "num_tokens_all": 478, "is_greedy": false, "logits_per_token": -1.336538314819336, "logits_per_char": -0.668269157409668, "num_chars": 2}, {"sum_logits": -1.5683095455169678, "num_tokens": 1, "num_tokens_all": 478, "is_greedy": false, "logits_per_token": -1.5683095455169678, "logits_per_char": -0.7841547727584839, "num_chars": 2}, {"sum_logits": -1.2781556844711304, "num_tokens": 1, "num_tokens_all": 478, "is_greedy": true, "logits_per_token": -1.2781556844711304, "logits_per_char": -0.6390778422355652, "num_chars": 2}], "label": 2, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 57, "native_id": 57, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2116373777389526, "incorrect_loss_raw": 1.4633195002873738, "correct_loss_per_char": 0.6058186888694763, "incorrect_loss_per_char": 0.7316597501436869, "correct_loss_per_token": 1.2116373777389526, "incorrect_loss_per_token": 1.4633195002873738, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5026627779006958, "num_tokens": 1, "num_tokens_all": 478, "is_greedy": false, "logits_per_token": -1.5026627779006958, "logits_per_char": -0.7513313889503479, "num_chars": 2}, {"sum_logits": -1.313603401184082, "num_tokens": 1, "num_tokens_all": 478, "is_greedy": false, "logits_per_token": -1.313603401184082, "logits_per_char": -0.656801700592041, "num_chars": 2}, {"sum_logits": -1.5736923217773438, "num_tokens": 1, "num_tokens_all": 478, "is_greedy": false, "logits_per_token": -1.5736923217773438, "logits_per_char": -0.7868461608886719, "num_chars": 2}, {"sum_logits": -1.2116373777389526, "num_tokens": 1, "num_tokens_all": 478, "is_greedy": true, "logits_per_token": -1.2116373777389526, "logits_per_char": -0.6058186888694763, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 58, "native_id": 58, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.383171796798706, "incorrect_loss_raw": 1.4005184570948284, "correct_loss_per_char": 0.691585898399353, "incorrect_loss_per_char": 0.7002592285474142, "correct_loss_per_token": 1.383171796798706, "incorrect_loss_per_token": 1.4005184570948284, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4681737422943115, "num_tokens": 1, "num_tokens_all": 489, "is_greedy": false, "logits_per_token": -1.4681737422943115, "logits_per_char": -0.7340868711471558, "num_chars": 2}, {"sum_logits": -1.2092362642288208, "num_tokens": 1, "num_tokens_all": 489, "is_greedy": true, "logits_per_token": -1.2092362642288208, "logits_per_char": -0.6046181321144104, "num_chars": 2}, {"sum_logits": -1.5241453647613525, "num_tokens": 1, "num_tokens_all": 489, "is_greedy": false, "logits_per_token": -1.5241453647613525, "logits_per_char": -0.7620726823806763, "num_chars": 2}, {"sum_logits": -1.383171796798706, "num_tokens": 1, "num_tokens_all": 489, "is_greedy": false, "logits_per_token": -1.383171796798706, "logits_per_char": -0.691585898399353, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 59, "native_id": 59, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4875355958938599, "incorrect_loss_raw": 1.3638638655344646, "correct_loss_per_char": 0.7437677979469299, "incorrect_loss_per_char": 0.6819319327672323, "correct_loss_per_token": 1.4875355958938599, "incorrect_loss_per_token": 1.3638638655344646, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3286280632019043, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": false, "logits_per_token": -1.3286280632019043, "logits_per_char": -0.6643140316009521, "num_chars": 2}, {"sum_logits": -1.3081437349319458, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": true, "logits_per_token": -1.3081437349319458, "logits_per_char": -0.6540718674659729, "num_chars": 2}, {"sum_logits": -1.4875355958938599, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": false, "logits_per_token": -1.4875355958938599, "logits_per_char": -0.7437677979469299, "num_chars": 2}, {"sum_logits": -1.4548197984695435, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": false, "logits_per_token": -1.4548197984695435, "logits_per_char": -0.7274098992347717, "num_chars": 2}], "label": 2, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 60, "native_id": 60, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1770415306091309, "incorrect_loss_raw": 1.4746570189793904, "correct_loss_per_char": 0.5885207653045654, "incorrect_loss_per_char": 0.7373285094896952, "correct_loss_per_token": 1.1770415306091309, "incorrect_loss_per_token": 1.4746570189793904, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5524423122406006, "num_tokens": 1, "num_tokens_all": 489, "is_greedy": false, "logits_per_token": -1.5524423122406006, "logits_per_char": -0.7762211561203003, "num_chars": 2}, {"sum_logits": -1.4865258932113647, "num_tokens": 1, "num_tokens_all": 489, "is_greedy": false, "logits_per_token": -1.4865258932113647, "logits_per_char": -0.7432629466056824, "num_chars": 2}, {"sum_logits": -1.385002851486206, "num_tokens": 1, "num_tokens_all": 489, "is_greedy": false, "logits_per_token": -1.385002851486206, "logits_per_char": -0.692501425743103, "num_chars": 2}, {"sum_logits": -1.1770415306091309, "num_tokens": 1, "num_tokens_all": 489, "is_greedy": true, "logits_per_token": -1.1770415306091309, "logits_per_char": -0.5885207653045654, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 61, "native_id": 61, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.412358283996582, "incorrect_loss_raw": 1.3937496741612752, "correct_loss_per_char": 0.706179141998291, "incorrect_loss_per_char": 0.6968748370806376, "correct_loss_per_token": 1.412358283996582, "incorrect_loss_per_token": 1.3937496741612752, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.412358283996582, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": false, "logits_per_token": -1.412358283996582, "logits_per_char": -0.706179141998291, "num_chars": 2}, {"sum_logits": -1.4399887323379517, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": false, "logits_per_token": -1.4399887323379517, "logits_per_char": -0.7199943661689758, "num_chars": 2}, {"sum_logits": -1.5384962558746338, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": false, "logits_per_token": -1.5384962558746338, "logits_per_char": -0.7692481279373169, "num_chars": 2}, {"sum_logits": -1.2027640342712402, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": true, "logits_per_token": -1.2027640342712402, "logits_per_char": -0.6013820171356201, "num_chars": 2}], "label": 0, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 62, "native_id": 62, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2134156227111816, "incorrect_loss_raw": 1.458727240562439, "correct_loss_per_char": 0.6067078113555908, "incorrect_loss_per_char": 0.7293636202812195, "correct_loss_per_token": 1.2134156227111816, "incorrect_loss_per_token": 1.458727240562439, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4984933137893677, "num_tokens": 1, "num_tokens_all": 463, "is_greedy": false, "logits_per_token": -1.4984933137893677, "logits_per_char": -0.7492466568946838, "num_chars": 2}, {"sum_logits": -1.4010603427886963, "num_tokens": 1, "num_tokens_all": 463, "is_greedy": false, "logits_per_token": -1.4010603427886963, "logits_per_char": -0.7005301713943481, "num_chars": 2}, {"sum_logits": -1.476628065109253, "num_tokens": 1, "num_tokens_all": 463, "is_greedy": false, "logits_per_token": -1.476628065109253, "logits_per_char": -0.7383140325546265, "num_chars": 2}, {"sum_logits": -1.2134156227111816, "num_tokens": 1, "num_tokens_all": 463, "is_greedy": true, "logits_per_token": -1.2134156227111816, "logits_per_char": -0.6067078113555908, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 63, "native_id": 63, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.444360375404358, "incorrect_loss_raw": 1.3807753721872966, "correct_loss_per_char": 0.722180187702179, "incorrect_loss_per_char": 0.6903876860936483, "correct_loss_per_token": 1.444360375404358, "incorrect_loss_per_token": 1.3807753721872966, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4777215719223022, "num_tokens": 1, "num_tokens_all": 447, "is_greedy": false, "logits_per_token": -1.4777215719223022, "logits_per_char": -0.7388607859611511, "num_chars": 2}, {"sum_logits": -1.1943246126174927, "num_tokens": 1, "num_tokens_all": 447, "is_greedy": true, "logits_per_token": -1.1943246126174927, "logits_per_char": -0.5971623063087463, "num_chars": 2}, {"sum_logits": -1.4702799320220947, "num_tokens": 1, "num_tokens_all": 447, "is_greedy": false, "logits_per_token": -1.4702799320220947, "logits_per_char": -0.7351399660110474, "num_chars": 2}, {"sum_logits": -1.444360375404358, "num_tokens": 1, "num_tokens_all": 447, "is_greedy": false, "logits_per_token": -1.444360375404358, "logits_per_char": -0.722180187702179, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 64, "native_id": 64, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6032484769821167, "incorrect_loss_raw": 1.3347571690877278, "correct_loss_per_char": 0.8016242384910583, "incorrect_loss_per_char": 0.6673785845438639, "correct_loss_per_token": 1.6032484769821167, "incorrect_loss_per_token": 1.3347571690877278, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6032484769821167, "num_tokens": 1, "num_tokens_all": 519, "is_greedy": false, "logits_per_token": -1.6032484769821167, "logits_per_char": -0.8016242384910583, "num_chars": 2}, {"sum_logits": -1.3031269311904907, "num_tokens": 1, "num_tokens_all": 519, "is_greedy": false, "logits_per_token": -1.3031269311904907, "logits_per_char": -0.6515634655952454, "num_chars": 2}, {"sum_logits": -1.4871124029159546, "num_tokens": 1, "num_tokens_all": 519, "is_greedy": false, "logits_per_token": -1.4871124029159546, "logits_per_char": -0.7435562014579773, "num_chars": 2}, {"sum_logits": -1.2140321731567383, "num_tokens": 1, "num_tokens_all": 519, "is_greedy": true, "logits_per_token": -1.2140321731567383, "logits_per_char": -0.6070160865783691, "num_chars": 2}], "label": 0, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 65, "native_id": 65, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.384584665298462, "incorrect_loss_raw": 1.3992423216501872, "correct_loss_per_char": 0.692292332649231, "incorrect_loss_per_char": 0.6996211608250936, "correct_loss_per_token": 1.384584665298462, "incorrect_loss_per_token": 1.3992423216501872, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4732739925384521, "num_tokens": 1, "num_tokens_all": 449, "is_greedy": false, "logits_per_token": -1.4732739925384521, "logits_per_char": -0.7366369962692261, "num_chars": 2}, {"sum_logits": -1.245496153831482, "num_tokens": 1, "num_tokens_all": 449, "is_greedy": true, "logits_per_token": -1.245496153831482, "logits_per_char": -0.622748076915741, "num_chars": 2}, {"sum_logits": -1.4789568185806274, "num_tokens": 1, "num_tokens_all": 449, "is_greedy": false, "logits_per_token": -1.4789568185806274, "logits_per_char": -0.7394784092903137, "num_chars": 2}, {"sum_logits": -1.384584665298462, "num_tokens": 1, "num_tokens_all": 449, "is_greedy": false, "logits_per_token": -1.384584665298462, "logits_per_char": -0.692292332649231, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 66, "native_id": 66, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3784514665603638, "incorrect_loss_raw": 1.399299422899882, "correct_loss_per_char": 0.6892257332801819, "incorrect_loss_per_char": 0.699649711449941, "correct_loss_per_token": 1.3784514665603638, "incorrect_loss_per_token": 1.399299422899882, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5173033475875854, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": false, "logits_per_token": -1.5173033475875854, "logits_per_char": -0.7586516737937927, "num_chars": 2}, {"sum_logits": -1.3784514665603638, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": false, "logits_per_token": -1.3784514665603638, "logits_per_char": -0.6892257332801819, "num_chars": 2}, {"sum_logits": -1.4134305715560913, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": false, "logits_per_token": -1.4134305715560913, "logits_per_char": -0.7067152857780457, "num_chars": 2}, {"sum_logits": -1.2671643495559692, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": true, "logits_per_token": -1.2671643495559692, "logits_per_char": -0.6335821747779846, "num_chars": 2}], "label": 1, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 67, "native_id": 67, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4380501508712769, "incorrect_loss_raw": 1.3782573143641155, "correct_loss_per_char": 0.7190250754356384, "incorrect_loss_per_char": 0.6891286571820577, "correct_loss_per_token": 1.4380501508712769, "incorrect_loss_per_token": 1.3782573143641155, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4380501508712769, "num_tokens": 1, "num_tokens_all": 471, "is_greedy": false, "logits_per_token": -1.4380501508712769, "logits_per_char": -0.7190250754356384, "num_chars": 2}, {"sum_logits": -1.387561321258545, "num_tokens": 1, "num_tokens_all": 471, "is_greedy": false, "logits_per_token": -1.387561321258545, "logits_per_char": -0.6937806606292725, "num_chars": 2}, {"sum_logits": -1.472913146018982, "num_tokens": 1, "num_tokens_all": 471, "is_greedy": false, "logits_per_token": -1.472913146018982, "logits_per_char": -0.736456573009491, "num_chars": 2}, {"sum_logits": -1.2742974758148193, "num_tokens": 1, "num_tokens_all": 471, "is_greedy": true, "logits_per_token": -1.2742974758148193, "logits_per_char": -0.6371487379074097, "num_chars": 2}], "label": 0, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 68, "native_id": 68, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5103766918182373, "incorrect_loss_raw": 1.360539197921753, "correct_loss_per_char": 0.7551883459091187, "incorrect_loss_per_char": 0.6802695989608765, "correct_loss_per_token": 1.5103766918182373, "incorrect_loss_per_token": 1.360539197921753, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5103766918182373, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": false, "logits_per_token": -1.5103766918182373, "logits_per_char": -0.7551883459091187, "num_chars": 2}, {"sum_logits": -1.2845150232315063, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": false, "logits_per_token": -1.2845150232315063, "logits_per_char": -0.6422575116157532, "num_chars": 2}, {"sum_logits": -1.541689157485962, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": false, "logits_per_token": -1.541689157485962, "logits_per_char": -0.770844578742981, "num_chars": 2}, {"sum_logits": -1.2554134130477905, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": true, "logits_per_token": -1.2554134130477905, "logits_per_char": -0.6277067065238953, "num_chars": 2}], "label": 0, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 69, "native_id": 69, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4986902475357056, "incorrect_loss_raw": 1.3668265342712402, "correct_loss_per_char": 0.7493451237678528, "incorrect_loss_per_char": 0.6834132671356201, "correct_loss_per_token": 1.4986902475357056, "incorrect_loss_per_token": 1.3668265342712402, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4986902475357056, "num_tokens": 1, "num_tokens_all": 460, "is_greedy": false, "logits_per_token": -1.4986902475357056, "logits_per_char": -0.7493451237678528, "num_chars": 2}, {"sum_logits": -1.2057111263275146, "num_tokens": 1, "num_tokens_all": 460, "is_greedy": true, "logits_per_token": -1.2057111263275146, "logits_per_char": -0.6028555631637573, "num_chars": 2}, {"sum_logits": -1.561126947402954, "num_tokens": 1, "num_tokens_all": 460, "is_greedy": false, "logits_per_token": -1.561126947402954, "logits_per_char": -0.780563473701477, "num_chars": 2}, {"sum_logits": -1.333641529083252, "num_tokens": 1, "num_tokens_all": 460, "is_greedy": false, "logits_per_token": -1.333641529083252, "logits_per_char": -0.666820764541626, "num_chars": 2}], "label": 0, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 70, "native_id": 70, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2785179615020752, "incorrect_loss_raw": 1.4360779126485188, "correct_loss_per_char": 0.6392589807510376, "incorrect_loss_per_char": 0.7180389563242594, "correct_loss_per_token": 1.2785179615020752, "incorrect_loss_per_token": 1.4360779126485188, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5610498189926147, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": false, "logits_per_token": -1.5610498189926147, "logits_per_char": -0.7805249094963074, "num_chars": 2}, {"sum_logits": -1.2781016826629639, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": true, "logits_per_token": -1.2781016826629639, "logits_per_char": -0.6390508413314819, "num_chars": 2}, {"sum_logits": -1.469082236289978, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": false, "logits_per_token": -1.469082236289978, "logits_per_char": -0.734541118144989, "num_chars": 2}, {"sum_logits": -1.2785179615020752, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": false, "logits_per_token": -1.2785179615020752, "logits_per_char": -0.6392589807510376, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 71, "native_id": 71, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6302589178085327, "incorrect_loss_raw": 1.3310244878133137, "correct_loss_per_char": 0.8151294589042664, "incorrect_loss_per_char": 0.6655122439066569, "correct_loss_per_token": 1.6302589178085327, "incorrect_loss_per_token": 1.3310244878133137, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5352060794830322, "num_tokens": 1, "num_tokens_all": 501, "is_greedy": false, "logits_per_token": -1.5352060794830322, "logits_per_char": -0.7676030397415161, "num_chars": 2}, {"sum_logits": -1.2403268814086914, "num_tokens": 1, "num_tokens_all": 501, "is_greedy": false, "logits_per_token": -1.2403268814086914, "logits_per_char": -0.6201634407043457, "num_chars": 2}, {"sum_logits": -1.6302589178085327, "num_tokens": 1, "num_tokens_all": 501, "is_greedy": false, "logits_per_token": -1.6302589178085327, "logits_per_char": -0.8151294589042664, "num_chars": 2}, {"sum_logits": -1.2175405025482178, "num_tokens": 1, "num_tokens_all": 501, "is_greedy": true, "logits_per_token": -1.2175405025482178, "logits_per_char": -0.6087702512741089, "num_chars": 2}], "label": 2, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 72, "native_id": 72, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3633050918579102, "incorrect_loss_raw": 1.426581343015035, "correct_loss_per_char": 0.6816525459289551, "incorrect_loss_per_char": 0.7132906715075175, "correct_loss_per_token": 1.3633050918579102, "incorrect_loss_per_token": 1.426581343015035, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.763999342918396, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": false, "logits_per_token": -1.763999342918396, "logits_per_char": -0.881999671459198, "num_chars": 2}, {"sum_logits": -1.3170783519744873, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": false, "logits_per_token": -1.3170783519744873, "logits_per_char": -0.6585391759872437, "num_chars": 2}, {"sum_logits": -1.3633050918579102, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": false, "logits_per_token": -1.3633050918579102, "logits_per_char": -0.6816525459289551, "num_chars": 2}, {"sum_logits": -1.1986663341522217, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": true, "logits_per_token": -1.1986663341522217, "logits_per_char": -0.5993331670761108, "num_chars": 2}], "label": 2, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 73, "native_id": 73, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3724665641784668, "incorrect_loss_raw": 1.4024688402811687, "correct_loss_per_char": 0.6862332820892334, "incorrect_loss_per_char": 0.7012344201405843, "correct_loss_per_token": 1.3724665641784668, "incorrect_loss_per_token": 1.4024688402811687, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5016052722930908, "num_tokens": 1, "num_tokens_all": 493, "is_greedy": false, "logits_per_token": -1.5016052722930908, "logits_per_char": -0.7508026361465454, "num_chars": 2}, {"sum_logits": -1.3724665641784668, "num_tokens": 1, "num_tokens_all": 493, "is_greedy": false, "logits_per_token": -1.3724665641784668, "logits_per_char": -0.6862332820892334, "num_chars": 2}, {"sum_logits": -1.4647431373596191, "num_tokens": 1, "num_tokens_all": 493, "is_greedy": false, "logits_per_token": -1.4647431373596191, "logits_per_char": -0.7323715686798096, "num_chars": 2}, {"sum_logits": -1.241058111190796, "num_tokens": 1, "num_tokens_all": 493, "is_greedy": true, "logits_per_token": -1.241058111190796, "logits_per_char": -0.620529055595398, "num_chars": 2}], "label": 1, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 74, "native_id": 74, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4913853406906128, "incorrect_loss_raw": 1.3712632656097412, "correct_loss_per_char": 0.7456926703453064, "incorrect_loss_per_char": 0.6856316328048706, "correct_loss_per_token": 1.4913853406906128, "incorrect_loss_per_token": 1.3712632656097412, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6112620830535889, "num_tokens": 1, "num_tokens_all": 504, "is_greedy": false, "logits_per_token": -1.6112620830535889, "logits_per_char": -0.8056310415267944, "num_chars": 2}, {"sum_logits": -1.2767564058303833, "num_tokens": 1, "num_tokens_all": 504, "is_greedy": false, "logits_per_token": -1.2767564058303833, "logits_per_char": -0.6383782029151917, "num_chars": 2}, {"sum_logits": -1.4913853406906128, "num_tokens": 1, "num_tokens_all": 504, "is_greedy": false, "logits_per_token": -1.4913853406906128, "logits_per_char": -0.7456926703453064, "num_chars": 2}, {"sum_logits": -1.2257713079452515, "num_tokens": 1, "num_tokens_all": 504, "is_greedy": true, "logits_per_token": -1.2257713079452515, "logits_per_char": -0.6128856539726257, "num_chars": 2}], "label": 2, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 75, "native_id": 75, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5780833959579468, "incorrect_loss_raw": 1.3398456970850627, "correct_loss_per_char": 0.7890416979789734, "incorrect_loss_per_char": 0.6699228485425314, "correct_loss_per_token": 1.5780833959579468, "incorrect_loss_per_token": 1.3398456970850627, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4582610130310059, "num_tokens": 1, "num_tokens_all": 466, "is_greedy": false, "logits_per_token": -1.4582610130310059, "logits_per_char": -0.7291305065155029, "num_chars": 2}, {"sum_logits": -1.2133536338806152, "num_tokens": 1, "num_tokens_all": 466, "is_greedy": true, "logits_per_token": -1.2133536338806152, "logits_per_char": -0.6066768169403076, "num_chars": 2}, {"sum_logits": -1.5780833959579468, "num_tokens": 1, "num_tokens_all": 466, "is_greedy": false, "logits_per_token": -1.5780833959579468, "logits_per_char": -0.7890416979789734, "num_chars": 2}, {"sum_logits": -1.347922444343567, "num_tokens": 1, "num_tokens_all": 466, "is_greedy": false, "logits_per_token": -1.347922444343567, "logits_per_char": -0.6739612221717834, "num_chars": 2}], "label": 2, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 76, "native_id": 76, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3633050918579102, "incorrect_loss_raw": 1.426581343015035, "correct_loss_per_char": 0.6816525459289551, "incorrect_loss_per_char": 0.7132906715075175, "correct_loss_per_token": 1.3633050918579102, "incorrect_loss_per_token": 1.426581343015035, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.763999342918396, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": false, "logits_per_token": -1.763999342918396, "logits_per_char": -0.881999671459198, "num_chars": 2}, {"sum_logits": -1.3170783519744873, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": false, "logits_per_token": -1.3170783519744873, "logits_per_char": -0.6585391759872437, "num_chars": 2}, {"sum_logits": -1.3633050918579102, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": false, "logits_per_token": -1.3633050918579102, "logits_per_char": -0.6816525459289551, "num_chars": 2}, {"sum_logits": -1.1986663341522217, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": true, "logits_per_token": -1.1986663341522217, "logits_per_char": -0.5993331670761108, "num_chars": 2}], "label": 2, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 77, "native_id": 77, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3046305179595947, "incorrect_loss_raw": 1.4283267656962078, "correct_loss_per_char": 0.6523152589797974, "incorrect_loss_per_char": 0.7141633828481039, "correct_loss_per_token": 1.3046305179595947, "incorrect_loss_per_token": 1.4283267656962078, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.535867691040039, "num_tokens": 1, "num_tokens_all": 493, "is_greedy": false, "logits_per_token": -1.535867691040039, "logits_per_char": -0.7679338455200195, "num_chars": 2}, {"sum_logits": -1.2726435661315918, "num_tokens": 1, "num_tokens_all": 493, "is_greedy": true, "logits_per_token": -1.2726435661315918, "logits_per_char": -0.6363217830657959, "num_chars": 2}, {"sum_logits": -1.4764690399169922, "num_tokens": 1, "num_tokens_all": 493, "is_greedy": false, "logits_per_token": -1.4764690399169922, "logits_per_char": -0.7382345199584961, "num_chars": 2}, {"sum_logits": -1.3046305179595947, "num_tokens": 1, "num_tokens_all": 493, "is_greedy": false, "logits_per_token": -1.3046305179595947, "logits_per_char": -0.6523152589797974, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 78, "native_id": 78, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3472381830215454, "incorrect_loss_raw": 1.4105456272761028, "correct_loss_per_char": 0.6736190915107727, "incorrect_loss_per_char": 0.7052728136380514, "correct_loss_per_token": 1.3472381830215454, "incorrect_loss_per_token": 1.4105456272761028, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5206574201583862, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.5206574201583862, "logits_per_char": -0.7603287100791931, "num_chars": 2}, {"sum_logits": -1.25180983543396, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": true, "logits_per_token": -1.25180983543396, "logits_per_char": -0.62590491771698, "num_chars": 2}, {"sum_logits": -1.459169626235962, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.459169626235962, "logits_per_char": -0.729584813117981, "num_chars": 2}, {"sum_logits": -1.3472381830215454, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.3472381830215454, "logits_per_char": -0.6736190915107727, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 79, "native_id": 79, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4359854459762573, "incorrect_loss_raw": 1.400887171427409, "correct_loss_per_char": 0.7179927229881287, "incorrect_loss_per_char": 0.7004435857137045, "correct_loss_per_token": 1.4359854459762573, "incorrect_loss_per_token": 1.400887171427409, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7136740684509277, "num_tokens": 1, "num_tokens_all": 504, "is_greedy": false, "logits_per_token": -1.7136740684509277, "logits_per_char": -0.8568370342254639, "num_chars": 2}, {"sum_logits": -1.333456039428711, "num_tokens": 1, "num_tokens_all": 504, "is_greedy": false, "logits_per_token": -1.333456039428711, "logits_per_char": -0.6667280197143555, "num_chars": 2}, {"sum_logits": -1.4359854459762573, "num_tokens": 1, "num_tokens_all": 504, "is_greedy": false, "logits_per_token": -1.4359854459762573, "logits_per_char": -0.7179927229881287, "num_chars": 2}, {"sum_logits": -1.155531406402588, "num_tokens": 1, "num_tokens_all": 504, "is_greedy": true, "logits_per_token": -1.155531406402588, "logits_per_char": -0.577765703201294, "num_chars": 2}], "label": 2, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 80, "native_id": 80, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4948310852050781, "incorrect_loss_raw": 1.3652775287628174, "correct_loss_per_char": 0.7474155426025391, "incorrect_loss_per_char": 0.6826387643814087, "correct_loss_per_token": 1.4948310852050781, "incorrect_loss_per_token": 1.3652775287628174, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4948310852050781, "num_tokens": 1, "num_tokens_all": 483, "is_greedy": false, "logits_per_token": -1.4948310852050781, "logits_per_char": -0.7474155426025391, "num_chars": 2}, {"sum_logits": -1.5055172443389893, "num_tokens": 1, "num_tokens_all": 483, "is_greedy": false, "logits_per_token": -1.5055172443389893, "logits_per_char": -0.7527586221694946, "num_chars": 2}, {"sum_logits": -1.337844967842102, "num_tokens": 1, "num_tokens_all": 483, "is_greedy": false, "logits_per_token": -1.337844967842102, "logits_per_char": -0.668922483921051, "num_chars": 2}, {"sum_logits": -1.2524703741073608, "num_tokens": 1, "num_tokens_all": 483, "is_greedy": true, "logits_per_token": -1.2524703741073608, "logits_per_char": -0.6262351870536804, "num_chars": 2}], "label": 0, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 81, "native_id": 81, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3422203063964844, "incorrect_loss_raw": 1.4129903316497803, "correct_loss_per_char": 0.6711101531982422, "incorrect_loss_per_char": 0.7064951658248901, "correct_loss_per_token": 1.3422203063964844, "incorrect_loss_per_token": 1.4129903316497803, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4368035793304443, "num_tokens": 1, "num_tokens_all": 474, "is_greedy": false, "logits_per_token": -1.4368035793304443, "logits_per_char": -0.7184017896652222, "num_chars": 2}, {"sum_logits": -1.3422203063964844, "num_tokens": 1, "num_tokens_all": 474, "is_greedy": false, "logits_per_token": -1.3422203063964844, "logits_per_char": -0.6711101531982422, "num_chars": 2}, {"sum_logits": -1.5074986219406128, "num_tokens": 1, "num_tokens_all": 474, "is_greedy": false, "logits_per_token": -1.5074986219406128, "logits_per_char": -0.7537493109703064, "num_chars": 2}, {"sum_logits": -1.2946687936782837, "num_tokens": 1, "num_tokens_all": 474, "is_greedy": true, "logits_per_token": -1.2946687936782837, "logits_per_char": -0.6473343968391418, "num_chars": 2}], "label": 1, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 82, "native_id": 82, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4950060844421387, "incorrect_loss_raw": 1.3680081764856975, "correct_loss_per_char": 0.7475030422210693, "incorrect_loss_per_char": 0.6840040882428488, "correct_loss_per_token": 1.4950060844421387, "incorrect_loss_per_token": 1.3680081764856975, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4950060844421387, "num_tokens": 1, "num_tokens_all": 447, "is_greedy": false, "logits_per_token": -1.4950060844421387, "logits_per_char": -0.7475030422210693, "num_chars": 2}, {"sum_logits": -1.15909743309021, "num_tokens": 1, "num_tokens_all": 447, "is_greedy": true, "logits_per_token": -1.15909743309021, "logits_per_char": -0.579548716545105, "num_chars": 2}, {"sum_logits": -1.4566272497177124, "num_tokens": 1, "num_tokens_all": 447, "is_greedy": false, "logits_per_token": -1.4566272497177124, "logits_per_char": -0.7283136248588562, "num_chars": 2}, {"sum_logits": -1.48829984664917, "num_tokens": 1, "num_tokens_all": 447, "is_greedy": false, "logits_per_token": -1.48829984664917, "logits_per_char": -0.744149923324585, "num_chars": 2}], "label": 0, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 83, "native_id": 83, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2076870203018188, "incorrect_loss_raw": 1.4671514828999836, "correct_loss_per_char": 0.6038435101509094, "incorrect_loss_per_char": 0.7335757414499918, "correct_loss_per_token": 1.2076870203018188, "incorrect_loss_per_token": 1.4671514828999836, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5722389221191406, "num_tokens": 1, "num_tokens_all": 496, "is_greedy": false, "logits_per_token": -1.5722389221191406, "logits_per_char": -0.7861194610595703, "num_chars": 2}, {"sum_logits": -1.3104710578918457, "num_tokens": 1, "num_tokens_all": 496, "is_greedy": false, "logits_per_token": -1.3104710578918457, "logits_per_char": -0.6552355289459229, "num_chars": 2}, {"sum_logits": -1.5187444686889648, "num_tokens": 1, "num_tokens_all": 496, "is_greedy": false, "logits_per_token": -1.5187444686889648, "logits_per_char": -0.7593722343444824, "num_chars": 2}, {"sum_logits": -1.2076870203018188, "num_tokens": 1, "num_tokens_all": 496, "is_greedy": true, "logits_per_token": -1.2076870203018188, "logits_per_char": -0.6038435101509094, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 84, "native_id": 84, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.414919137954712, "incorrect_loss_raw": 1.396870772043864, "correct_loss_per_char": 0.707459568977356, "incorrect_loss_per_char": 0.698435386021932, "correct_loss_per_token": 1.414919137954712, "incorrect_loss_per_token": 1.396870772043864, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.414919137954712, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": false, "logits_per_token": -1.414919137954712, "logits_per_char": -0.707459568977356, "num_chars": 2}, {"sum_logits": -1.1931499242782593, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": true, "logits_per_token": -1.1931499242782593, "logits_per_char": -0.5965749621391296, "num_chars": 2}, {"sum_logits": -1.627519965171814, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": false, "logits_per_token": -1.627519965171814, "logits_per_char": -0.813759982585907, "num_chars": 2}, {"sum_logits": -1.3699424266815186, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": false, "logits_per_token": -1.3699424266815186, "logits_per_char": -0.6849712133407593, "num_chars": 2}], "label": 0, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 85, "native_id": 85, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2452418804168701, "incorrect_loss_raw": 1.4559446970621746, "correct_loss_per_char": 0.6226209402084351, "incorrect_loss_per_char": 0.7279723485310873, "correct_loss_per_token": 1.2452418804168701, "incorrect_loss_per_token": 1.4559446970621746, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6052465438842773, "num_tokens": 1, "num_tokens_all": 484, "is_greedy": false, "logits_per_token": -1.6052465438842773, "logits_per_char": -0.8026232719421387, "num_chars": 2}, {"sum_logits": -1.23677396774292, "num_tokens": 1, "num_tokens_all": 484, "is_greedy": true, "logits_per_token": -1.23677396774292, "logits_per_char": -0.61838698387146, "num_chars": 2}, {"sum_logits": -1.5258135795593262, "num_tokens": 1, "num_tokens_all": 484, "is_greedy": false, "logits_per_token": -1.5258135795593262, "logits_per_char": -0.7629067897796631, "num_chars": 2}, {"sum_logits": -1.2452418804168701, "num_tokens": 1, "num_tokens_all": 484, "is_greedy": false, "logits_per_token": -1.2452418804168701, "logits_per_char": -0.6226209402084351, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 86, "native_id": 86, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.312124490737915, "incorrect_loss_raw": 1.4240609407424927, "correct_loss_per_char": 0.6560622453689575, "incorrect_loss_per_char": 0.7120304703712463, "correct_loss_per_token": 1.312124490737915, "incorrect_loss_per_token": 1.4240609407424927, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.505865216255188, "num_tokens": 1, "num_tokens_all": 443, "is_greedy": false, "logits_per_token": -1.505865216255188, "logits_per_char": -0.752932608127594, "num_chars": 2}, {"sum_logits": -1.2842261791229248, "num_tokens": 1, "num_tokens_all": 443, "is_greedy": true, "logits_per_token": -1.2842261791229248, "logits_per_char": -0.6421130895614624, "num_chars": 2}, {"sum_logits": -1.4820914268493652, "num_tokens": 1, "num_tokens_all": 443, "is_greedy": false, "logits_per_token": -1.4820914268493652, "logits_per_char": -0.7410457134246826, "num_chars": 2}, {"sum_logits": -1.312124490737915, "num_tokens": 1, "num_tokens_all": 443, "is_greedy": false, "logits_per_token": -1.312124490737915, "logits_per_char": -0.6560622453689575, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 87, "native_id": 87, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1968457698822021, "incorrect_loss_raw": 1.4667286078135173, "correct_loss_per_char": 0.5984228849411011, "incorrect_loss_per_char": 0.7333643039067587, "correct_loss_per_token": 1.1968457698822021, "incorrect_loss_per_token": 1.4667286078135173, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5230934619903564, "num_tokens": 1, "num_tokens_all": 465, "is_greedy": false, "logits_per_token": -1.5230934619903564, "logits_per_char": -0.7615467309951782, "num_chars": 2}, {"sum_logits": -1.3680799007415771, "num_tokens": 1, "num_tokens_all": 465, "is_greedy": false, "logits_per_token": -1.3680799007415771, "logits_per_char": -0.6840399503707886, "num_chars": 2}, {"sum_logits": -1.5090124607086182, "num_tokens": 1, "num_tokens_all": 465, "is_greedy": false, "logits_per_token": -1.5090124607086182, "logits_per_char": -0.7545062303543091, "num_chars": 2}, {"sum_logits": -1.1968457698822021, "num_tokens": 1, "num_tokens_all": 465, "is_greedy": true, "logits_per_token": -1.1968457698822021, "logits_per_char": -0.5984228849411011, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 88, "native_id": 88, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2203723192214966, "incorrect_loss_raw": 1.4591072003046672, "correct_loss_per_char": 0.6101861596107483, "incorrect_loss_per_char": 0.7295536001523336, "correct_loss_per_token": 1.2203723192214966, "incorrect_loss_per_token": 1.4591072003046672, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.547489881515503, "num_tokens": 1, "num_tokens_all": 482, "is_greedy": false, "logits_per_token": -1.547489881515503, "logits_per_char": -0.7737449407577515, "num_chars": 2}, {"sum_logits": -1.3423033952713013, "num_tokens": 1, "num_tokens_all": 482, "is_greedy": false, "logits_per_token": -1.3423033952713013, "logits_per_char": -0.6711516976356506, "num_chars": 2}, {"sum_logits": -1.4875283241271973, "num_tokens": 1, "num_tokens_all": 482, "is_greedy": false, "logits_per_token": -1.4875283241271973, "logits_per_char": -0.7437641620635986, "num_chars": 2}, {"sum_logits": -1.2203723192214966, "num_tokens": 1, "num_tokens_all": 482, "is_greedy": true, "logits_per_token": -1.2203723192214966, "logits_per_char": -0.6101861596107483, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 89, "native_id": 89, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.414919137954712, "incorrect_loss_raw": 1.396870772043864, "correct_loss_per_char": 0.707459568977356, "incorrect_loss_per_char": 0.698435386021932, "correct_loss_per_token": 1.414919137954712, "incorrect_loss_per_token": 1.396870772043864, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.414919137954712, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": false, "logits_per_token": -1.414919137954712, "logits_per_char": -0.707459568977356, "num_chars": 2}, {"sum_logits": -1.1931499242782593, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": true, "logits_per_token": -1.1931499242782593, "logits_per_char": -0.5965749621391296, "num_chars": 2}, {"sum_logits": -1.627519965171814, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": false, "logits_per_token": -1.627519965171814, "logits_per_char": -0.813759982585907, "num_chars": 2}, {"sum_logits": -1.3699424266815186, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": false, "logits_per_token": -1.3699424266815186, "logits_per_char": -0.6849712133407593, "num_chars": 2}], "label": 0, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 90, "native_id": 90, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3111064434051514, "incorrect_loss_raw": 1.4305883646011353, "correct_loss_per_char": 0.6555532217025757, "incorrect_loss_per_char": 0.7152941823005676, "correct_loss_per_token": 1.3111064434051514, "incorrect_loss_per_token": 1.4305883646011353, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6154577732086182, "num_tokens": 1, "num_tokens_all": 462, "is_greedy": false, "logits_per_token": -1.6154577732086182, "logits_per_char": -0.8077288866043091, "num_chars": 2}, {"sum_logits": -1.3111064434051514, "num_tokens": 1, "num_tokens_all": 462, "is_greedy": false, "logits_per_token": -1.3111064434051514, "logits_per_char": -0.6555532217025757, "num_chars": 2}, {"sum_logits": -1.443202018737793, "num_tokens": 1, "num_tokens_all": 462, "is_greedy": false, "logits_per_token": -1.443202018737793, "logits_per_char": -0.7216010093688965, "num_chars": 2}, {"sum_logits": -1.2331053018569946, "num_tokens": 1, "num_tokens_all": 462, "is_greedy": true, "logits_per_token": -1.2331053018569946, "logits_per_char": -0.6165526509284973, "num_chars": 2}], "label": 1, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 91, "native_id": 91, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5143327713012695, "incorrect_loss_raw": 1.3573914368947346, "correct_loss_per_char": 0.7571663856506348, "incorrect_loss_per_char": 0.6786957184473673, "correct_loss_per_token": 1.5143327713012695, "incorrect_loss_per_token": 1.3573914368947346, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5170063972473145, "num_tokens": 1, "num_tokens_all": 475, "is_greedy": false, "logits_per_token": -1.5170063972473145, "logits_per_char": -0.7585031986236572, "num_chars": 2}, {"sum_logits": -1.2839553356170654, "num_tokens": 1, "num_tokens_all": 475, "is_greedy": false, "logits_per_token": -1.2839553356170654, "logits_per_char": -0.6419776678085327, "num_chars": 2}, {"sum_logits": -1.5143327713012695, "num_tokens": 1, "num_tokens_all": 475, "is_greedy": false, "logits_per_token": -1.5143327713012695, "logits_per_char": -0.7571663856506348, "num_chars": 2}, {"sum_logits": -1.2712125778198242, "num_tokens": 1, "num_tokens_all": 475, "is_greedy": true, "logits_per_token": -1.2712125778198242, "logits_per_char": -0.6356062889099121, "num_chars": 2}], "label": 2, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 92, "native_id": 92, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4196064472198486, "incorrect_loss_raw": 1.38169527053833, "correct_loss_per_char": 0.7098032236099243, "incorrect_loss_per_char": 0.690847635269165, "correct_loss_per_token": 1.4196064472198486, "incorrect_loss_per_token": 1.38169527053833, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4196064472198486, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": false, "logits_per_token": -1.4196064472198486, "logits_per_char": -0.7098032236099243, "num_chars": 2}, {"sum_logits": -1.303558349609375, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": true, "logits_per_token": -1.303558349609375, "logits_per_char": -0.6517791748046875, "num_chars": 2}, {"sum_logits": -1.4776068925857544, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": false, "logits_per_token": -1.4776068925857544, "logits_per_char": -0.7388034462928772, "num_chars": 2}, {"sum_logits": -1.3639205694198608, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": false, "logits_per_token": -1.3639205694198608, "logits_per_char": -0.6819602847099304, "num_chars": 2}], "label": 0, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 93, "native_id": 93, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3983062505722046, "incorrect_loss_raw": 1.3955403963724773, "correct_loss_per_char": 0.6991531252861023, "incorrect_loss_per_char": 0.6977701981862386, "correct_loss_per_token": 1.3983062505722046, "incorrect_loss_per_token": 1.3955403963724773, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5679186582565308, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": false, "logits_per_token": -1.5679186582565308, "logits_per_char": -0.7839593291282654, "num_chars": 2}, {"sum_logits": -1.3428828716278076, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": false, "logits_per_token": -1.3428828716278076, "logits_per_char": -0.6714414358139038, "num_chars": 2}, {"sum_logits": -1.3983062505722046, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": false, "logits_per_token": -1.3983062505722046, "logits_per_char": -0.6991531252861023, "num_chars": 2}, {"sum_logits": -1.2758196592330933, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": true, "logits_per_token": -1.2758196592330933, "logits_per_char": -0.6379098296165466, "num_chars": 2}], "label": 2, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 94, "native_id": 94, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5871678590774536, "incorrect_loss_raw": 1.3414260149002075, "correct_loss_per_char": 0.7935839295387268, "incorrect_loss_per_char": 0.6707130074501038, "correct_loss_per_token": 1.5871678590774536, "incorrect_loss_per_token": 1.3414260149002075, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5030277967453003, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": false, "logits_per_token": -1.5030277967453003, "logits_per_char": -0.7515138983726501, "num_chars": 2}, {"sum_logits": -1.3304102420806885, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": false, "logits_per_token": -1.3304102420806885, "logits_per_char": -0.6652051210403442, "num_chars": 2}, {"sum_logits": -1.5871678590774536, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": false, "logits_per_token": -1.5871678590774536, "logits_per_char": -0.7935839295387268, "num_chars": 2}, {"sum_logits": -1.1908400058746338, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": true, "logits_per_token": -1.1908400058746338, "logits_per_char": -0.5954200029373169, "num_chars": 2}], "label": 2, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 95, "native_id": 95, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1109486818313599, "incorrect_loss_raw": 1.5082002480824788, "correct_loss_per_char": 0.5554743409156799, "incorrect_loss_per_char": 0.7541001240412394, "correct_loss_per_token": 1.1109486818313599, "incorrect_loss_per_token": 1.5082002480824788, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6310350894927979, "num_tokens": 1, "num_tokens_all": 486, "is_greedy": false, "logits_per_token": -1.6310350894927979, "logits_per_char": -0.8155175447463989, "num_chars": 2}, {"sum_logits": -1.422995686531067, "num_tokens": 1, "num_tokens_all": 486, "is_greedy": false, "logits_per_token": -1.422995686531067, "logits_per_char": -0.7114978432655334, "num_chars": 2}, {"sum_logits": -1.4705699682235718, "num_tokens": 1, "num_tokens_all": 486, "is_greedy": false, "logits_per_token": -1.4705699682235718, "logits_per_char": -0.7352849841117859, "num_chars": 2}, {"sum_logits": -1.1109486818313599, "num_tokens": 1, "num_tokens_all": 486, "is_greedy": true, "logits_per_token": -1.1109486818313599, "logits_per_char": -0.5554743409156799, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 96, "native_id": 96, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.430153489112854, "incorrect_loss_raw": 1.3845887978871663, "correct_loss_per_char": 0.715076744556427, "incorrect_loss_per_char": 0.6922943989435831, "correct_loss_per_token": 1.430153489112854, "incorrect_loss_per_token": 1.3845887978871663, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5418140888214111, "num_tokens": 1, "num_tokens_all": 475, "is_greedy": false, "logits_per_token": -1.5418140888214111, "logits_per_char": -0.7709070444107056, "num_chars": 2}, {"sum_logits": -1.376583218574524, "num_tokens": 1, "num_tokens_all": 475, "is_greedy": false, "logits_per_token": -1.376583218574524, "logits_per_char": -0.688291609287262, "num_chars": 2}, {"sum_logits": -1.430153489112854, "num_tokens": 1, "num_tokens_all": 475, "is_greedy": false, "logits_per_token": -1.430153489112854, "logits_per_char": -0.715076744556427, "num_chars": 2}, {"sum_logits": -1.235369086265564, "num_tokens": 1, "num_tokens_all": 475, "is_greedy": true, "logits_per_token": -1.235369086265564, "logits_per_char": -0.617684543132782, "num_chars": 2}], "label": 2, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 97, "native_id": 97, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5454140901565552, "incorrect_loss_raw": 1.3480912446975708, "correct_loss_per_char": 0.7727070450782776, "incorrect_loss_per_char": 0.6740456223487854, "correct_loss_per_token": 1.5454140901565552, "incorrect_loss_per_token": 1.3480912446975708, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4646297693252563, "num_tokens": 1, "num_tokens_all": 480, "is_greedy": false, "logits_per_token": -1.4646297693252563, "logits_per_char": -0.7323148846626282, "num_chars": 2}, {"sum_logits": -1.3460662364959717, "num_tokens": 1, "num_tokens_all": 480, "is_greedy": false, "logits_per_token": -1.3460662364959717, "logits_per_char": -0.6730331182479858, "num_chars": 2}, {"sum_logits": -1.5454140901565552, "num_tokens": 1, "num_tokens_all": 480, "is_greedy": false, "logits_per_token": -1.5454140901565552, "logits_per_char": -0.7727070450782776, "num_chars": 2}, {"sum_logits": -1.2335777282714844, "num_tokens": 1, "num_tokens_all": 480, "is_greedy": true, "logits_per_token": -1.2335777282714844, "logits_per_char": -0.6167888641357422, "num_chars": 2}], "label": 2, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 98, "native_id": 98, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5735087394714355, "incorrect_loss_raw": 1.3436229228973389, "correct_loss_per_char": 0.7867543697357178, "incorrect_loss_per_char": 0.6718114614486694, "correct_loss_per_token": 1.5735087394714355, "incorrect_loss_per_token": 1.3436229228973389, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5735087394714355, "num_tokens": 1, "num_tokens_all": 511, "is_greedy": false, "logits_per_token": -1.5735087394714355, "logits_per_char": -0.7867543697357178, "num_chars": 2}, {"sum_logits": -1.3200373649597168, "num_tokens": 1, "num_tokens_all": 511, "is_greedy": false, "logits_per_token": -1.3200373649597168, "logits_per_char": -0.6600186824798584, "num_chars": 2}, {"sum_logits": -1.5055049657821655, "num_tokens": 1, "num_tokens_all": 511, "is_greedy": false, "logits_per_token": -1.5055049657821655, "logits_per_char": -0.7527524828910828, "num_chars": 2}, {"sum_logits": -1.2053264379501343, "num_tokens": 1, "num_tokens_all": 511, "is_greedy": true, "logits_per_token": -1.2053264379501343, "logits_per_char": -0.6026632189750671, "num_chars": 2}], "label": 0, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 99, "native_id": 99, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3806700706481934, "incorrect_loss_raw": 1.3960463603337605, "correct_loss_per_char": 0.6903350353240967, "incorrect_loss_per_char": 0.6980231801668803, "correct_loss_per_token": 1.3806700706481934, "incorrect_loss_per_token": 1.3960463603337605, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4330074787139893, "num_tokens": 1, "num_tokens_all": 488, "is_greedy": false, "logits_per_token": -1.4330074787139893, "logits_per_char": -0.7165037393569946, "num_chars": 2}, {"sum_logits": -1.3806700706481934, "num_tokens": 1, "num_tokens_all": 488, "is_greedy": false, "logits_per_token": -1.3806700706481934, "logits_per_char": -0.6903350353240967, "num_chars": 2}, {"sum_logits": -1.4573017358779907, "num_tokens": 1, "num_tokens_all": 488, "is_greedy": false, "logits_per_token": -1.4573017358779907, "logits_per_char": -0.7286508679389954, "num_chars": 2}, {"sum_logits": -1.2978298664093018, "num_tokens": 1, "num_tokens_all": 488, "is_greedy": true, "logits_per_token": -1.2978298664093018, "logits_per_char": -0.6489149332046509, "num_chars": 2}], "label": 1, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 100, "native_id": 100, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5046310424804688, "incorrect_loss_raw": 1.369724154472351, "correct_loss_per_char": 0.7523155212402344, "incorrect_loss_per_char": 0.6848620772361755, "correct_loss_per_token": 1.5046310424804688, "incorrect_loss_per_token": 1.369724154472351, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5046310424804688, "num_tokens": 1, "num_tokens_all": 504, "is_greedy": false, "logits_per_token": -1.5046310424804688, "logits_per_char": -0.7523155212402344, "num_chars": 2}, {"sum_logits": -1.3056397438049316, "num_tokens": 1, "num_tokens_all": 504, "is_greedy": false, "logits_per_token": -1.3056397438049316, "logits_per_char": -0.6528198719024658, "num_chars": 2}, {"sum_logits": -1.606218695640564, "num_tokens": 1, "num_tokens_all": 504, "is_greedy": false, "logits_per_token": -1.606218695640564, "logits_per_char": -0.803109347820282, "num_chars": 2}, {"sum_logits": -1.1973140239715576, "num_tokens": 1, "num_tokens_all": 504, "is_greedy": true, "logits_per_token": -1.1973140239715576, "logits_per_char": -0.5986570119857788, "num_chars": 2}], "label": 0, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 101, "native_id": 101, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.242026448249817, "incorrect_loss_raw": 1.455142656962077, "correct_loss_per_char": 0.6210132241249084, "incorrect_loss_per_char": 0.7275713284810384, "correct_loss_per_token": 1.242026448249817, "incorrect_loss_per_token": 1.455142656962077, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5408982038497925, "num_tokens": 1, "num_tokens_all": 463, "is_greedy": false, "logits_per_token": -1.5408982038497925, "logits_per_char": -0.7704491019248962, "num_chars": 2}, {"sum_logits": -1.2619202136993408, "num_tokens": 1, "num_tokens_all": 463, "is_greedy": false, "logits_per_token": -1.2619202136993408, "logits_per_char": -0.6309601068496704, "num_chars": 2}, {"sum_logits": -1.5626095533370972, "num_tokens": 1, "num_tokens_all": 463, "is_greedy": false, "logits_per_token": -1.5626095533370972, "logits_per_char": -0.7813047766685486, "num_chars": 2}, {"sum_logits": -1.242026448249817, "num_tokens": 1, "num_tokens_all": 463, "is_greedy": true, "logits_per_token": -1.242026448249817, "logits_per_char": -0.6210132241249084, "num_chars": 2}], "label": 3, "task_hash": "2c97b2d8aac8dff8cd2656474c1dfb86", "model_hash": "03418cf8091a9882619950ffb07429a5"}