{"doc_id": 0, "native_id": 0, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5584678649902344, "incorrect_loss_raw": 1.3620541493097942, "correct_loss_per_char": 0.7792339324951172, "incorrect_loss_per_char": 0.6810270746548971, "correct_loss_per_token": 1.5584678649902344, "incorrect_loss_per_token": 1.3620541493097942, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5341626405715942, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.5341626405715942, "logits_per_char": -0.7670813202857971, "num_chars": 2}, {"sum_logits": -1.5584678649902344, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.5584678649902344, "logits_per_char": -0.7792339324951172, "num_chars": 2}, {"sum_logits": -1.4637404680252075, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": false, "logits_per_token": -1.4637404680252075, "logits_per_char": -0.7318702340126038, "num_chars": 2}, {"sum_logits": -1.0882593393325806, "num_tokens": 1, "num_tokens_all": 584, "is_greedy": true, "logits_per_token": -1.0882593393325806, "logits_per_char": -0.5441296696662903, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 1, "native_id": 1, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2451343536376953, "incorrect_loss_raw": 1.4528456528981526, "correct_loss_per_char": 0.6225671768188477, "incorrect_loss_per_char": 0.7264228264490763, "correct_loss_per_token": 1.2451343536376953, "incorrect_loss_per_token": 1.4528456528981526, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.511372685432434, "num_tokens": 1, "num_tokens_all": 455, "is_greedy": false, "logits_per_token": -1.511372685432434, "logits_per_char": -0.755686342716217, "num_chars": 2}, {"sum_logits": -1.4381133317947388, "num_tokens": 1, "num_tokens_all": 455, "is_greedy": false, "logits_per_token": -1.4381133317947388, "logits_per_char": -0.7190566658973694, "num_chars": 2}, {"sum_logits": -1.4090509414672852, "num_tokens": 1, "num_tokens_all": 455, "is_greedy": false, "logits_per_token": -1.4090509414672852, "logits_per_char": -0.7045254707336426, "num_chars": 2}, {"sum_logits": -1.2451343536376953, "num_tokens": 1, "num_tokens_all": 455, "is_greedy": true, "logits_per_token": -1.2451343536376953, "logits_per_char": -0.6225671768188477, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 2, "native_id": 2, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4695875644683838, "incorrect_loss_raw": 1.4592355489730835, "correct_loss_per_char": 0.7347937822341919, "incorrect_loss_per_char": 0.7296177744865417, "correct_loss_per_token": 1.4695875644683838, "incorrect_loss_per_token": 1.4592355489730835, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3970566987991333, "num_tokens": 1, "num_tokens_all": 469, "is_greedy": true, "logits_per_token": -1.3970566987991333, "logits_per_char": -0.6985283493995667, "num_chars": 2}, {"sum_logits": -1.4946931600570679, "num_tokens": 1, "num_tokens_all": 469, "is_greedy": false, "logits_per_token": -1.4946931600570679, "logits_per_char": -0.7473465800285339, "num_chars": 2}, {"sum_logits": -1.4859567880630493, "num_tokens": 1, "num_tokens_all": 469, "is_greedy": false, "logits_per_token": -1.4859567880630493, "logits_per_char": -0.7429783940315247, "num_chars": 2}, {"sum_logits": -1.4695875644683838, "num_tokens": 1, "num_tokens_all": 469, "is_greedy": false, "logits_per_token": -1.4695875644683838, "logits_per_char": -0.7347937822341919, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 3, "native_id": 3, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3816561698913574, "incorrect_loss_raw": 1.396674354871114, "correct_loss_per_char": 0.6908280849456787, "incorrect_loss_per_char": 0.698337177435557, "correct_loss_per_token": 1.3816561698913574, "incorrect_loss_per_token": 1.396674354871114, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2977908849716187, "num_tokens": 1, "num_tokens_all": 462, "is_greedy": true, "logits_per_token": -1.2977908849716187, "logits_per_char": -0.6488954424858093, "num_chars": 2}, {"sum_logits": -1.4867233037948608, "num_tokens": 1, "num_tokens_all": 462, "is_greedy": false, "logits_per_token": -1.4867233037948608, "logits_per_char": -0.7433616518974304, "num_chars": 2}, {"sum_logits": -1.4055088758468628, "num_tokens": 1, "num_tokens_all": 462, "is_greedy": false, "logits_per_token": -1.4055088758468628, "logits_per_char": -0.7027544379234314, "num_chars": 2}, {"sum_logits": -1.3816561698913574, "num_tokens": 1, "num_tokens_all": 462, "is_greedy": false, "logits_per_token": -1.3816561698913574, "logits_per_char": -0.6908280849456787, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 4, "native_id": 4, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5896363258361816, "incorrect_loss_raw": 1.3448867400487263, "correct_loss_per_char": 0.7948181629180908, "incorrect_loss_per_char": 0.6724433700243632, "correct_loss_per_token": 1.5896363258361816, "incorrect_loss_per_token": 1.3448867400487263, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5896363258361816, "num_tokens": 1, "num_tokens_all": 528, "is_greedy": false, "logits_per_token": -1.5896363258361816, "logits_per_char": -0.7948181629180908, "num_chars": 2}, {"sum_logits": -1.3893039226531982, "num_tokens": 1, "num_tokens_all": 528, "is_greedy": false, "logits_per_token": -1.3893039226531982, "logits_per_char": -0.6946519613265991, "num_chars": 2}, {"sum_logits": -1.49875807762146, "num_tokens": 1, "num_tokens_all": 528, "is_greedy": false, "logits_per_token": -1.49875807762146, "logits_per_char": -0.74937903881073, "num_chars": 2}, {"sum_logits": -1.146598219871521, "num_tokens": 1, "num_tokens_all": 528, "is_greedy": true, "logits_per_token": -1.146598219871521, "logits_per_char": -0.5732991099357605, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 5, "native_id": 5, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5708640813827515, "incorrect_loss_raw": 1.3510974248250325, "correct_loss_per_char": 0.7854320406913757, "incorrect_loss_per_char": 0.6755487124125162, "correct_loss_per_token": 1.5708640813827515, "incorrect_loss_per_token": 1.3510974248250325, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1678211688995361, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": true, "logits_per_token": -1.1678211688995361, "logits_per_char": -0.5839105844497681, "num_chars": 2}, {"sum_logits": -1.330070972442627, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.330070972442627, "logits_per_char": -0.6650354862213135, "num_chars": 2}, {"sum_logits": -1.5554001331329346, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.5554001331329346, "logits_per_char": -0.7777000665664673, "num_chars": 2}, {"sum_logits": -1.5708640813827515, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.5708640813827515, "logits_per_char": -0.7854320406913757, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 6, "native_id": 6, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.32494056224823, "incorrect_loss_raw": 1.4206875960032146, "correct_loss_per_char": 0.662470281124115, "incorrect_loss_per_char": 0.7103437980016073, "correct_loss_per_token": 1.32494056224823, "incorrect_loss_per_token": 1.4206875960032146, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.32494056224823, "num_tokens": 1, "num_tokens_all": 1392, "is_greedy": true, "logits_per_token": -1.32494056224823, "logits_per_char": -0.662470281124115, "num_chars": 2}, {"sum_logits": -1.3320895433425903, "num_tokens": 1, "num_tokens_all": 1392, "is_greedy": false, "logits_per_token": -1.3320895433425903, "logits_per_char": -0.6660447716712952, "num_chars": 2}, {"sum_logits": -1.5223525762557983, "num_tokens": 1, "num_tokens_all": 1392, "is_greedy": false, "logits_per_token": -1.5223525762557983, "logits_per_char": -0.7611762881278992, "num_chars": 2}, {"sum_logits": -1.4076206684112549, "num_tokens": 1, "num_tokens_all": 1392, "is_greedy": false, "logits_per_token": -1.4076206684112549, "logits_per_char": -0.7038103342056274, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 7, "native_id": 7, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6627131700515747, "incorrect_loss_raw": 1.3228769302368164, "correct_loss_per_char": 0.8313565850257874, "incorrect_loss_per_char": 0.6614384651184082, "correct_loss_per_token": 1.6627131700515747, "incorrect_loss_per_token": 1.3228769302368164, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1758086681365967, "num_tokens": 1, "num_tokens_all": 519, "is_greedy": true, "logits_per_token": -1.1758086681365967, "logits_per_char": -0.5879043340682983, "num_chars": 2}, {"sum_logits": -1.3131598234176636, "num_tokens": 1, "num_tokens_all": 519, "is_greedy": false, "logits_per_token": -1.3131598234176636, "logits_per_char": -0.6565799117088318, "num_chars": 2}, {"sum_logits": -1.6627131700515747, "num_tokens": 1, "num_tokens_all": 519, "is_greedy": false, "logits_per_token": -1.6627131700515747, "logits_per_char": -0.8313565850257874, "num_chars": 2}, {"sum_logits": -1.479662299156189, "num_tokens": 1, "num_tokens_all": 519, "is_greedy": false, "logits_per_token": -1.479662299156189, "logits_per_char": -0.7398311495780945, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 8, "native_id": 8, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.277909755706787, "incorrect_loss_raw": 1.431893030802409, "correct_loss_per_char": 0.6389548778533936, "incorrect_loss_per_char": 0.7159465154012045, "correct_loss_per_token": 1.277909755706787, "incorrect_loss_per_token": 1.431893030802409, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.277909755706787, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": true, "logits_per_token": -1.277909755706787, "logits_per_char": -0.6389548778533936, "num_chars": 2}, {"sum_logits": -1.4452459812164307, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.4452459812164307, "logits_per_char": -0.7226229906082153, "num_chars": 2}, {"sum_logits": -1.3660941123962402, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.3660941123962402, "logits_per_char": -0.6830470561981201, "num_chars": 2}, {"sum_logits": -1.4843389987945557, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.4843389987945557, "logits_per_char": -0.7421694993972778, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 9, "native_id": 9, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3196642398834229, "incorrect_loss_raw": 1.4172377983729045, "correct_loss_per_char": 0.6598321199417114, "incorrect_loss_per_char": 0.7086188991864523, "correct_loss_per_token": 1.3196642398834229, "incorrect_loss_per_token": 1.4172377983729045, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3196642398834229, "num_tokens": 1, "num_tokens_all": 451, "is_greedy": true, "logits_per_token": -1.3196642398834229, "logits_per_char": -0.6598321199417114, "num_chars": 2}, {"sum_logits": -1.4230765104293823, "num_tokens": 1, "num_tokens_all": 451, "is_greedy": false, "logits_per_token": -1.4230765104293823, "logits_per_char": -0.7115382552146912, "num_chars": 2}, {"sum_logits": -1.460330605506897, "num_tokens": 1, "num_tokens_all": 451, "is_greedy": false, "logits_per_token": -1.460330605506897, "logits_per_char": -0.7301653027534485, "num_chars": 2}, {"sum_logits": -1.368306279182434, "num_tokens": 1, "num_tokens_all": 451, "is_greedy": false, "logits_per_token": -1.368306279182434, "logits_per_char": -0.684153139591217, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 10, "native_id": 10, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3854598999023438, "incorrect_loss_raw": 1.3962950706481934, "correct_loss_per_char": 0.6927299499511719, "incorrect_loss_per_char": 0.6981475353240967, "correct_loss_per_token": 1.3854598999023438, "incorrect_loss_per_token": 1.3962950706481934, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4433038234710693, "num_tokens": 1, "num_tokens_all": 490, "is_greedy": false, "logits_per_token": -1.4433038234710693, "logits_per_char": -0.7216519117355347, "num_chars": 2}, {"sum_logits": -1.3672125339508057, "num_tokens": 1, "num_tokens_all": 490, "is_greedy": true, "logits_per_token": -1.3672125339508057, "logits_per_char": -0.6836062669754028, "num_chars": 2}, {"sum_logits": -1.3854598999023438, "num_tokens": 1, "num_tokens_all": 490, "is_greedy": false, "logits_per_token": -1.3854598999023438, "logits_per_char": -0.6927299499511719, "num_chars": 2}, {"sum_logits": -1.378368854522705, "num_tokens": 1, "num_tokens_all": 490, "is_greedy": false, "logits_per_token": -1.378368854522705, "logits_per_char": -0.6891844272613525, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 11, "native_id": 11, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.478841781616211, "incorrect_loss_raw": 1.3687097628911336, "correct_loss_per_char": 0.7394208908081055, "incorrect_loss_per_char": 0.6843548814455668, "correct_loss_per_token": 1.478841781616211, "incorrect_loss_per_token": 1.3687097628911336, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.478841781616211, "num_tokens": 1, "num_tokens_all": 531, "is_greedy": false, "logits_per_token": -1.478841781616211, "logits_per_char": -0.7394208908081055, "num_chars": 2}, {"sum_logits": -1.3188341856002808, "num_tokens": 1, "num_tokens_all": 531, "is_greedy": false, "logits_per_token": -1.3188341856002808, "logits_per_char": -0.6594170928001404, "num_chars": 2}, {"sum_logits": -1.4987707138061523, "num_tokens": 1, "num_tokens_all": 531, "is_greedy": false, "logits_per_token": -1.4987707138061523, "logits_per_char": -0.7493853569030762, "num_chars": 2}, {"sum_logits": -1.2885243892669678, "num_tokens": 1, "num_tokens_all": 531, "is_greedy": true, "logits_per_token": -1.2885243892669678, "logits_per_char": -0.6442621946334839, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 12, "native_id": 12, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2535085678100586, "incorrect_loss_raw": 1.4518697261810303, "correct_loss_per_char": 0.6267542839050293, "incorrect_loss_per_char": 0.7259348630905151, "correct_loss_per_token": 1.2535085678100586, "incorrect_loss_per_token": 1.4518697261810303, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3884004354476929, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": false, "logits_per_token": -1.3884004354476929, "logits_per_char": -0.6942002177238464, "num_chars": 2}, {"sum_logits": -1.422650933265686, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": false, "logits_per_token": -1.422650933265686, "logits_per_char": -0.711325466632843, "num_chars": 2}, {"sum_logits": -1.544557809829712, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": false, "logits_per_token": -1.544557809829712, "logits_per_char": -0.772278904914856, "num_chars": 2}, {"sum_logits": -1.2535085678100586, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": true, "logits_per_token": -1.2535085678100586, "logits_per_char": -0.6267542839050293, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 13, "native_id": 13, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4866937398910522, "incorrect_loss_raw": 1.363874872525533, "correct_loss_per_char": 0.7433468699455261, "incorrect_loss_per_char": 0.6819374362627665, "correct_loss_per_token": 1.4866937398910522, "incorrect_loss_per_token": 1.363874872525533, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.295467495918274, "num_tokens": 1, "num_tokens_all": 465, "is_greedy": true, "logits_per_token": -1.295467495918274, "logits_per_char": -0.647733747959137, "num_chars": 2}, {"sum_logits": -1.4866937398910522, "num_tokens": 1, "num_tokens_all": 465, "is_greedy": false, "logits_per_token": -1.4866937398910522, "logits_per_char": -0.7433468699455261, "num_chars": 2}, {"sum_logits": -1.475719928741455, "num_tokens": 1, "num_tokens_all": 465, "is_greedy": false, "logits_per_token": -1.475719928741455, "logits_per_char": -0.7378599643707275, "num_chars": 2}, {"sum_logits": -1.3204371929168701, "num_tokens": 1, "num_tokens_all": 465, "is_greedy": false, "logits_per_token": -1.3204371929168701, "logits_per_char": -0.6602185964584351, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 14, "native_id": 14, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4261687994003296, "incorrect_loss_raw": 1.3876469135284424, "correct_loss_per_char": 0.7130843997001648, "incorrect_loss_per_char": 0.6938234567642212, "correct_loss_per_token": 1.4261687994003296, "incorrect_loss_per_token": 1.3876469135284424, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4797003269195557, "num_tokens": 1, "num_tokens_all": 515, "is_greedy": false, "logits_per_token": -1.4797003269195557, "logits_per_char": -0.7398501634597778, "num_chars": 2}, {"sum_logits": -1.4261687994003296, "num_tokens": 1, "num_tokens_all": 515, "is_greedy": false, "logits_per_token": -1.4261687994003296, "logits_per_char": -0.7130843997001648, "num_chars": 2}, {"sum_logits": -1.4743455648422241, "num_tokens": 1, "num_tokens_all": 515, "is_greedy": false, "logits_per_token": -1.4743455648422241, "logits_per_char": -0.7371727824211121, "num_chars": 2}, {"sum_logits": -1.2088948488235474, "num_tokens": 1, "num_tokens_all": 515, "is_greedy": true, "logits_per_token": -1.2088948488235474, "logits_per_char": -0.6044474244117737, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 15, "native_id": 15, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.370200514793396, "incorrect_loss_raw": 1.4165513912836711, "correct_loss_per_char": 0.685100257396698, "incorrect_loss_per_char": 0.7082756956418356, "correct_loss_per_token": 1.370200514793396, "incorrect_loss_per_token": 1.4165513912836711, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4196611642837524, "num_tokens": 1, "num_tokens_all": 540, "is_greedy": false, "logits_per_token": -1.4196611642837524, "logits_per_char": -0.7098305821418762, "num_chars": 2}, {"sum_logits": -1.370200514793396, "num_tokens": 1, "num_tokens_all": 540, "is_greedy": false, "logits_per_token": -1.370200514793396, "logits_per_char": -0.685100257396698, "num_chars": 2}, {"sum_logits": -1.540717363357544, "num_tokens": 1, "num_tokens_all": 540, "is_greedy": false, "logits_per_token": -1.540717363357544, "logits_per_char": -0.770358681678772, "num_chars": 2}, {"sum_logits": -1.2892756462097168, "num_tokens": 1, "num_tokens_all": 540, "is_greedy": true, "logits_per_token": -1.2892756462097168, "logits_per_char": -0.6446378231048584, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 16, "native_id": 16, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4390922784805298, "incorrect_loss_raw": 1.3821521202723186, "correct_loss_per_char": 0.7195461392402649, "incorrect_loss_per_char": 0.6910760601361593, "correct_loss_per_token": 1.4390922784805298, "incorrect_loss_per_token": 1.3821521202723186, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2223243713378906, "num_tokens": 1, "num_tokens_all": 459, "is_greedy": true, "logits_per_token": -1.2223243713378906, "logits_per_char": -0.6111621856689453, "num_chars": 2}, {"sum_logits": -1.4978370666503906, "num_tokens": 1, "num_tokens_all": 459, "is_greedy": false, "logits_per_token": -1.4978370666503906, "logits_per_char": -0.7489185333251953, "num_chars": 2}, {"sum_logits": -1.4390922784805298, "num_tokens": 1, "num_tokens_all": 459, "is_greedy": false, "logits_per_token": -1.4390922784805298, "logits_per_char": -0.7195461392402649, "num_chars": 2}, {"sum_logits": -1.4262949228286743, "num_tokens": 1, "num_tokens_all": 459, "is_greedy": false, "logits_per_token": -1.4262949228286743, "logits_per_char": -0.7131474614143372, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 17, "native_id": 17, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2815855741500854, "incorrect_loss_raw": 1.4372107187906902, "correct_loss_per_char": 0.6407927870750427, "incorrect_loss_per_char": 0.7186053593953451, "correct_loss_per_token": 1.2815855741500854, "incorrect_loss_per_token": 1.4372107187906902, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3920092582702637, "num_tokens": 1, "num_tokens_all": 513, "is_greedy": false, "logits_per_token": -1.3920092582702637, "logits_per_char": -0.6960046291351318, "num_chars": 2}, {"sum_logits": -1.2815855741500854, "num_tokens": 1, "num_tokens_all": 513, "is_greedy": true, "logits_per_token": -1.2815855741500854, "logits_per_char": -0.6407927870750427, "num_chars": 2}, {"sum_logits": -1.5808714628219604, "num_tokens": 1, "num_tokens_all": 513, "is_greedy": false, "logits_per_token": -1.5808714628219604, "logits_per_char": -0.7904357314109802, "num_chars": 2}, {"sum_logits": -1.3387514352798462, "num_tokens": 1, "num_tokens_all": 513, "is_greedy": false, "logits_per_token": -1.3387514352798462, "logits_per_char": -0.6693757176399231, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 18, "native_id": 18, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3534014225006104, "incorrect_loss_raw": 1.4118282794952393, "correct_loss_per_char": 0.6767007112503052, "incorrect_loss_per_char": 0.7059141397476196, "correct_loss_per_token": 1.3534014225006104, "incorrect_loss_per_token": 1.4118282794952393, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2921398878097534, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": true, "logits_per_token": -1.2921398878097534, "logits_per_char": -0.6460699439048767, "num_chars": 2}, {"sum_logits": -1.3534014225006104, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.3534014225006104, "logits_per_char": -0.6767007112503052, "num_chars": 2}, {"sum_logits": -1.542092204093933, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.542092204093933, "logits_per_char": -0.7710461020469666, "num_chars": 2}, {"sum_logits": -1.4012527465820312, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.4012527465820312, "logits_per_char": -0.7006263732910156, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 19, "native_id": 19, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4309757947921753, "incorrect_loss_raw": 1.379763086636861, "correct_loss_per_char": 0.7154878973960876, "incorrect_loss_per_char": 0.6898815433184305, "correct_loss_per_token": 1.4309757947921753, "incorrect_loss_per_token": 1.379763086636861, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3142852783203125, "num_tokens": 1, "num_tokens_all": 475, "is_greedy": true, "logits_per_token": -1.3142852783203125, "logits_per_char": -0.6571426391601562, "num_chars": 2}, {"sum_logits": -1.4309757947921753, "num_tokens": 1, "num_tokens_all": 475, "is_greedy": false, "logits_per_token": -1.4309757947921753, "logits_per_char": -0.7154878973960876, "num_chars": 2}, {"sum_logits": -1.4597671031951904, "num_tokens": 1, "num_tokens_all": 475, "is_greedy": false, "logits_per_token": -1.4597671031951904, "logits_per_char": -0.7298835515975952, "num_chars": 2}, {"sum_logits": -1.3652368783950806, "num_tokens": 1, "num_tokens_all": 475, "is_greedy": false, "logits_per_token": -1.3652368783950806, "logits_per_char": -0.6826184391975403, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 20, "native_id": 20, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4657206535339355, "incorrect_loss_raw": 1.4357870022455852, "correct_loss_per_char": 0.7328603267669678, "incorrect_loss_per_char": 0.7178935011227926, "correct_loss_per_token": 1.4657206535339355, "incorrect_loss_per_token": 1.4357870022455852, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.411729097366333, "num_tokens": 1, "num_tokens_all": 477, "is_greedy": false, "logits_per_token": -1.411729097366333, "logits_per_char": -0.7058645486831665, "num_chars": 2}, {"sum_logits": -1.5518215894699097, "num_tokens": 1, "num_tokens_all": 477, "is_greedy": false, "logits_per_token": -1.5518215894699097, "logits_per_char": -0.7759107947349548, "num_chars": 2}, {"sum_logits": -1.4657206535339355, "num_tokens": 1, "num_tokens_all": 477, "is_greedy": false, "logits_per_token": -1.4657206535339355, "logits_per_char": -0.7328603267669678, "num_chars": 2}, {"sum_logits": -1.3438103199005127, "num_tokens": 1, "num_tokens_all": 477, "is_greedy": true, "logits_per_token": -1.3438103199005127, "logits_per_char": -0.6719051599502563, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 21, "native_id": 21, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5058389902114868, "incorrect_loss_raw": 1.3661928176879883, "correct_loss_per_char": 0.7529194951057434, "incorrect_loss_per_char": 0.6830964088439941, "correct_loss_per_token": 1.5058389902114868, "incorrect_loss_per_token": 1.3661928176879883, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1614277362823486, "num_tokens": 1, "num_tokens_all": 459, "is_greedy": true, "logits_per_token": -1.1614277362823486, "logits_per_char": -0.5807138681411743, "num_chars": 2}, {"sum_logits": -1.4376548528671265, "num_tokens": 1, "num_tokens_all": 459, "is_greedy": false, "logits_per_token": -1.4376548528671265, "logits_per_char": -0.7188274264335632, "num_chars": 2}, {"sum_logits": -1.4994958639144897, "num_tokens": 1, "num_tokens_all": 459, "is_greedy": false, "logits_per_token": -1.4994958639144897, "logits_per_char": -0.7497479319572449, "num_chars": 2}, {"sum_logits": -1.5058389902114868, "num_tokens": 1, "num_tokens_all": 459, "is_greedy": false, "logits_per_token": -1.5058389902114868, "logits_per_char": -0.7529194951057434, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 22, "native_id": 22, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4080616235733032, "incorrect_loss_raw": 1.3957430124282837, "correct_loss_per_char": 0.7040308117866516, "incorrect_loss_per_char": 0.6978715062141418, "correct_loss_per_token": 1.4080616235733032, "incorrect_loss_per_token": 1.3957430124282837, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2360601425170898, "num_tokens": 1, "num_tokens_all": 455, "is_greedy": true, "logits_per_token": -1.2360601425170898, "logits_per_char": -0.6180300712585449, "num_chars": 2}, {"sum_logits": -1.4006415605545044, "num_tokens": 1, "num_tokens_all": 455, "is_greedy": false, "logits_per_token": -1.4006415605545044, "logits_per_char": -0.7003207802772522, "num_chars": 2}, {"sum_logits": -1.5505273342132568, "num_tokens": 1, "num_tokens_all": 455, "is_greedy": false, "logits_per_token": -1.5505273342132568, "logits_per_char": -0.7752636671066284, "num_chars": 2}, {"sum_logits": -1.4080616235733032, "num_tokens": 1, "num_tokens_all": 455, "is_greedy": false, "logits_per_token": -1.4080616235733032, "logits_per_char": -0.7040308117866516, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 23, "native_id": 23, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3665412664413452, "incorrect_loss_raw": 1.4150698979695637, "correct_loss_per_char": 0.6832706332206726, "incorrect_loss_per_char": 0.7075349489847819, "correct_loss_per_token": 1.3665412664413452, "incorrect_loss_per_token": 1.4150698979695637, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3665412664413452, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": false, "logits_per_token": -1.3665412664413452, "logits_per_char": -0.6832706332206726, "num_chars": 2}, {"sum_logits": -1.5584070682525635, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": false, "logits_per_token": -1.5584070682525635, "logits_per_char": -0.7792035341262817, "num_chars": 2}, {"sum_logits": -1.4762401580810547, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": false, "logits_per_token": -1.4762401580810547, "logits_per_char": -0.7381200790405273, "num_chars": 2}, {"sum_logits": -1.2105624675750732, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": true, "logits_per_token": -1.2105624675750732, "logits_per_char": -0.6052812337875366, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 24, "native_id": 24, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3390671014785767, "incorrect_loss_raw": 1.4185795783996582, "correct_loss_per_char": 0.6695335507392883, "incorrect_loss_per_char": 0.7092897891998291, "correct_loss_per_token": 1.3390671014785767, "incorrect_loss_per_token": 1.4185795783996582, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2275004386901855, "num_tokens": 1, "num_tokens_all": 477, "is_greedy": true, "logits_per_token": -1.2275004386901855, "logits_per_char": -0.6137502193450928, "num_chars": 2}, {"sum_logits": -1.3390671014785767, "num_tokens": 1, "num_tokens_all": 477, "is_greedy": false, "logits_per_token": -1.3390671014785767, "logits_per_char": -0.6695335507392883, "num_chars": 2}, {"sum_logits": -1.4958856105804443, "num_tokens": 1, "num_tokens_all": 477, "is_greedy": false, "logits_per_token": -1.4958856105804443, "logits_per_char": -0.7479428052902222, "num_chars": 2}, {"sum_logits": -1.5323526859283447, "num_tokens": 1, "num_tokens_all": 477, "is_greedy": false, "logits_per_token": -1.5323526859283447, "logits_per_char": -0.7661763429641724, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 25, "native_id": 25, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3489959239959717, "incorrect_loss_raw": 1.406640648841858, "correct_loss_per_char": 0.6744979619979858, "incorrect_loss_per_char": 0.703320324420929, "correct_loss_per_token": 1.3489959239959717, "incorrect_loss_per_token": 1.406640648841858, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4240988492965698, "num_tokens": 1, "num_tokens_all": 518, "is_greedy": false, "logits_per_token": -1.4240988492965698, "logits_per_char": -0.7120494246482849, "num_chars": 2}, {"sum_logits": -1.4148985147476196, "num_tokens": 1, "num_tokens_all": 518, "is_greedy": false, "logits_per_token": -1.4148985147476196, "logits_per_char": -0.7074492573738098, "num_chars": 2}, {"sum_logits": -1.3489959239959717, "num_tokens": 1, "num_tokens_all": 518, "is_greedy": true, "logits_per_token": -1.3489959239959717, "logits_per_char": -0.6744979619979858, "num_chars": 2}, {"sum_logits": -1.3809245824813843, "num_tokens": 1, "num_tokens_all": 518, "is_greedy": false, "logits_per_token": -1.3809245824813843, "logits_per_char": -0.6904622912406921, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 26, "native_id": 26, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5097390413284302, "incorrect_loss_raw": 1.3569442828496296, "correct_loss_per_char": 0.7548695206642151, "incorrect_loss_per_char": 0.6784721414248148, "correct_loss_per_token": 1.5097390413284302, "incorrect_loss_per_token": 1.3569442828496296, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4006943702697754, "num_tokens": 1, "num_tokens_all": 542, "is_greedy": false, "logits_per_token": -1.4006943702697754, "logits_per_char": -0.7003471851348877, "num_chars": 2}, {"sum_logits": -1.409703016281128, "num_tokens": 1, "num_tokens_all": 542, "is_greedy": false, "logits_per_token": -1.409703016281128, "logits_per_char": -0.704851508140564, "num_chars": 2}, {"sum_logits": -1.5097390413284302, "num_tokens": 1, "num_tokens_all": 542, "is_greedy": false, "logits_per_token": -1.5097390413284302, "logits_per_char": -0.7548695206642151, "num_chars": 2}, {"sum_logits": -1.2604354619979858, "num_tokens": 1, "num_tokens_all": 542, "is_greedy": true, "logits_per_token": -1.2604354619979858, "logits_per_char": -0.6302177309989929, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 27, "native_id": 27, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.348651647567749, "incorrect_loss_raw": 1.4096668561299641, "correct_loss_per_char": 0.6743258237838745, "incorrect_loss_per_char": 0.7048334280649821, "correct_loss_per_token": 1.348651647567749, "incorrect_loss_per_token": 1.4096668561299641, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4420167207717896, "num_tokens": 1, "num_tokens_all": 514, "is_greedy": false, "logits_per_token": -1.4420167207717896, "logits_per_char": -0.7210083603858948, "num_chars": 2}, {"sum_logits": -1.3100529909133911, "num_tokens": 1, "num_tokens_all": 514, "is_greedy": true, "logits_per_token": -1.3100529909133911, "logits_per_char": -0.6550264954566956, "num_chars": 2}, {"sum_logits": -1.476930856704712, "num_tokens": 1, "num_tokens_all": 514, "is_greedy": false, "logits_per_token": -1.476930856704712, "logits_per_char": -0.738465428352356, "num_chars": 2}, {"sum_logits": -1.348651647567749, "num_tokens": 1, "num_tokens_all": 514, "is_greedy": false, "logits_per_token": -1.348651647567749, "logits_per_char": -0.6743258237838745, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 28, "native_id": 28, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4813071489334106, "incorrect_loss_raw": 1.3655145168304443, "correct_loss_per_char": 0.7406535744667053, "incorrect_loss_per_char": 0.6827572584152222, "correct_loss_per_token": 1.4813071489334106, "incorrect_loss_per_token": 1.3655145168304443, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4813071489334106, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.4813071489334106, "logits_per_char": -0.7406535744667053, "num_chars": 2}, {"sum_logits": -1.255352258682251, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": true, "logits_per_token": -1.255352258682251, "logits_per_char": -0.6276761293411255, "num_chars": 2}, {"sum_logits": -1.4598925113677979, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.4598925113677979, "logits_per_char": -0.7299462556838989, "num_chars": 2}, {"sum_logits": -1.3812987804412842, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.3812987804412842, "logits_per_char": -0.6906493902206421, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 29, "native_id": 29, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3546279668807983, "incorrect_loss_raw": 1.4046557744344075, "correct_loss_per_char": 0.6773139834403992, "incorrect_loss_per_char": 0.7023278872172037, "correct_loss_per_token": 1.3546279668807983, "incorrect_loss_per_token": 1.4046557744344075, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3545571565628052, "num_tokens": 1, "num_tokens_all": 528, "is_greedy": true, "logits_per_token": -1.3545571565628052, "logits_per_char": -0.6772785782814026, "num_chars": 2}, {"sum_logits": -1.3969037532806396, "num_tokens": 1, "num_tokens_all": 528, "is_greedy": false, "logits_per_token": -1.3969037532806396, "logits_per_char": -0.6984518766403198, "num_chars": 2}, {"sum_logits": -1.4625064134597778, "num_tokens": 1, "num_tokens_all": 528, "is_greedy": false, "logits_per_token": -1.4625064134597778, "logits_per_char": -0.7312532067298889, "num_chars": 2}, {"sum_logits": -1.3546279668807983, "num_tokens": 1, "num_tokens_all": 528, "is_greedy": false, "logits_per_token": -1.3546279668807983, "logits_per_char": -0.6773139834403992, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 30, "native_id": 30, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2810782194137573, "incorrect_loss_raw": 1.4345624049504597, "correct_loss_per_char": 0.6405391097068787, "incorrect_loss_per_char": 0.7172812024752299, "correct_loss_per_token": 1.2810782194137573, "incorrect_loss_per_token": 1.4345624049504597, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3822522163391113, "num_tokens": 1, "num_tokens_all": 501, "is_greedy": false, "logits_per_token": -1.3822522163391113, "logits_per_char": -0.6911261081695557, "num_chars": 2}, {"sum_logits": -1.3937937021255493, "num_tokens": 1, "num_tokens_all": 501, "is_greedy": false, "logits_per_token": -1.3937937021255493, "logits_per_char": -0.6968968510627747, "num_chars": 2}, {"sum_logits": -1.5276412963867188, "num_tokens": 1, "num_tokens_all": 501, "is_greedy": false, "logits_per_token": -1.5276412963867188, "logits_per_char": -0.7638206481933594, "num_chars": 2}, {"sum_logits": -1.2810782194137573, "num_tokens": 1, "num_tokens_all": 501, "is_greedy": true, "logits_per_token": -1.2810782194137573, "logits_per_char": -0.6405391097068787, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 31, "native_id": 31, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1731011867523193, "incorrect_loss_raw": 1.4816722869873047, "correct_loss_per_char": 0.5865505933761597, "incorrect_loss_per_char": 0.7408361434936523, "correct_loss_per_token": 1.1731011867523193, "incorrect_loss_per_token": 1.4816722869873047, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.543405294418335, "num_tokens": 1, "num_tokens_all": 632, "is_greedy": false, "logits_per_token": -1.543405294418335, "logits_per_char": -0.7717026472091675, "num_chars": 2}, {"sum_logits": -1.1731011867523193, "num_tokens": 1, "num_tokens_all": 632, "is_greedy": true, "logits_per_token": -1.1731011867523193, "logits_per_char": -0.5865505933761597, "num_chars": 2}, {"sum_logits": -1.5597046613693237, "num_tokens": 1, "num_tokens_all": 632, "is_greedy": false, "logits_per_token": -1.5597046613693237, "logits_per_char": -0.7798523306846619, "num_chars": 2}, {"sum_logits": -1.3419069051742554, "num_tokens": 1, "num_tokens_all": 632, "is_greedy": false, "logits_per_token": -1.3419069051742554, "logits_per_char": -0.6709534525871277, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 32, "native_id": 32, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5052011013031006, "incorrect_loss_raw": 1.3625189463297527, "correct_loss_per_char": 0.7526005506515503, "incorrect_loss_per_char": 0.6812594731648763, "correct_loss_per_token": 1.5052011013031006, "incorrect_loss_per_token": 1.3625189463297527, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.285205364227295, "num_tokens": 1, "num_tokens_all": 475, "is_greedy": true, "logits_per_token": -1.285205364227295, "logits_per_char": -0.6426026821136475, "num_chars": 2}, {"sum_logits": -1.462512731552124, "num_tokens": 1, "num_tokens_all": 475, "is_greedy": false, "logits_per_token": -1.462512731552124, "logits_per_char": -0.731256365776062, "num_chars": 2}, {"sum_logits": -1.5052011013031006, "num_tokens": 1, "num_tokens_all": 475, "is_greedy": false, "logits_per_token": -1.5052011013031006, "logits_per_char": -0.7526005506515503, "num_chars": 2}, {"sum_logits": -1.3398387432098389, "num_tokens": 1, "num_tokens_all": 475, "is_greedy": false, "logits_per_token": -1.3398387432098389, "logits_per_char": -0.6699193716049194, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 33, "native_id": 33, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4421653747558594, "incorrect_loss_raw": 1.3770289818445842, "correct_loss_per_char": 0.7210826873779297, "incorrect_loss_per_char": 0.6885144909222921, "correct_loss_per_token": 1.4421653747558594, "incorrect_loss_per_token": 1.3770289818445842, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3359699249267578, "num_tokens": 1, "num_tokens_all": 484, "is_greedy": true, "logits_per_token": -1.3359699249267578, "logits_per_char": -0.6679849624633789, "num_chars": 2}, {"sum_logits": -1.3387434482574463, "num_tokens": 1, "num_tokens_all": 484, "is_greedy": false, "logits_per_token": -1.3387434482574463, "logits_per_char": -0.6693717241287231, "num_chars": 2}, {"sum_logits": -1.4563735723495483, "num_tokens": 1, "num_tokens_all": 484, "is_greedy": false, "logits_per_token": -1.4563735723495483, "logits_per_char": -0.7281867861747742, "num_chars": 2}, {"sum_logits": -1.4421653747558594, "num_tokens": 1, "num_tokens_all": 484, "is_greedy": false, "logits_per_token": -1.4421653747558594, "logits_per_char": -0.7210826873779297, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 34, "native_id": 34, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.572906732559204, "incorrect_loss_raw": 1.3444819450378418, "correct_loss_per_char": 0.786453366279602, "incorrect_loss_per_char": 0.6722409725189209, "correct_loss_per_token": 1.572906732559204, "incorrect_loss_per_token": 1.3444819450378418, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1897810697555542, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": true, "logits_per_token": -1.1897810697555542, "logits_per_char": -0.5948905348777771, "num_chars": 2}, {"sum_logits": -1.3707311153411865, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.3707311153411865, "logits_per_char": -0.6853655576705933, "num_chars": 2}, {"sum_logits": -1.4729336500167847, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.4729336500167847, "logits_per_char": -0.7364668250083923, "num_chars": 2}, {"sum_logits": -1.572906732559204, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.572906732559204, "logits_per_char": -0.786453366279602, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 35, "native_id": 35, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3976346254348755, "incorrect_loss_raw": 1.391938050587972, "correct_loss_per_char": 0.6988173127174377, "incorrect_loss_per_char": 0.695969025293986, "correct_loss_per_token": 1.3976346254348755, "incorrect_loss_per_token": 1.391938050587972, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4040278196334839, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": false, "logits_per_token": -1.4040278196334839, "logits_per_char": -0.7020139098167419, "num_chars": 2}, {"sum_logits": -1.3976346254348755, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": false, "logits_per_token": -1.3976346254348755, "logits_per_char": -0.6988173127174377, "num_chars": 2}, {"sum_logits": -1.391202688217163, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": false, "logits_per_token": -1.391202688217163, "logits_per_char": -0.6956013441085815, "num_chars": 2}, {"sum_logits": -1.380583643913269, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": true, "logits_per_token": -1.380583643913269, "logits_per_char": -0.6902918219566345, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 36, "native_id": 36, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2374118566513062, "incorrect_loss_raw": 1.4487874507904053, "correct_loss_per_char": 0.6187059283256531, "incorrect_loss_per_char": 0.7243937253952026, "correct_loss_per_token": 1.2374118566513062, "incorrect_loss_per_token": 1.4487874507904053, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.446122407913208, "num_tokens": 1, "num_tokens_all": 496, "is_greedy": false, "logits_per_token": -1.446122407913208, "logits_per_char": -0.723061203956604, "num_chars": 2}, {"sum_logits": -1.3809789419174194, "num_tokens": 1, "num_tokens_all": 496, "is_greedy": false, "logits_per_token": -1.3809789419174194, "logits_per_char": -0.6904894709587097, "num_chars": 2}, {"sum_logits": -1.5192610025405884, "num_tokens": 1, "num_tokens_all": 496, "is_greedy": false, "logits_per_token": -1.5192610025405884, "logits_per_char": -0.7596305012702942, "num_chars": 2}, {"sum_logits": -1.2374118566513062, "num_tokens": 1, "num_tokens_all": 496, "is_greedy": true, "logits_per_token": -1.2374118566513062, "logits_per_char": -0.6187059283256531, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 37, "native_id": 37, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3226642608642578, "incorrect_loss_raw": 1.4200350840886433, "correct_loss_per_char": 0.6613321304321289, "incorrect_loss_per_char": 0.7100175420443217, "correct_loss_per_token": 1.3226642608642578, "incorrect_loss_per_token": 1.4200350840886433, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3062046766281128, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": true, "logits_per_token": -1.3062046766281128, "logits_per_char": -0.6531023383140564, "num_chars": 2}, {"sum_logits": -1.4249173402786255, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": false, "logits_per_token": -1.4249173402786255, "logits_per_char": -0.7124586701393127, "num_chars": 2}, {"sum_logits": -1.528983235359192, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": false, "logits_per_token": -1.528983235359192, "logits_per_char": -0.764491617679596, "num_chars": 2}, {"sum_logits": -1.3226642608642578, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": false, "logits_per_token": -1.3226642608642578, "logits_per_char": -0.6613321304321289, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 38, "native_id": 38, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3809082508087158, "incorrect_loss_raw": 1.3947455088297527, "correct_loss_per_char": 0.6904541254043579, "incorrect_loss_per_char": 0.6973727544148763, "correct_loss_per_token": 1.3809082508087158, "incorrect_loss_per_token": 1.3947455088297527, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.371329426765442, "num_tokens": 1, "num_tokens_all": 489, "is_greedy": true, "logits_per_token": -1.371329426765442, "logits_per_char": -0.685664713382721, "num_chars": 2}, {"sum_logits": -1.3941272497177124, "num_tokens": 1, "num_tokens_all": 489, "is_greedy": false, "logits_per_token": -1.3941272497177124, "logits_per_char": -0.6970636248588562, "num_chars": 2}, {"sum_logits": -1.4187798500061035, "num_tokens": 1, "num_tokens_all": 489, "is_greedy": false, "logits_per_token": -1.4187798500061035, "logits_per_char": -0.7093899250030518, "num_chars": 2}, {"sum_logits": -1.3809082508087158, "num_tokens": 1, "num_tokens_all": 489, "is_greedy": false, "logits_per_token": -1.3809082508087158, "logits_per_char": -0.6904541254043579, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 39, "native_id": 39, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3836910724639893, "incorrect_loss_raw": 1.395423690478007, "correct_loss_per_char": 0.6918455362319946, "incorrect_loss_per_char": 0.6977118452390035, "correct_loss_per_token": 1.3836910724639893, "incorrect_loss_per_token": 1.395423690478007, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3836910724639893, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": false, "logits_per_token": -1.3836910724639893, "logits_per_char": -0.6918455362319946, "num_chars": 2}, {"sum_logits": -1.437476634979248, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": false, "logits_per_token": -1.437476634979248, "logits_per_char": -0.718738317489624, "num_chars": 2}, {"sum_logits": -1.4453752040863037, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": false, "logits_per_token": -1.4453752040863037, "logits_per_char": -0.7226876020431519, "num_chars": 2}, {"sum_logits": -1.3034192323684692, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": true, "logits_per_token": -1.3034192323684692, "logits_per_char": -0.6517096161842346, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 40, "native_id": 40, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4361391067504883, "incorrect_loss_raw": 1.385311444600423, "correct_loss_per_char": 0.7180695533752441, "incorrect_loss_per_char": 0.6926557223002116, "correct_loss_per_token": 1.4361391067504883, "incorrect_loss_per_token": 1.385311444600423, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.587109088897705, "num_tokens": 1, "num_tokens_all": 620, "is_greedy": false, "logits_per_token": -1.587109088897705, "logits_per_char": -0.7935545444488525, "num_chars": 2}, {"sum_logits": -1.2423447370529175, "num_tokens": 1, "num_tokens_all": 620, "is_greedy": true, "logits_per_token": -1.2423447370529175, "logits_per_char": -0.6211723685264587, "num_chars": 2}, {"sum_logits": -1.4361391067504883, "num_tokens": 1, "num_tokens_all": 620, "is_greedy": false, "logits_per_token": -1.4361391067504883, "logits_per_char": -0.7180695533752441, "num_chars": 2}, {"sum_logits": -1.326480507850647, "num_tokens": 1, "num_tokens_all": 620, "is_greedy": false, "logits_per_token": -1.326480507850647, "logits_per_char": -0.6632402539253235, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 41, "native_id": 41, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3846122026443481, "incorrect_loss_raw": 1.3920142253239949, "correct_loss_per_char": 0.6923061013221741, "incorrect_loss_per_char": 0.6960071126619974, "correct_loss_per_token": 1.3846122026443481, "incorrect_loss_per_token": 1.3920142253239949, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3846122026443481, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": false, "logits_per_token": -1.3846122026443481, "logits_per_char": -0.6923061013221741, "num_chars": 2}, {"sum_logits": -1.3682385683059692, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": true, "logits_per_token": -1.3682385683059692, "logits_per_char": -0.6841192841529846, "num_chars": 2}, {"sum_logits": -1.4295333623886108, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": false, "logits_per_token": -1.4295333623886108, "logits_per_char": -0.7147666811943054, "num_chars": 2}, {"sum_logits": -1.3782707452774048, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": false, "logits_per_token": -1.3782707452774048, "logits_per_char": -0.6891353726387024, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 42, "native_id": 42, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2193489074707031, "incorrect_loss_raw": 1.4608697096506755, "correct_loss_per_char": 0.6096744537353516, "incorrect_loss_per_char": 0.7304348548253378, "correct_loss_per_token": 1.2193489074707031, "incorrect_loss_per_token": 1.4608697096506755, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4294908046722412, "num_tokens": 1, "num_tokens_all": 502, "is_greedy": false, "logits_per_token": -1.4294908046722412, "logits_per_char": -0.7147454023361206, "num_chars": 2}, {"sum_logits": -1.3709235191345215, "num_tokens": 1, "num_tokens_all": 502, "is_greedy": false, "logits_per_token": -1.3709235191345215, "logits_per_char": -0.6854617595672607, "num_chars": 2}, {"sum_logits": -1.5821948051452637, "num_tokens": 1, "num_tokens_all": 502, "is_greedy": false, "logits_per_token": -1.5821948051452637, "logits_per_char": -0.7910974025726318, "num_chars": 2}, {"sum_logits": -1.2193489074707031, "num_tokens": 1, "num_tokens_all": 502, "is_greedy": true, "logits_per_token": -1.2193489074707031, "logits_per_char": -0.6096744537353516, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 43, "native_id": 43, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5900354385375977, "incorrect_loss_raw": 1.3363635142644246, "correct_loss_per_char": 0.7950177192687988, "incorrect_loss_per_char": 0.6681817571322123, "correct_loss_per_token": 1.5900354385375977, "incorrect_loss_per_token": 1.3363635142644246, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3184056282043457, "num_tokens": 1, "num_tokens_all": 547, "is_greedy": true, "logits_per_token": -1.3184056282043457, "logits_per_char": -0.6592028141021729, "num_chars": 2}, {"sum_logits": -1.360285758972168, "num_tokens": 1, "num_tokens_all": 547, "is_greedy": false, "logits_per_token": -1.360285758972168, "logits_per_char": -0.680142879486084, "num_chars": 2}, {"sum_logits": -1.5900354385375977, "num_tokens": 1, "num_tokens_all": 547, "is_greedy": false, "logits_per_token": -1.5900354385375977, "logits_per_char": -0.7950177192687988, "num_chars": 2}, {"sum_logits": -1.3303991556167603, "num_tokens": 1, "num_tokens_all": 547, "is_greedy": false, "logits_per_token": -1.3303991556167603, "logits_per_char": -0.6651995778083801, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 44, "native_id": 44, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4783542156219482, "incorrect_loss_raw": 1.368130882581075, "correct_loss_per_char": 0.7391771078109741, "incorrect_loss_per_char": 0.6840654412905375, "correct_loss_per_token": 1.4783542156219482, "incorrect_loss_per_token": 1.368130882581075, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.27777898311615, "num_tokens": 1, "num_tokens_all": 499, "is_greedy": true, "logits_per_token": -1.27777898311615, "logits_per_char": -0.638889491558075, "num_chars": 2}, {"sum_logits": -1.4399657249450684, "num_tokens": 1, "num_tokens_all": 499, "is_greedy": false, "logits_per_token": -1.4399657249450684, "logits_per_char": -0.7199828624725342, "num_chars": 2}, {"sum_logits": -1.4783542156219482, "num_tokens": 1, "num_tokens_all": 499, "is_greedy": false, "logits_per_token": -1.4783542156219482, "logits_per_char": -0.7391771078109741, "num_chars": 2}, {"sum_logits": -1.3866479396820068, "num_tokens": 1, "num_tokens_all": 499, "is_greedy": false, "logits_per_token": -1.3866479396820068, "logits_per_char": -0.6933239698410034, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 45, "native_id": 45, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3431878089904785, "incorrect_loss_raw": 1.4105871121088664, "correct_loss_per_char": 0.6715939044952393, "incorrect_loss_per_char": 0.7052935560544332, "correct_loss_per_token": 1.3431878089904785, "incorrect_loss_per_token": 1.4105871121088664, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3675259351730347, "num_tokens": 1, "num_tokens_all": 481, "is_greedy": false, "logits_per_token": -1.3675259351730347, "logits_per_char": -0.6837629675865173, "num_chars": 2}, {"sum_logits": -1.4778579473495483, "num_tokens": 1, "num_tokens_all": 481, "is_greedy": false, "logits_per_token": -1.4778579473495483, "logits_per_char": -0.7389289736747742, "num_chars": 2}, {"sum_logits": -1.3863774538040161, "num_tokens": 1, "num_tokens_all": 481, "is_greedy": false, "logits_per_token": -1.3863774538040161, "logits_per_char": -0.6931887269020081, "num_chars": 2}, {"sum_logits": -1.3431878089904785, "num_tokens": 1, "num_tokens_all": 481, "is_greedy": true, "logits_per_token": -1.3431878089904785, "logits_per_char": -0.6715939044952393, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 46, "native_id": 46, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5329006910324097, "incorrect_loss_raw": 1.3560510873794556, "correct_loss_per_char": 0.7664503455162048, "incorrect_loss_per_char": 0.6780255436897278, "correct_loss_per_token": 1.5329006910324097, "incorrect_loss_per_token": 1.3560510873794556, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5202704668045044, "num_tokens": 1, "num_tokens_all": 532, "is_greedy": false, "logits_per_token": -1.5202704668045044, "logits_per_char": -0.7601352334022522, "num_chars": 2}, {"sum_logits": -1.3583815097808838, "num_tokens": 1, "num_tokens_all": 532, "is_greedy": false, "logits_per_token": -1.3583815097808838, "logits_per_char": -0.6791907548904419, "num_chars": 2}, {"sum_logits": -1.5329006910324097, "num_tokens": 1, "num_tokens_all": 532, "is_greedy": false, "logits_per_token": -1.5329006910324097, "logits_per_char": -0.7664503455162048, "num_chars": 2}, {"sum_logits": -1.1895012855529785, "num_tokens": 1, "num_tokens_all": 532, "is_greedy": true, "logits_per_token": -1.1895012855529785, "logits_per_char": -0.5947506427764893, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 47, "native_id": 47, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3750395774841309, "incorrect_loss_raw": 1.3973965644836426, "correct_loss_per_char": 0.6875197887420654, "incorrect_loss_per_char": 0.6986982822418213, "correct_loss_per_token": 1.3750395774841309, "incorrect_loss_per_token": 1.3973965644836426, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3750395774841309, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": false, "logits_per_token": -1.3750395774841309, "logits_per_char": -0.6875197887420654, "num_chars": 2}, {"sum_logits": -1.3953133821487427, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": false, "logits_per_token": -1.3953133821487427, "logits_per_char": -0.6976566910743713, "num_chars": 2}, {"sum_logits": -1.4558119773864746, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": false, "logits_per_token": -1.4558119773864746, "logits_per_char": -0.7279059886932373, "num_chars": 2}, {"sum_logits": -1.3410643339157104, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": true, "logits_per_token": -1.3410643339157104, "logits_per_char": -0.6705321669578552, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 48, "native_id": 48, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4001574516296387, "incorrect_loss_raw": 1.3984970251719158, "correct_loss_per_char": 0.7000787258148193, "incorrect_loss_per_char": 0.6992485125859579, "correct_loss_per_token": 1.4001574516296387, "incorrect_loss_per_token": 1.3984970251719158, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4373034238815308, "num_tokens": 1, "num_tokens_all": 508, "is_greedy": false, "logits_per_token": -1.4373034238815308, "logits_per_char": -0.7186517119407654, "num_chars": 2}, {"sum_logits": -1.4001574516296387, "num_tokens": 1, "num_tokens_all": 508, "is_greedy": false, "logits_per_token": -1.4001574516296387, "logits_per_char": -0.7000787258148193, "num_chars": 2}, {"sum_logits": -1.5539499521255493, "num_tokens": 1, "num_tokens_all": 508, "is_greedy": false, "logits_per_token": -1.5539499521255493, "logits_per_char": -0.7769749760627747, "num_chars": 2}, {"sum_logits": -1.204237699508667, "num_tokens": 1, "num_tokens_all": 508, "is_greedy": true, "logits_per_token": -1.204237699508667, "logits_per_char": -0.6021188497543335, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 49, "native_id": 49, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6100918054580688, "incorrect_loss_raw": 1.3358996311823528, "correct_loss_per_char": 0.8050459027290344, "incorrect_loss_per_char": 0.6679498155911764, "correct_loss_per_token": 1.6100918054580688, "incorrect_loss_per_token": 1.3358996311823528, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.28573477268219, "num_tokens": 1, "num_tokens_all": 528, "is_greedy": false, "logits_per_token": -1.28573477268219, "logits_per_char": -0.642867386341095, "num_chars": 2}, {"sum_logits": -1.4405032396316528, "num_tokens": 1, "num_tokens_all": 528, "is_greedy": false, "logits_per_token": -1.4405032396316528, "logits_per_char": -0.7202516198158264, "num_chars": 2}, {"sum_logits": -1.6100918054580688, "num_tokens": 1, "num_tokens_all": 528, "is_greedy": false, "logits_per_token": -1.6100918054580688, "logits_per_char": -0.8050459027290344, "num_chars": 2}, {"sum_logits": -1.2814608812332153, "num_tokens": 1, "num_tokens_all": 528, "is_greedy": true, "logits_per_token": -1.2814608812332153, "logits_per_char": -0.6407304406166077, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 50, "native_id": 50, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6074968576431274, "incorrect_loss_raw": 1.3383123477300007, "correct_loss_per_char": 0.8037484288215637, "incorrect_loss_per_char": 0.6691561738650004, "correct_loss_per_token": 1.6074968576431274, "incorrect_loss_per_token": 1.3383123477300007, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2037198543548584, "num_tokens": 1, "num_tokens_all": 1363, "is_greedy": true, "logits_per_token": -1.2037198543548584, "logits_per_char": -0.6018599271774292, "num_chars": 2}, {"sum_logits": -1.3316341638565063, "num_tokens": 1, "num_tokens_all": 1363, "is_greedy": false, "logits_per_token": -1.3316341638565063, "logits_per_char": -0.6658170819282532, "num_chars": 2}, {"sum_logits": -1.4795830249786377, "num_tokens": 1, "num_tokens_all": 1363, "is_greedy": false, "logits_per_token": -1.4795830249786377, "logits_per_char": -0.7397915124893188, "num_chars": 2}, {"sum_logits": -1.6074968576431274, "num_tokens": 1, "num_tokens_all": 1363, "is_greedy": false, "logits_per_token": -1.6074968576431274, "logits_per_char": -0.8037484288215637, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 51, "native_id": 51, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.489471435546875, "incorrect_loss_raw": 1.3648029168446858, "correct_loss_per_char": 0.7447357177734375, "incorrect_loss_per_char": 0.6824014584223429, "correct_loss_per_token": 1.489471435546875, "incorrect_loss_per_token": 1.3648029168446858, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3242120742797852, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": true, "logits_per_token": -1.3242120742797852, "logits_per_char": -0.6621060371398926, "num_chars": 2}, {"sum_logits": -1.4105231761932373, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.4105231761932373, "logits_per_char": -0.7052615880966187, "num_chars": 2}, {"sum_logits": -1.3596735000610352, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.3596735000610352, "logits_per_char": -0.6798367500305176, "num_chars": 2}, {"sum_logits": -1.489471435546875, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.489471435546875, "logits_per_char": -0.7447357177734375, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 52, "native_id": 52, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.33547043800354, "incorrect_loss_raw": 1.415321906407674, "correct_loss_per_char": 0.66773521900177, "incorrect_loss_per_char": 0.707660953203837, "correct_loss_per_token": 1.33547043800354, "incorrect_loss_per_token": 1.415321906407674, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4349371194839478, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": false, "logits_per_token": -1.4349371194839478, "logits_per_char": -0.7174685597419739, "num_chars": 2}, {"sum_logits": -1.5030428171157837, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": false, "logits_per_token": -1.5030428171157837, "logits_per_char": -0.7515214085578918, "num_chars": 2}, {"sum_logits": -1.33547043800354, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": false, "logits_per_token": -1.33547043800354, "logits_per_char": -0.66773521900177, "num_chars": 2}, {"sum_logits": -1.307985782623291, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": true, "logits_per_token": -1.307985782623291, "logits_per_char": -0.6539928913116455, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 53, "native_id": 53, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4863859415054321, "incorrect_loss_raw": 1.3637562195460002, "correct_loss_per_char": 0.7431929707527161, "incorrect_loss_per_char": 0.6818781097730001, "correct_loss_per_token": 1.4863859415054321, "incorrect_loss_per_token": 1.3637562195460002, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3808846473693848, "num_tokens": 1, "num_tokens_all": 474, "is_greedy": false, "logits_per_token": -1.3808846473693848, "logits_per_char": -0.6904423236846924, "num_chars": 2}, {"sum_logits": -1.4459820985794067, "num_tokens": 1, "num_tokens_all": 474, "is_greedy": false, "logits_per_token": -1.4459820985794067, "logits_per_char": -0.7229910492897034, "num_chars": 2}, {"sum_logits": -1.4863859415054321, "num_tokens": 1, "num_tokens_all": 474, "is_greedy": false, "logits_per_token": -1.4863859415054321, "logits_per_char": -0.7431929707527161, "num_chars": 2}, {"sum_logits": -1.264401912689209, "num_tokens": 1, "num_tokens_all": 474, "is_greedy": true, "logits_per_token": -1.264401912689209, "logits_per_char": -0.6322009563446045, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 54, "native_id": 54, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.455600619316101, "incorrect_loss_raw": 1.3714861869812012, "correct_loss_per_char": 0.7278003096580505, "incorrect_loss_per_char": 0.6857430934906006, "correct_loss_per_token": 1.455600619316101, "incorrect_loss_per_token": 1.3714861869812012, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3624242544174194, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": true, "logits_per_token": -1.3624242544174194, "logits_per_char": -0.6812121272087097, "num_chars": 2}, {"sum_logits": -1.455600619316101, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.455600619316101, "logits_per_char": -0.7278003096580505, "num_chars": 2}, {"sum_logits": -1.3894455432891846, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.3894455432891846, "logits_per_char": -0.6947227716445923, "num_chars": 2}, {"sum_logits": -1.3625887632369995, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.3625887632369995, "logits_per_char": -0.6812943816184998, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 55, "native_id": 55, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4299700260162354, "incorrect_loss_raw": 1.3845285177230835, "correct_loss_per_char": 0.7149850130081177, "incorrect_loss_per_char": 0.6922642588615417, "correct_loss_per_token": 1.4299700260162354, "incorrect_loss_per_token": 1.3845285177230835, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2631103992462158, "num_tokens": 1, "num_tokens_all": 452, "is_greedy": true, "logits_per_token": -1.2631103992462158, "logits_per_char": -0.6315551996231079, "num_chars": 2}, {"sum_logits": -1.4299700260162354, "num_tokens": 1, "num_tokens_all": 452, "is_greedy": false, "logits_per_token": -1.4299700260162354, "logits_per_char": -0.7149850130081177, "num_chars": 2}, {"sum_logits": -1.4952677488327026, "num_tokens": 1, "num_tokens_all": 452, "is_greedy": false, "logits_per_token": -1.4952677488327026, "logits_per_char": -0.7476338744163513, "num_chars": 2}, {"sum_logits": -1.395207405090332, "num_tokens": 1, "num_tokens_all": 452, "is_greedy": false, "logits_per_token": -1.395207405090332, "logits_per_char": -0.697603702545166, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 56, "native_id": 56, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4297008514404297, "incorrect_loss_raw": 1.3786114851633708, "correct_loss_per_char": 0.7148504257202148, "incorrect_loss_per_char": 0.6893057425816854, "correct_loss_per_token": 1.4297008514404297, "incorrect_loss_per_token": 1.3786114851633708, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3718122243881226, "num_tokens": 1, "num_tokens_all": 491, "is_greedy": false, "logits_per_token": -1.3718122243881226, "logits_per_char": -0.6859061121940613, "num_chars": 2}, {"sum_logits": -1.4297008514404297, "num_tokens": 1, "num_tokens_all": 491, "is_greedy": false, "logits_per_token": -1.4297008514404297, "logits_per_char": -0.7148504257202148, "num_chars": 2}, {"sum_logits": -1.406158685684204, "num_tokens": 1, "num_tokens_all": 491, "is_greedy": false, "logits_per_token": -1.406158685684204, "logits_per_char": -0.703079342842102, "num_chars": 2}, {"sum_logits": -1.3578635454177856, "num_tokens": 1, "num_tokens_all": 491, "is_greedy": true, "logits_per_token": -1.3578635454177856, "logits_per_char": -0.6789317727088928, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 57, "native_id": 57, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.554039478302002, "incorrect_loss_raw": 1.3492499987284343, "correct_loss_per_char": 0.777019739151001, "incorrect_loss_per_char": 0.6746249993642172, "correct_loss_per_token": 1.554039478302002, "incorrect_loss_per_token": 1.3492499987284343, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.554039478302002, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": false, "logits_per_token": -1.554039478302002, "logits_per_char": -0.777019739151001, "num_chars": 2}, {"sum_logits": -1.1973925828933716, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": true, "logits_per_token": -1.1973925828933716, "logits_per_char": -0.5986962914466858, "num_chars": 2}, {"sum_logits": -1.4972217082977295, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": false, "logits_per_token": -1.4972217082977295, "logits_per_char": -0.7486108541488647, "num_chars": 2}, {"sum_logits": -1.3531357049942017, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": false, "logits_per_token": -1.3531357049942017, "logits_per_char": -0.6765678524971008, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 58, "native_id": 58, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.625766396522522, "incorrect_loss_raw": 1.3260475397109985, "correct_loss_per_char": 0.812883198261261, "incorrect_loss_per_char": 0.6630237698554993, "correct_loss_per_token": 1.625766396522522, "incorrect_loss_per_token": 1.3260475397109985, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3620269298553467, "num_tokens": 1, "num_tokens_all": 518, "is_greedy": false, "logits_per_token": -1.3620269298553467, "logits_per_char": -0.6810134649276733, "num_chars": 2}, {"sum_logits": -1.2119783163070679, "num_tokens": 1, "num_tokens_all": 518, "is_greedy": true, "logits_per_token": -1.2119783163070679, "logits_per_char": -0.6059891581535339, "num_chars": 2}, {"sum_logits": -1.625766396522522, "num_tokens": 1, "num_tokens_all": 518, "is_greedy": false, "logits_per_token": -1.625766396522522, "logits_per_char": -0.812883198261261, "num_chars": 2}, {"sum_logits": -1.404137372970581, "num_tokens": 1, "num_tokens_all": 518, "is_greedy": false, "logits_per_token": -1.404137372970581, "logits_per_char": -0.7020686864852905, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 59, "native_id": 59, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4996155500411987, "incorrect_loss_raw": 1.3615591923395793, "correct_loss_per_char": 0.7498077750205994, "incorrect_loss_per_char": 0.6807795961697897, "correct_loss_per_token": 1.4996155500411987, "incorrect_loss_per_token": 1.3615591923395793, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2597403526306152, "num_tokens": 1, "num_tokens_all": 461, "is_greedy": true, "logits_per_token": -1.2597403526306152, "logits_per_char": -0.6298701763153076, "num_chars": 2}, {"sum_logits": -1.384339451789856, "num_tokens": 1, "num_tokens_all": 461, "is_greedy": false, "logits_per_token": -1.384339451789856, "logits_per_char": -0.692169725894928, "num_chars": 2}, {"sum_logits": -1.4996155500411987, "num_tokens": 1, "num_tokens_all": 461, "is_greedy": false, "logits_per_token": -1.4996155500411987, "logits_per_char": -0.7498077750205994, "num_chars": 2}, {"sum_logits": -1.4405977725982666, "num_tokens": 1, "num_tokens_all": 461, "is_greedy": false, "logits_per_token": -1.4405977725982666, "logits_per_char": -0.7202988862991333, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 60, "native_id": 60, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3974049091339111, "incorrect_loss_raw": 1.3946173985799153, "correct_loss_per_char": 0.6987024545669556, "incorrect_loss_per_char": 0.6973086992899576, "correct_loss_per_token": 1.3974049091339111, "incorrect_loss_per_token": 1.3946173985799153, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.26002836227417, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": true, "logits_per_token": -1.26002836227417, "logits_per_char": -0.630014181137085, "num_chars": 2}, {"sum_logits": -1.4374947547912598, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.4374947547912598, "logits_per_char": -0.7187473773956299, "num_chars": 2}, {"sum_logits": -1.3974049091339111, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.3974049091339111, "logits_per_char": -0.6987024545669556, "num_chars": 2}, {"sum_logits": -1.4863290786743164, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.4863290786743164, "logits_per_char": -0.7431645393371582, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 61, "native_id": 61, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3666019439697266, "incorrect_loss_raw": 1.4114028612772624, "correct_loss_per_char": 0.6833009719848633, "incorrect_loss_per_char": 0.7057014306386312, "correct_loss_per_token": 1.3666019439697266, "incorrect_loss_per_token": 1.4114028612772624, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3369922637939453, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": true, "logits_per_token": -1.3369922637939453, "logits_per_char": -0.6684961318969727, "num_chars": 2}, {"sum_logits": -1.3864375352859497, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": false, "logits_per_token": -1.3864375352859497, "logits_per_char": -0.6932187676429749, "num_chars": 2}, {"sum_logits": -1.510778784751892, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": false, "logits_per_token": -1.510778784751892, "logits_per_char": -0.755389392375946, "num_chars": 2}, {"sum_logits": -1.3666019439697266, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": false, "logits_per_token": -1.3666019439697266, "logits_per_char": -0.6833009719848633, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 62, "native_id": 62, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3901671171188354, "incorrect_loss_raw": 1.3952816327412922, "correct_loss_per_char": 0.6950835585594177, "incorrect_loss_per_char": 0.6976408163706461, "correct_loss_per_token": 1.3901671171188354, "incorrect_loss_per_token": 1.3952816327412922, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2932792901992798, "num_tokens": 1, "num_tokens_all": 448, "is_greedy": true, "logits_per_token": -1.2932792901992798, "logits_per_char": -0.6466396450996399, "num_chars": 2}, {"sum_logits": -1.3891756534576416, "num_tokens": 1, "num_tokens_all": 448, "is_greedy": false, "logits_per_token": -1.3891756534576416, "logits_per_char": -0.6945878267288208, "num_chars": 2}, {"sum_logits": -1.5033899545669556, "num_tokens": 1, "num_tokens_all": 448, "is_greedy": false, "logits_per_token": -1.5033899545669556, "logits_per_char": -0.7516949772834778, "num_chars": 2}, {"sum_logits": -1.3901671171188354, "num_tokens": 1, "num_tokens_all": 448, "is_greedy": false, "logits_per_token": -1.3901671171188354, "logits_per_char": -0.6950835585594177, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 63, "native_id": 63, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5298954248428345, "incorrect_loss_raw": 1.4404651721318562, "correct_loss_per_char": 0.7649477124214172, "incorrect_loss_per_char": 0.7202325860659281, "correct_loss_per_token": 1.5298954248428345, "incorrect_loss_per_token": 1.4404651721318562, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4380911588668823, "num_tokens": 1, "num_tokens_all": 478, "is_greedy": false, "logits_per_token": -1.4380911588668823, "logits_per_char": -0.7190455794334412, "num_chars": 2}, {"sum_logits": -1.5180808305740356, "num_tokens": 1, "num_tokens_all": 478, "is_greedy": false, "logits_per_token": -1.5180808305740356, "logits_per_char": -0.7590404152870178, "num_chars": 2}, {"sum_logits": -1.5298954248428345, "num_tokens": 1, "num_tokens_all": 478, "is_greedy": false, "logits_per_token": -1.5298954248428345, "logits_per_char": -0.7649477124214172, "num_chars": 2}, {"sum_logits": -1.3652235269546509, "num_tokens": 1, "num_tokens_all": 478, "is_greedy": true, "logits_per_token": -1.3652235269546509, "logits_per_char": -0.6826117634773254, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 64, "native_id": 64, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2939203977584839, "incorrect_loss_raw": 1.4258344570795696, "correct_loss_per_char": 0.6469601988792419, "incorrect_loss_per_char": 0.7129172285397848, "correct_loss_per_token": 1.2939203977584839, "incorrect_loss_per_token": 1.4258344570795696, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4205396175384521, "num_tokens": 1, "num_tokens_all": 483, "is_greedy": false, "logits_per_token": -1.4205396175384521, "logits_per_char": -0.7102698087692261, "num_chars": 2}, {"sum_logits": -1.4000825881958008, "num_tokens": 1, "num_tokens_all": 483, "is_greedy": false, "logits_per_token": -1.4000825881958008, "logits_per_char": -0.7000412940979004, "num_chars": 2}, {"sum_logits": -1.4568811655044556, "num_tokens": 1, "num_tokens_all": 483, "is_greedy": false, "logits_per_token": -1.4568811655044556, "logits_per_char": -0.7284405827522278, "num_chars": 2}, {"sum_logits": -1.2939203977584839, "num_tokens": 1, "num_tokens_all": 483, "is_greedy": true, "logits_per_token": -1.2939203977584839, "logits_per_char": -0.6469601988792419, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 65, "native_id": 65, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4729938507080078, "incorrect_loss_raw": 1.3702012300491333, "correct_loss_per_char": 0.7364969253540039, "incorrect_loss_per_char": 0.6851006150245667, "correct_loss_per_token": 1.4729938507080078, "incorrect_loss_per_token": 1.3702012300491333, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2344412803649902, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": true, "logits_per_token": -1.2344412803649902, "logits_per_char": -0.6172206401824951, "num_chars": 2}, {"sum_logits": -1.4161244630813599, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.4161244630813599, "logits_per_char": -0.7080622315406799, "num_chars": 2}, {"sum_logits": -1.4600379467010498, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.4600379467010498, "logits_per_char": -0.7300189733505249, "num_chars": 2}, {"sum_logits": -1.4729938507080078, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.4729938507080078, "logits_per_char": -0.7364969253540039, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 66, "native_id": 66, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3773934841156006, "incorrect_loss_raw": 1.3998321692148845, "correct_loss_per_char": 0.6886967420578003, "incorrect_loss_per_char": 0.6999160846074423, "correct_loss_per_token": 1.3773934841156006, "incorrect_loss_per_token": 1.3998321692148845, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3513822555541992, "num_tokens": 1, "num_tokens_all": 1403, "is_greedy": true, "logits_per_token": -1.3513822555541992, "logits_per_char": -0.6756911277770996, "num_chars": 2}, {"sum_logits": -1.3773934841156006, "num_tokens": 1, "num_tokens_all": 1403, "is_greedy": false, "logits_per_token": -1.3773934841156006, "logits_per_char": -0.6886967420578003, "num_chars": 2}, {"sum_logits": -1.4647092819213867, "num_tokens": 1, "num_tokens_all": 1403, "is_greedy": false, "logits_per_token": -1.4647092819213867, "logits_per_char": -0.7323546409606934, "num_chars": 2}, {"sum_logits": -1.3834049701690674, "num_tokens": 1, "num_tokens_all": 1403, "is_greedy": false, "logits_per_token": -1.3834049701690674, "logits_per_char": -0.6917024850845337, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 67, "native_id": 67, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4238381385803223, "incorrect_loss_raw": 1.3821394443511963, "correct_loss_per_char": 0.7119190692901611, "incorrect_loss_per_char": 0.6910697221755981, "correct_loss_per_token": 1.4238381385803223, "incorrect_loss_per_token": 1.3821394443511963, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4403165578842163, "num_tokens": 1, "num_tokens_all": 546, "is_greedy": false, "logits_per_token": -1.4403165578842163, "logits_per_char": -0.7201582789421082, "num_chars": 2}, {"sum_logits": -1.3170254230499268, "num_tokens": 1, "num_tokens_all": 546, "is_greedy": true, "logits_per_token": -1.3170254230499268, "logits_per_char": -0.6585127115249634, "num_chars": 2}, {"sum_logits": -1.3890763521194458, "num_tokens": 1, "num_tokens_all": 546, "is_greedy": false, "logits_per_token": -1.3890763521194458, "logits_per_char": -0.6945381760597229, "num_chars": 2}, {"sum_logits": -1.4238381385803223, "num_tokens": 1, "num_tokens_all": 546, "is_greedy": false, "logits_per_token": -1.4238381385803223, "logits_per_char": -0.7119190692901611, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 68, "native_id": 68, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4263674020767212, "incorrect_loss_raw": 1.3810132344563801, "correct_loss_per_char": 0.7131837010383606, "incorrect_loss_per_char": 0.6905066172281901, "correct_loss_per_token": 1.4263674020767212, "incorrect_loss_per_token": 1.3810132344563801, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2960543632507324, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": true, "logits_per_token": -1.2960543632507324, "logits_per_char": -0.6480271816253662, "num_chars": 2}, {"sum_logits": -1.407777190208435, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": false, "logits_per_token": -1.407777190208435, "logits_per_char": -0.7038885951042175, "num_chars": 2}, {"sum_logits": -1.4392081499099731, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": false, "logits_per_token": -1.4392081499099731, "logits_per_char": -0.7196040749549866, "num_chars": 2}, {"sum_logits": -1.4263674020767212, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": false, "logits_per_token": -1.4263674020767212, "logits_per_char": -0.7131837010383606, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 69, "native_id": 69, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2891231775283813, "incorrect_loss_raw": 1.4363483985265095, "correct_loss_per_char": 0.6445615887641907, "incorrect_loss_per_char": 0.7181741992632548, "correct_loss_per_token": 1.2891231775283813, "incorrect_loss_per_token": 1.4363483985265095, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.55141282081604, "num_tokens": 1, "num_tokens_all": 519, "is_greedy": false, "logits_per_token": -1.55141282081604, "logits_per_char": -0.77570641040802, "num_chars": 2}, {"sum_logits": -1.3017241954803467, "num_tokens": 1, "num_tokens_all": 519, "is_greedy": false, "logits_per_token": -1.3017241954803467, "logits_per_char": -0.6508620977401733, "num_chars": 2}, {"sum_logits": -1.455908179283142, "num_tokens": 1, "num_tokens_all": 519, "is_greedy": false, "logits_per_token": -1.455908179283142, "logits_per_char": -0.727954089641571, "num_chars": 2}, {"sum_logits": -1.2891231775283813, "num_tokens": 1, "num_tokens_all": 519, "is_greedy": true, "logits_per_token": -1.2891231775283813, "logits_per_char": -0.6445615887641907, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 70, "native_id": 70, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3686902523040771, "incorrect_loss_raw": 1.402584433555603, "correct_loss_per_char": 0.6843451261520386, "incorrect_loss_per_char": 0.7012922167778015, "correct_loss_per_token": 1.3686902523040771, "incorrect_loss_per_token": 1.402584433555603, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3308066129684448, "num_tokens": 1, "num_tokens_all": 457, "is_greedy": true, "logits_per_token": -1.3308066129684448, "logits_per_char": -0.6654033064842224, "num_chars": 2}, {"sum_logits": -1.5455849170684814, "num_tokens": 1, "num_tokens_all": 457, "is_greedy": false, "logits_per_token": -1.5455849170684814, "logits_per_char": -0.7727924585342407, "num_chars": 2}, {"sum_logits": -1.3313617706298828, "num_tokens": 1, "num_tokens_all": 457, "is_greedy": false, "logits_per_token": -1.3313617706298828, "logits_per_char": -0.6656808853149414, "num_chars": 2}, {"sum_logits": -1.3686902523040771, "num_tokens": 1, "num_tokens_all": 457, "is_greedy": false, "logits_per_token": -1.3686902523040771, "logits_per_char": -0.6843451261520386, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 71, "native_id": 71, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4642411470413208, "incorrect_loss_raw": 1.3795243104298909, "correct_loss_per_char": 0.7321205735206604, "incorrect_loss_per_char": 0.6897621552149454, "correct_loss_per_token": 1.4642411470413208, "incorrect_loss_per_token": 1.3795243104298909, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2582236528396606, "num_tokens": 1, "num_tokens_all": 450, "is_greedy": true, "logits_per_token": -1.2582236528396606, "logits_per_char": -0.6291118264198303, "num_chars": 2}, {"sum_logits": -1.472737431526184, "num_tokens": 1, "num_tokens_all": 450, "is_greedy": false, "logits_per_token": -1.472737431526184, "logits_per_char": -0.736368715763092, "num_chars": 2}, {"sum_logits": -1.4642411470413208, "num_tokens": 1, "num_tokens_all": 450, "is_greedy": false, "logits_per_token": -1.4642411470413208, "logits_per_char": -0.7321205735206604, "num_chars": 2}, {"sum_logits": -1.4076118469238281, "num_tokens": 1, "num_tokens_all": 450, "is_greedy": false, "logits_per_token": -1.4076118469238281, "logits_per_char": -0.7038059234619141, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 72, "native_id": 72, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.421527624130249, "incorrect_loss_raw": 1.386803110440572, "correct_loss_per_char": 0.7107638120651245, "incorrect_loss_per_char": 0.693401555220286, "correct_loss_per_token": 1.421527624130249, "incorrect_loss_per_token": 1.386803110440572, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4181734323501587, "num_tokens": 1, "num_tokens_all": 569, "is_greedy": false, "logits_per_token": -1.4181734323501587, "logits_per_char": -0.7090867161750793, "num_chars": 2}, {"sum_logits": -1.2466899156570435, "num_tokens": 1, "num_tokens_all": 569, "is_greedy": true, "logits_per_token": -1.2466899156570435, "logits_per_char": -0.6233449578285217, "num_chars": 2}, {"sum_logits": -1.421527624130249, "num_tokens": 1, "num_tokens_all": 569, "is_greedy": false, "logits_per_token": -1.421527624130249, "logits_per_char": -0.7107638120651245, "num_chars": 2}, {"sum_logits": -1.4955459833145142, "num_tokens": 1, "num_tokens_all": 569, "is_greedy": false, "logits_per_token": -1.4955459833145142, "logits_per_char": -0.7477729916572571, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 73, "native_id": 73, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4021012783050537, "incorrect_loss_raw": 1.3925657669703166, "correct_loss_per_char": 0.7010506391525269, "incorrect_loss_per_char": 0.6962828834851583, "correct_loss_per_token": 1.4021012783050537, "incorrect_loss_per_token": 1.3925657669703166, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4038621187210083, "num_tokens": 1, "num_tokens_all": 497, "is_greedy": false, "logits_per_token": -1.4038621187210083, "logits_per_char": -0.7019310593605042, "num_chars": 2}, {"sum_logits": -1.281685471534729, "num_tokens": 1, "num_tokens_all": 497, "is_greedy": true, "logits_per_token": -1.281685471534729, "logits_per_char": -0.6408427357673645, "num_chars": 2}, {"sum_logits": -1.4921497106552124, "num_tokens": 1, "num_tokens_all": 497, "is_greedy": false, "logits_per_token": -1.4921497106552124, "logits_per_char": -0.7460748553276062, "num_chars": 2}, {"sum_logits": -1.4021012783050537, "num_tokens": 1, "num_tokens_all": 497, "is_greedy": false, "logits_per_token": -1.4021012783050537, "logits_per_char": -0.7010506391525269, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 74, "native_id": 74, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4800763130187988, "incorrect_loss_raw": 1.369297742843628, "correct_loss_per_char": 0.7400381565093994, "incorrect_loss_per_char": 0.684648871421814, "correct_loss_per_token": 1.4800763130187988, "incorrect_loss_per_token": 1.369297742843628, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4800763130187988, "num_tokens": 1, "num_tokens_all": 539, "is_greedy": false, "logits_per_token": -1.4800763130187988, "logits_per_char": -0.7400381565093994, "num_chars": 2}, {"sum_logits": -1.3168880939483643, "num_tokens": 1, "num_tokens_all": 539, "is_greedy": false, "logits_per_token": -1.3168880939483643, "logits_per_char": -0.6584440469741821, "num_chars": 2}, {"sum_logits": -1.5263564586639404, "num_tokens": 1, "num_tokens_all": 539, "is_greedy": false, "logits_per_token": -1.5263564586639404, "logits_per_char": -0.7631782293319702, "num_chars": 2}, {"sum_logits": -1.264648675918579, "num_tokens": 1, "num_tokens_all": 539, "is_greedy": true, "logits_per_token": -1.264648675918579, "logits_per_char": -0.6323243379592896, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 75, "native_id": 75, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5080119371414185, "incorrect_loss_raw": 1.3553856213887532, "correct_loss_per_char": 0.7540059685707092, "incorrect_loss_per_char": 0.6776928106943766, "correct_loss_per_token": 1.5080119371414185, "incorrect_loss_per_token": 1.3553856213887532, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.325792670249939, "num_tokens": 1, "num_tokens_all": 513, "is_greedy": true, "logits_per_token": -1.325792670249939, "logits_per_char": -0.6628963351249695, "num_chars": 2}, {"sum_logits": -1.3537582159042358, "num_tokens": 1, "num_tokens_all": 513, "is_greedy": false, "logits_per_token": -1.3537582159042358, "logits_per_char": -0.6768791079521179, "num_chars": 2}, {"sum_logits": -1.5080119371414185, "num_tokens": 1, "num_tokens_all": 513, "is_greedy": false, "logits_per_token": -1.5080119371414185, "logits_per_char": -0.7540059685707092, "num_chars": 2}, {"sum_logits": -1.386605978012085, "num_tokens": 1, "num_tokens_all": 513, "is_greedy": false, "logits_per_token": -1.386605978012085, "logits_per_char": -0.6933029890060425, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 76, "native_id": 76, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2031636238098145, "incorrect_loss_raw": 1.4626309076944988, "correct_loss_per_char": 0.6015818119049072, "incorrect_loss_per_char": 0.7313154538472494, "correct_loss_per_token": 1.2031636238098145, "incorrect_loss_per_token": 1.4626309076944988, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2031636238098145, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": true, "logits_per_token": -1.2031636238098145, "logits_per_char": -0.6015818119049072, "num_chars": 2}, {"sum_logits": -1.4339576959609985, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": false, "logits_per_token": -1.4339576959609985, "logits_per_char": -0.7169788479804993, "num_chars": 2}, {"sum_logits": -1.4871680736541748, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": false, "logits_per_token": -1.4871680736541748, "logits_per_char": -0.7435840368270874, "num_chars": 2}, {"sum_logits": -1.4667669534683228, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": false, "logits_per_token": -1.4667669534683228, "logits_per_char": -0.7333834767341614, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 77, "native_id": 77, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5147324800491333, "incorrect_loss_raw": 1.3665980100631714, "correct_loss_per_char": 0.7573662400245667, "incorrect_loss_per_char": 0.6832990050315857, "correct_loss_per_token": 1.5147324800491333, "incorrect_loss_per_token": 1.3665980100631714, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2019195556640625, "num_tokens": 1, "num_tokens_all": 525, "is_greedy": true, "logits_per_token": -1.2019195556640625, "logits_per_char": -0.6009597778320312, "num_chars": 2}, {"sum_logits": -1.3347141742706299, "num_tokens": 1, "num_tokens_all": 525, "is_greedy": false, "logits_per_token": -1.3347141742706299, "logits_per_char": -0.6673570871353149, "num_chars": 2}, {"sum_logits": -1.5147324800491333, "num_tokens": 1, "num_tokens_all": 525, "is_greedy": false, "logits_per_token": -1.5147324800491333, "logits_per_char": -0.7573662400245667, "num_chars": 2}, {"sum_logits": -1.5631603002548218, "num_tokens": 1, "num_tokens_all": 525, "is_greedy": false, "logits_per_token": -1.5631603002548218, "logits_per_char": -0.7815801501274109, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 78, "native_id": 78, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5375896692276, "incorrect_loss_raw": 1.35410741964976, "correct_loss_per_char": 0.7687948346138, "incorrect_loss_per_char": 0.67705370982488, "correct_loss_per_token": 1.5375896692276, "incorrect_loss_per_token": 1.35410741964976, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4947718381881714, "num_tokens": 1, "num_tokens_all": 541, "is_greedy": false, "logits_per_token": -1.4947718381881714, "logits_per_char": -0.7473859190940857, "num_chars": 2}, {"sum_logits": -1.3666794300079346, "num_tokens": 1, "num_tokens_all": 541, "is_greedy": false, "logits_per_token": -1.3666794300079346, "logits_per_char": -0.6833397150039673, "num_chars": 2}, {"sum_logits": -1.5375896692276, "num_tokens": 1, "num_tokens_all": 541, "is_greedy": false, "logits_per_token": -1.5375896692276, "logits_per_char": -0.7687948346138, "num_chars": 2}, {"sum_logits": -1.2008709907531738, "num_tokens": 1, "num_tokens_all": 541, "is_greedy": true, "logits_per_token": -1.2008709907531738, "logits_per_char": -0.6004354953765869, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 79, "native_id": 79, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3406012058258057, "incorrect_loss_raw": 1.4085525274276733, "correct_loss_per_char": 0.6703006029129028, "incorrect_loss_per_char": 0.7042762637138367, "correct_loss_per_token": 1.3406012058258057, "incorrect_loss_per_token": 1.4085525274276733, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3406012058258057, "num_tokens": 1, "num_tokens_all": 488, "is_greedy": true, "logits_per_token": -1.3406012058258057, "logits_per_char": -0.6703006029129028, "num_chars": 2}, {"sum_logits": -1.4010130167007446, "num_tokens": 1, "num_tokens_all": 488, "is_greedy": false, "logits_per_token": -1.4010130167007446, "logits_per_char": -0.7005065083503723, "num_chars": 2}, {"sum_logits": -1.4783778190612793, "num_tokens": 1, "num_tokens_all": 488, "is_greedy": false, "logits_per_token": -1.4783778190612793, "logits_per_char": -0.7391889095306396, "num_chars": 2}, {"sum_logits": -1.346266746520996, "num_tokens": 1, "num_tokens_all": 488, "is_greedy": false, "logits_per_token": -1.346266746520996, "logits_per_char": -0.673133373260498, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 80, "native_id": 80, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2703492641448975, "incorrect_loss_raw": 1.4379205703735352, "correct_loss_per_char": 0.6351746320724487, "incorrect_loss_per_char": 0.7189602851867676, "correct_loss_per_token": 1.2703492641448975, "incorrect_loss_per_token": 1.4379205703735352, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.408733606338501, "num_tokens": 1, "num_tokens_all": 546, "is_greedy": false, "logits_per_token": -1.408733606338501, "logits_per_char": -0.7043668031692505, "num_chars": 2}, {"sum_logits": -1.4239118099212646, "num_tokens": 1, "num_tokens_all": 546, "is_greedy": false, "logits_per_token": -1.4239118099212646, "logits_per_char": -0.7119559049606323, "num_chars": 2}, {"sum_logits": -1.4811162948608398, "num_tokens": 1, "num_tokens_all": 546, "is_greedy": false, "logits_per_token": -1.4811162948608398, "logits_per_char": -0.7405581474304199, "num_chars": 2}, {"sum_logits": -1.2703492641448975, "num_tokens": 1, "num_tokens_all": 546, "is_greedy": true, "logits_per_token": -1.2703492641448975, "logits_per_char": -0.6351746320724487, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 81, "native_id": 81, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4412420988082886, "incorrect_loss_raw": 1.3805052042007446, "correct_loss_per_char": 0.7206210494041443, "incorrect_loss_per_char": 0.6902526021003723, "correct_loss_per_token": 1.4412420988082886, "incorrect_loss_per_token": 1.3805052042007446, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2403024435043335, "num_tokens": 1, "num_tokens_all": 479, "is_greedy": true, "logits_per_token": -1.2403024435043335, "logits_per_char": -0.6201512217521667, "num_chars": 2}, {"sum_logits": -1.4449462890625, "num_tokens": 1, "num_tokens_all": 479, "is_greedy": false, "logits_per_token": -1.4449462890625, "logits_per_char": -0.72247314453125, "num_chars": 2}, {"sum_logits": -1.4412420988082886, "num_tokens": 1, "num_tokens_all": 479, "is_greedy": false, "logits_per_token": -1.4412420988082886, "logits_per_char": -0.7206210494041443, "num_chars": 2}, {"sum_logits": -1.4562668800354004, "num_tokens": 1, "num_tokens_all": 479, "is_greedy": false, "logits_per_token": -1.4562668800354004, "logits_per_char": -0.7281334400177002, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 82, "native_id": 82, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4488275051116943, "incorrect_loss_raw": 1.4014703432718914, "correct_loss_per_char": 0.7244137525558472, "incorrect_loss_per_char": 0.7007351716359457, "correct_loss_per_token": 1.4488275051116943, "incorrect_loss_per_token": 1.4014703432718914, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3075206279754639, "num_tokens": 1, "num_tokens_all": 613, "is_greedy": true, "logits_per_token": -1.3075206279754639, "logits_per_char": -0.6537603139877319, "num_chars": 2}, {"sum_logits": -1.4488275051116943, "num_tokens": 1, "num_tokens_all": 613, "is_greedy": false, "logits_per_token": -1.4488275051116943, "logits_per_char": -0.7244137525558472, "num_chars": 2}, {"sum_logits": -1.5657217502593994, "num_tokens": 1, "num_tokens_all": 613, "is_greedy": false, "logits_per_token": -1.5657217502593994, "logits_per_char": -0.7828608751296997, "num_chars": 2}, {"sum_logits": -1.3311686515808105, "num_tokens": 1, "num_tokens_all": 613, "is_greedy": false, "logits_per_token": -1.3311686515808105, "logits_per_char": -0.6655843257904053, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 83, "native_id": 83, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3759208917617798, "incorrect_loss_raw": 1.3989447355270386, "correct_loss_per_char": 0.6879604458808899, "incorrect_loss_per_char": 0.6994723677635193, "correct_loss_per_token": 1.3759208917617798, "incorrect_loss_per_token": 1.3989447355270386, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3759208917617798, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": false, "logits_per_token": -1.3759208917617798, "logits_per_char": -0.6879604458808899, "num_chars": 2}, {"sum_logits": -1.4468697309494019, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": false, "logits_per_token": -1.4468697309494019, "logits_per_char": -0.7234348654747009, "num_chars": 2}, {"sum_logits": -1.4629995822906494, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": false, "logits_per_token": -1.4629995822906494, "logits_per_char": -0.7314997911453247, "num_chars": 2}, {"sum_logits": -1.2869648933410645, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": true, "logits_per_token": -1.2869648933410645, "logits_per_char": -0.6434824466705322, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 84, "native_id": 84, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1372607946395874, "incorrect_loss_raw": 1.4948836962382, "correct_loss_per_char": 0.5686303973197937, "incorrect_loss_per_char": 0.7474418481191, "correct_loss_per_token": 1.1372607946395874, "incorrect_loss_per_token": 1.4948836962382, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1372607946395874, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": true, "logits_per_token": -1.1372607946395874, "logits_per_char": -0.5686303973197937, "num_chars": 2}, {"sum_logits": -1.3981711864471436, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": false, "logits_per_token": -1.3981711864471436, "logits_per_char": -0.6990855932235718, "num_chars": 2}, {"sum_logits": -1.5114741325378418, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": false, "logits_per_token": -1.5114741325378418, "logits_per_char": -0.7557370662689209, "num_chars": 2}, {"sum_logits": -1.5750057697296143, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": false, "logits_per_token": -1.5750057697296143, "logits_per_char": -0.7875028848648071, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 85, "native_id": 85, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.456874966621399, "incorrect_loss_raw": 1.371556043624878, "correct_loss_per_char": 0.7284374833106995, "incorrect_loss_per_char": 0.685778021812439, "correct_loss_per_token": 1.456874966621399, "incorrect_loss_per_token": 1.371556043624878, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2853367328643799, "num_tokens": 1, "num_tokens_all": 474, "is_greedy": true, "logits_per_token": -1.2853367328643799, "logits_per_char": -0.6426683664321899, "num_chars": 2}, {"sum_logits": -1.456874966621399, "num_tokens": 1, "num_tokens_all": 474, "is_greedy": false, "logits_per_token": -1.456874966621399, "logits_per_char": -0.7284374833106995, "num_chars": 2}, {"sum_logits": -1.3812856674194336, "num_tokens": 1, "num_tokens_all": 474, "is_greedy": false, "logits_per_token": -1.3812856674194336, "logits_per_char": -0.6906428337097168, "num_chars": 2}, {"sum_logits": -1.4480457305908203, "num_tokens": 1, "num_tokens_all": 474, "is_greedy": false, "logits_per_token": -1.4480457305908203, "logits_per_char": -0.7240228652954102, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 86, "native_id": 86, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.458677053451538, "incorrect_loss_raw": 1.3719374736150105, "correct_loss_per_char": 0.729338526725769, "incorrect_loss_per_char": 0.6859687368075053, "correct_loss_per_token": 1.458677053451538, "incorrect_loss_per_token": 1.3719374736150105, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2899086475372314, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": true, "logits_per_token": -1.2899086475372314, "logits_per_char": -0.6449543237686157, "num_chars": 2}, {"sum_logits": -1.3920902013778687, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": false, "logits_per_token": -1.3920902013778687, "logits_per_char": -0.6960451006889343, "num_chars": 2}, {"sum_logits": -1.4338135719299316, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": false, "logits_per_token": -1.4338135719299316, "logits_per_char": -0.7169067859649658, "num_chars": 2}, {"sum_logits": -1.458677053451538, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": false, "logits_per_token": -1.458677053451538, "logits_per_char": -0.729338526725769, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 87, "native_id": 87, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.147544026374817, "incorrect_loss_raw": 1.4890002409617107, "correct_loss_per_char": 0.5737720131874084, "incorrect_loss_per_char": 0.7445001204808553, "correct_loss_per_token": 1.147544026374817, "incorrect_loss_per_token": 1.4890002409617107, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.147544026374817, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": true, "logits_per_token": -1.147544026374817, "logits_per_char": -0.5737720131874084, "num_chars": 2}, {"sum_logits": -1.4298744201660156, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.4298744201660156, "logits_per_char": -0.7149372100830078, "num_chars": 2}, {"sum_logits": -1.5316784381866455, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.5316784381866455, "logits_per_char": -0.7658392190933228, "num_chars": 2}, {"sum_logits": -1.5054478645324707, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.5054478645324707, "logits_per_char": -0.7527239322662354, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 88, "native_id": 88, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7228777408599854, "incorrect_loss_raw": 1.3091226021448772, "correct_loss_per_char": 0.8614388704299927, "incorrect_loss_per_char": 0.6545613010724386, "correct_loss_per_token": 1.7228777408599854, "incorrect_loss_per_token": 1.3091226021448772, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4424538612365723, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": false, "logits_per_token": -1.4424538612365723, "logits_per_char": -0.7212269306182861, "num_chars": 2}, {"sum_logits": -1.3464652299880981, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": false, "logits_per_token": -1.3464652299880981, "logits_per_char": -0.6732326149940491, "num_chars": 2}, {"sum_logits": -1.7228777408599854, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": false, "logits_per_token": -1.7228777408599854, "logits_per_char": -0.8614388704299927, "num_chars": 2}, {"sum_logits": -1.138448715209961, "num_tokens": 1, "num_tokens_all": 593, "is_greedy": true, "logits_per_token": -1.138448715209961, "logits_per_char": -0.5692243576049805, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 89, "native_id": 89, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3587270975112915, "incorrect_loss_raw": 1.4136848052342732, "correct_loss_per_char": 0.6793635487556458, "incorrect_loss_per_char": 0.7068424026171366, "correct_loss_per_token": 1.3587270975112915, "incorrect_loss_per_token": 1.4136848052342732, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5674701929092407, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": false, "logits_per_token": -1.5674701929092407, "logits_per_char": -0.7837350964546204, "num_chars": 2}, {"sum_logits": -1.2197905778884888, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": true, "logits_per_token": -1.2197905778884888, "logits_per_char": -0.6098952889442444, "num_chars": 2}, {"sum_logits": -1.4537936449050903, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": false, "logits_per_token": -1.4537936449050903, "logits_per_char": -0.7268968224525452, "num_chars": 2}, {"sum_logits": -1.3587270975112915, "num_tokens": 1, "num_tokens_all": 562, "is_greedy": false, "logits_per_token": -1.3587270975112915, "logits_per_char": -0.6793635487556458, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 90, "native_id": 90, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4117192029953003, "incorrect_loss_raw": 1.3958585659662883, "correct_loss_per_char": 0.7058596014976501, "incorrect_loss_per_char": 0.6979292829831442, "correct_loss_per_token": 1.4117192029953003, "incorrect_loss_per_token": 1.3958585659662883, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3931446075439453, "num_tokens": 1, "num_tokens_all": 521, "is_greedy": false, "logits_per_token": -1.3931446075439453, "logits_per_char": -0.6965723037719727, "num_chars": 2}, {"sum_logits": -1.4117192029953003, "num_tokens": 1, "num_tokens_all": 521, "is_greedy": false, "logits_per_token": -1.4117192029953003, "logits_per_char": -0.7058596014976501, "num_chars": 2}, {"sum_logits": -1.554471731185913, "num_tokens": 1, "num_tokens_all": 521, "is_greedy": false, "logits_per_token": -1.554471731185913, "logits_per_char": -0.7772358655929565, "num_chars": 2}, {"sum_logits": -1.2399593591690063, "num_tokens": 1, "num_tokens_all": 521, "is_greedy": true, "logits_per_token": -1.2399593591690063, "logits_per_char": -0.6199796795845032, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 91, "native_id": 91, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3894765377044678, "incorrect_loss_raw": 1.3940022389094036, "correct_loss_per_char": 0.6947382688522339, "incorrect_loss_per_char": 0.6970011194547018, "correct_loss_per_token": 1.3894765377044678, "incorrect_loss_per_token": 1.3940022389094036, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3894765377044678, "num_tokens": 1, "num_tokens_all": 456, "is_greedy": false, "logits_per_token": -1.3894765377044678, "logits_per_char": -0.6947382688522339, "num_chars": 2}, {"sum_logits": -1.4277153015136719, "num_tokens": 1, "num_tokens_all": 456, "is_greedy": false, "logits_per_token": -1.4277153015136719, "logits_per_char": -0.7138576507568359, "num_chars": 2}, {"sum_logits": -1.4298112392425537, "num_tokens": 1, "num_tokens_all": 456, "is_greedy": false, "logits_per_token": -1.4298112392425537, "logits_per_char": -0.7149056196212769, "num_chars": 2}, {"sum_logits": -1.3244801759719849, "num_tokens": 1, "num_tokens_all": 456, "is_greedy": true, "logits_per_token": -1.3244801759719849, "logits_per_char": -0.6622400879859924, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 92, "native_id": 92, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2968827486038208, "incorrect_loss_raw": 1.4271541436513264, "correct_loss_per_char": 0.6484413743019104, "incorrect_loss_per_char": 0.7135770718256632, "correct_loss_per_token": 1.2968827486038208, "incorrect_loss_per_token": 1.4271541436513264, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4426417350769043, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.4426417350769043, "logits_per_char": -0.7213208675384521, "num_chars": 2}, {"sum_logits": -1.3440837860107422, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.3440837860107422, "logits_per_char": -0.6720418930053711, "num_chars": 2}, {"sum_logits": -1.494736909866333, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.494736909866333, "logits_per_char": -0.7473684549331665, "num_chars": 2}, {"sum_logits": -1.2968827486038208, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": true, "logits_per_token": -1.2968827486038208, "logits_per_char": -0.6484413743019104, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 93, "native_id": 93, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5495622158050537, "incorrect_loss_raw": 1.349178632100423, "correct_loss_per_char": 0.7747811079025269, "incorrect_loss_per_char": 0.6745893160502116, "correct_loss_per_token": 1.5495622158050537, "incorrect_loss_per_token": 1.349178632100423, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5495622158050537, "num_tokens": 1, "num_tokens_all": 1386, "is_greedy": false, "logits_per_token": -1.5495622158050537, "logits_per_char": -0.7747811079025269, "num_chars": 2}, {"sum_logits": -1.2877167463302612, "num_tokens": 1, "num_tokens_all": 1386, "is_greedy": true, "logits_per_token": -1.2877167463302612, "logits_per_char": -0.6438583731651306, "num_chars": 2}, {"sum_logits": -1.4534138441085815, "num_tokens": 1, "num_tokens_all": 1386, "is_greedy": false, "logits_per_token": -1.4534138441085815, "logits_per_char": -0.7267069220542908, "num_chars": 2}, {"sum_logits": -1.3064053058624268, "num_tokens": 1, "num_tokens_all": 1386, "is_greedy": false, "logits_per_token": -1.3064053058624268, "logits_per_char": -0.6532026529312134, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 94, "native_id": 94, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.50458562374115, "incorrect_loss_raw": 1.3623775243759155, "correct_loss_per_char": 0.752292811870575, "incorrect_loss_per_char": 0.6811887621879578, "correct_loss_per_token": 1.50458562374115, "incorrect_loss_per_token": 1.3623775243759155, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2165454626083374, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": true, "logits_per_token": -1.2165454626083374, "logits_per_char": -0.6082727313041687, "num_chars": 2}, {"sum_logits": -1.3883233070373535, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.3883233070373535, "logits_per_char": -0.6941616535186768, "num_chars": 2}, {"sum_logits": -1.4822638034820557, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.4822638034820557, "logits_per_char": -0.7411319017410278, "num_chars": 2}, {"sum_logits": -1.50458562374115, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.50458562374115, "logits_per_char": -0.752292811870575, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 95, "native_id": 95, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.474397897720337, "incorrect_loss_raw": 1.3715231815973918, "correct_loss_per_char": 0.7371989488601685, "incorrect_loss_per_char": 0.6857615907986959, "correct_loss_per_token": 1.474397897720337, "incorrect_loss_per_token": 1.3715231815973918, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4301735162734985, "num_tokens": 1, "num_tokens_all": 471, "is_greedy": false, "logits_per_token": -1.4301735162734985, "logits_per_char": -0.7150867581367493, "num_chars": 2}, {"sum_logits": -1.2801074981689453, "num_tokens": 1, "num_tokens_all": 471, "is_greedy": true, "logits_per_token": -1.2801074981689453, "logits_per_char": -0.6400537490844727, "num_chars": 2}, {"sum_logits": -1.4042885303497314, "num_tokens": 1, "num_tokens_all": 471, "is_greedy": false, "logits_per_token": -1.4042885303497314, "logits_per_char": -0.7021442651748657, "num_chars": 2}, {"sum_logits": -1.474397897720337, "num_tokens": 1, "num_tokens_all": 471, "is_greedy": false, "logits_per_token": -1.474397897720337, "logits_per_char": -0.7371989488601685, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 96, "native_id": 96, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.508132815361023, "incorrect_loss_raw": 1.358418345451355, "correct_loss_per_char": 0.7540664076805115, "incorrect_loss_per_char": 0.6792091727256775, "correct_loss_per_token": 1.508132815361023, "incorrect_loss_per_token": 1.358418345451355, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4652372598648071, "num_tokens": 1, "num_tokens_all": 531, "is_greedy": false, "logits_per_token": -1.4652372598648071, "logits_per_char": -0.7326186299324036, "num_chars": 2}, {"sum_logits": -1.3474277257919312, "num_tokens": 1, "num_tokens_all": 531, "is_greedy": false, "logits_per_token": -1.3474277257919312, "logits_per_char": -0.6737138628959656, "num_chars": 2}, {"sum_logits": -1.508132815361023, "num_tokens": 1, "num_tokens_all": 531, "is_greedy": false, "logits_per_token": -1.508132815361023, "logits_per_char": -0.7540664076805115, "num_chars": 2}, {"sum_logits": -1.2625900506973267, "num_tokens": 1, "num_tokens_all": 531, "is_greedy": true, "logits_per_token": -1.2625900506973267, "logits_per_char": -0.6312950253486633, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 97, "native_id": 97, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3334038257598877, "incorrect_loss_raw": 1.4159584442774455, "correct_loss_per_char": 0.6667019128799438, "incorrect_loss_per_char": 0.7079792221387228, "correct_loss_per_token": 1.3334038257598877, "incorrect_loss_per_token": 1.4159584442774455, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3334038257598877, "num_tokens": 1, "num_tokens_all": 548, "is_greedy": false, "logits_per_token": -1.3334038257598877, "logits_per_char": -0.6667019128799438, "num_chars": 2}, {"sum_logits": -1.2810012102127075, "num_tokens": 1, "num_tokens_all": 548, "is_greedy": true, "logits_per_token": -1.2810012102127075, "logits_per_char": -0.6405006051063538, "num_chars": 2}, {"sum_logits": -1.5267326831817627, "num_tokens": 1, "num_tokens_all": 548, "is_greedy": false, "logits_per_token": -1.5267326831817627, "logits_per_char": -0.7633663415908813, "num_chars": 2}, {"sum_logits": -1.4401414394378662, "num_tokens": 1, "num_tokens_all": 548, "is_greedy": false, "logits_per_token": -1.4401414394378662, "logits_per_char": -0.7200707197189331, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 98, "native_id": 98, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2643320560455322, "incorrect_loss_raw": 1.4463357130686443, "correct_loss_per_char": 0.6321660280227661, "incorrect_loss_per_char": 0.7231678565343221, "correct_loss_per_token": 1.2643320560455322, "incorrect_loss_per_token": 1.4463357130686443, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5255712270736694, "num_tokens": 1, "num_tokens_all": 505, "is_greedy": false, "logits_per_token": -1.5255712270736694, "logits_per_char": -0.7627856135368347, "num_chars": 2}, {"sum_logits": -1.2693957090377808, "num_tokens": 1, "num_tokens_all": 505, "is_greedy": false, "logits_per_token": -1.2693957090377808, "logits_per_char": -0.6346978545188904, "num_chars": 2}, {"sum_logits": -1.5440402030944824, "num_tokens": 1, "num_tokens_all": 505, "is_greedy": false, "logits_per_token": -1.5440402030944824, "logits_per_char": -0.7720201015472412, "num_chars": 2}, {"sum_logits": -1.2643320560455322, "num_tokens": 1, "num_tokens_all": 505, "is_greedy": true, "logits_per_token": -1.2643320560455322, "logits_per_char": -0.6321660280227661, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 99, "native_id": 99, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5039989948272705, "incorrect_loss_raw": 1.3629751205444336, "correct_loss_per_char": 0.7519994974136353, "incorrect_loss_per_char": 0.6814875602722168, "correct_loss_per_token": 1.5039989948272705, "incorrect_loss_per_token": 1.3629751205444336, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1998289823532104, "num_tokens": 1, "num_tokens_all": 468, "is_greedy": true, "logits_per_token": -1.1998289823532104, "logits_per_char": -0.5999144911766052, "num_chars": 2}, {"sum_logits": -1.4009519815444946, "num_tokens": 1, "num_tokens_all": 468, "is_greedy": false, "logits_per_token": -1.4009519815444946, "logits_per_char": -0.7004759907722473, "num_chars": 2}, {"sum_logits": -1.4881443977355957, "num_tokens": 1, "num_tokens_all": 468, "is_greedy": false, "logits_per_token": -1.4881443977355957, "logits_per_char": -0.7440721988677979, "num_chars": 2}, {"sum_logits": -1.5039989948272705, "num_tokens": 1, "num_tokens_all": 468, "is_greedy": false, "logits_per_token": -1.5039989948272705, "logits_per_char": -0.7519994974136353, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 100, "native_id": 100, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.47621750831604, "incorrect_loss_raw": 1.367976466814677, "correct_loss_per_char": 0.73810875415802, "incorrect_loss_per_char": 0.6839882334073385, "correct_loss_per_token": 1.47621750831604, "incorrect_loss_per_token": 1.367976466814677, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2452012300491333, "num_tokens": 1, "num_tokens_all": 459, "is_greedy": true, "logits_per_token": -1.2452012300491333, "logits_per_char": -0.6226006150245667, "num_chars": 2}, {"sum_logits": -1.4218381643295288, "num_tokens": 1, "num_tokens_all": 459, "is_greedy": false, "logits_per_token": -1.4218381643295288, "logits_per_char": -0.7109190821647644, "num_chars": 2}, {"sum_logits": -1.4368900060653687, "num_tokens": 1, "num_tokens_all": 459, "is_greedy": false, "logits_per_token": -1.4368900060653687, "logits_per_char": -0.7184450030326843, "num_chars": 2}, {"sum_logits": -1.47621750831604, "num_tokens": 1, "num_tokens_all": 459, "is_greedy": false, "logits_per_token": -1.47621750831604, "logits_per_char": -0.73810875415802, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 101, "native_id": 101, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3237191438674927, "incorrect_loss_raw": 1.4157923460006714, "correct_loss_per_char": 0.6618595719337463, "incorrect_loss_per_char": 0.7078961730003357, "correct_loss_per_token": 1.3237191438674927, "incorrect_loss_per_token": 1.4157923460006714, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3415532112121582, "num_tokens": 1, "num_tokens_all": 481, "is_greedy": false, "logits_per_token": -1.3415532112121582, "logits_per_char": -0.6707766056060791, "num_chars": 2}, {"sum_logits": -1.4315171241760254, "num_tokens": 1, "num_tokens_all": 481, "is_greedy": false, "logits_per_token": -1.4315171241760254, "logits_per_char": -0.7157585620880127, "num_chars": 2}, {"sum_logits": -1.4743067026138306, "num_tokens": 1, "num_tokens_all": 481, "is_greedy": false, "logits_per_token": -1.4743067026138306, "logits_per_char": -0.7371533513069153, "num_chars": 2}, {"sum_logits": -1.3237191438674927, "num_tokens": 1, "num_tokens_all": 481, "is_greedy": true, "logits_per_token": -1.3237191438674927, "logits_per_char": -0.6618595719337463, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 102, "native_id": 102, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5046297311782837, "incorrect_loss_raw": 1.3615683317184448, "correct_loss_per_char": 0.7523148655891418, "incorrect_loss_per_char": 0.6807841658592224, "correct_loss_per_token": 1.5046297311782837, "incorrect_loss_per_token": 1.3615683317184448, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2094218730926514, "num_tokens": 1, "num_tokens_all": 468, "is_greedy": true, "logits_per_token": -1.2094218730926514, "logits_per_char": -0.6047109365463257, "num_chars": 2}, {"sum_logits": -1.5046297311782837, "num_tokens": 1, "num_tokens_all": 468, "is_greedy": false, "logits_per_token": -1.5046297311782837, "logits_per_char": -0.7523148655891418, "num_chars": 2}, {"sum_logits": -1.4349229335784912, "num_tokens": 1, "num_tokens_all": 468, "is_greedy": false, "logits_per_token": -1.4349229335784912, "logits_per_char": -0.7174614667892456, "num_chars": 2}, {"sum_logits": -1.440360188484192, "num_tokens": 1, "num_tokens_all": 468, "is_greedy": false, "logits_per_token": -1.440360188484192, "logits_per_char": -0.720180094242096, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 103, "native_id": 103, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4654918909072876, "incorrect_loss_raw": 1.3808022737503052, "correct_loss_per_char": 0.7327459454536438, "incorrect_loss_per_char": 0.6904011368751526, "correct_loss_per_token": 1.4654918909072876, "incorrect_loss_per_token": 1.3808022737503052, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1544468402862549, "num_tokens": 1, "num_tokens_all": 461, "is_greedy": true, "logits_per_token": -1.1544468402862549, "logits_per_char": -0.5772234201431274, "num_chars": 2}, {"sum_logits": -1.4789937734603882, "num_tokens": 1, "num_tokens_all": 461, "is_greedy": false, "logits_per_token": -1.4789937734603882, "logits_per_char": -0.7394968867301941, "num_chars": 2}, {"sum_logits": -1.5089662075042725, "num_tokens": 1, "num_tokens_all": 461, "is_greedy": false, "logits_per_token": -1.5089662075042725, "logits_per_char": -0.7544831037521362, "num_chars": 2}, {"sum_logits": -1.4654918909072876, "num_tokens": 1, "num_tokens_all": 461, "is_greedy": false, "logits_per_token": -1.4654918909072876, "logits_per_char": -0.7327459454536438, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 104, "native_id": 104, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3360676765441895, "incorrect_loss_raw": 1.4194543759028118, "correct_loss_per_char": 0.6680338382720947, "incorrect_loss_per_char": 0.7097271879514059, "correct_loss_per_token": 1.3360676765441895, "incorrect_loss_per_token": 1.4194543759028118, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2958391904830933, "num_tokens": 1, "num_tokens_all": 450, "is_greedy": true, "logits_per_token": -1.2958391904830933, "logits_per_char": -0.6479195952415466, "num_chars": 2}, {"sum_logits": -1.3645824193954468, "num_tokens": 1, "num_tokens_all": 450, "is_greedy": false, "logits_per_token": -1.3645824193954468, "logits_per_char": -0.6822912096977234, "num_chars": 2}, {"sum_logits": -1.3360676765441895, "num_tokens": 1, "num_tokens_all": 450, "is_greedy": false, "logits_per_token": -1.3360676765441895, "logits_per_char": -0.6680338382720947, "num_chars": 2}, {"sum_logits": -1.597941517829895, "num_tokens": 1, "num_tokens_all": 450, "is_greedy": false, "logits_per_token": -1.597941517829895, "logits_per_char": -0.7989707589149475, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 105, "native_id": 105, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2097547054290771, "incorrect_loss_raw": 1.4710252285003662, "correct_loss_per_char": 0.6048773527145386, "incorrect_loss_per_char": 0.7355126142501831, "correct_loss_per_token": 1.2097547054290771, "incorrect_loss_per_token": 1.4710252285003662, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2097547054290771, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": true, "logits_per_token": -1.2097547054290771, "logits_per_char": -0.6048773527145386, "num_chars": 2}, {"sum_logits": -1.4972434043884277, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": false, "logits_per_token": -1.4972434043884277, "logits_per_char": -0.7486217021942139, "num_chars": 2}, {"sum_logits": -1.5982086658477783, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": false, "logits_per_token": -1.5982086658477783, "logits_per_char": -0.7991043329238892, "num_chars": 2}, {"sum_logits": -1.3176236152648926, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": false, "logits_per_token": -1.3176236152648926, "logits_per_char": -0.6588118076324463, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 106, "native_id": 106, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4812366962432861, "incorrect_loss_raw": 1.361802299817403, "correct_loss_per_char": 0.7406183481216431, "incorrect_loss_per_char": 0.6809011499087015, "correct_loss_per_token": 1.4812366962432861, "incorrect_loss_per_token": 1.361802299817403, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4812366962432861, "num_tokens": 1, "num_tokens_all": 484, "is_greedy": false, "logits_per_token": -1.4812366962432861, "logits_per_char": -0.7406183481216431, "num_chars": 2}, {"sum_logits": -1.3280446529388428, "num_tokens": 1, "num_tokens_all": 484, "is_greedy": true, "logits_per_token": -1.3280446529388428, "logits_per_char": -0.6640223264694214, "num_chars": 2}, {"sum_logits": -1.3663166761398315, "num_tokens": 1, "num_tokens_all": 484, "is_greedy": false, "logits_per_token": -1.3663166761398315, "logits_per_char": -0.6831583380699158, "num_chars": 2}, {"sum_logits": -1.3910455703735352, "num_tokens": 1, "num_tokens_all": 484, "is_greedy": false, "logits_per_token": -1.3910455703735352, "logits_per_char": -0.6955227851867676, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 107, "native_id": 107, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5193963050842285, "incorrect_loss_raw": 1.3523032665252686, "correct_loss_per_char": 0.7596981525421143, "incorrect_loss_per_char": 0.6761516332626343, "correct_loss_per_token": 1.5193963050842285, "incorrect_loss_per_token": 1.3523032665252686, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.306890606880188, "num_tokens": 1, "num_tokens_all": 456, "is_greedy": true, "logits_per_token": -1.306890606880188, "logits_per_char": -0.653445303440094, "num_chars": 2}, {"sum_logits": -1.35837721824646, "num_tokens": 1, "num_tokens_all": 456, "is_greedy": false, "logits_per_token": -1.35837721824646, "logits_per_char": -0.67918860912323, "num_chars": 2}, {"sum_logits": -1.3916419744491577, "num_tokens": 1, "num_tokens_all": 456, "is_greedy": false, "logits_per_token": -1.3916419744491577, "logits_per_char": -0.6958209872245789, "num_chars": 2}, {"sum_logits": -1.5193963050842285, "num_tokens": 1, "num_tokens_all": 456, "is_greedy": false, "logits_per_token": -1.5193963050842285, "logits_per_char": -0.7596981525421143, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 108, "native_id": 108, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4896137714385986, "incorrect_loss_raw": 1.3623570601145427, "correct_loss_per_char": 0.7448068857192993, "incorrect_loss_per_char": 0.6811785300572714, "correct_loss_per_token": 1.4896137714385986, "incorrect_loss_per_token": 1.3623570601145427, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.289624571800232, "num_tokens": 1, "num_tokens_all": 468, "is_greedy": true, "logits_per_token": -1.289624571800232, "logits_per_char": -0.644812285900116, "num_chars": 2}, {"sum_logits": -1.4613802433013916, "num_tokens": 1, "num_tokens_all": 468, "is_greedy": false, "logits_per_token": -1.4613802433013916, "logits_per_char": -0.7306901216506958, "num_chars": 2}, {"sum_logits": -1.4896137714385986, "num_tokens": 1, "num_tokens_all": 468, "is_greedy": false, "logits_per_token": -1.4896137714385986, "logits_per_char": -0.7448068857192993, "num_chars": 2}, {"sum_logits": -1.3360663652420044, "num_tokens": 1, "num_tokens_all": 468, "is_greedy": false, "logits_per_token": -1.3360663652420044, "logits_per_char": -0.6680331826210022, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 109, "native_id": 109, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3896602392196655, "incorrect_loss_raw": 1.3913640181223552, "correct_loss_per_char": 0.6948301196098328, "incorrect_loss_per_char": 0.6956820090611776, "correct_loss_per_token": 1.3896602392196655, "incorrect_loss_per_token": 1.3913640181223552, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3841280937194824, "num_tokens": 1, "num_tokens_all": 482, "is_greedy": false, "logits_per_token": -1.3841280937194824, "logits_per_char": -0.6920640468597412, "num_chars": 2}, {"sum_logits": -1.364453911781311, "num_tokens": 1, "num_tokens_all": 482, "is_greedy": true, "logits_per_token": -1.364453911781311, "logits_per_char": -0.6822269558906555, "num_chars": 2}, {"sum_logits": -1.425510048866272, "num_tokens": 1, "num_tokens_all": 482, "is_greedy": false, "logits_per_token": -1.425510048866272, "logits_per_char": -0.712755024433136, "num_chars": 2}, {"sum_logits": -1.3896602392196655, "num_tokens": 1, "num_tokens_all": 482, "is_greedy": false, "logits_per_token": -1.3896602392196655, "logits_per_char": -0.6948301196098328, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 110, "native_id": 110, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4743804931640625, "incorrect_loss_raw": 1.3741905689239502, "correct_loss_per_char": 0.7371902465820312, "incorrect_loss_per_char": 0.6870952844619751, "correct_loss_per_token": 1.4743804931640625, "incorrect_loss_per_token": 1.3741905689239502, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.216845154762268, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": true, "logits_per_token": -1.216845154762268, "logits_per_char": -0.608422577381134, "num_chars": 2}, {"sum_logits": -1.3626248836517334, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.3626248836517334, "logits_per_char": -0.6813124418258667, "num_chars": 2}, {"sum_logits": -1.5431016683578491, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.5431016683578491, "logits_per_char": -0.7715508341789246, "num_chars": 2}, {"sum_logits": -1.4743804931640625, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.4743804931640625, "logits_per_char": -0.7371902465820312, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 111, "native_id": 111, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3302642107009888, "incorrect_loss_raw": 1.4173107544581096, "correct_loss_per_char": 0.6651321053504944, "incorrect_loss_per_char": 0.7086553772290548, "correct_loss_per_token": 1.3302642107009888, "incorrect_loss_per_token": 1.4173107544581096, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3302642107009888, "num_tokens": 1, "num_tokens_all": 466, "is_greedy": false, "logits_per_token": -1.3302642107009888, "logits_per_char": -0.6651321053504944, "num_chars": 2}, {"sum_logits": -1.4096416234970093, "num_tokens": 1, "num_tokens_all": 466, "is_greedy": false, "logits_per_token": -1.4096416234970093, "logits_per_char": -0.7048208117485046, "num_chars": 2}, {"sum_logits": -1.5192058086395264, "num_tokens": 1, "num_tokens_all": 466, "is_greedy": false, "logits_per_token": -1.5192058086395264, "logits_per_char": -0.7596029043197632, "num_chars": 2}, {"sum_logits": -1.323084831237793, "num_tokens": 1, "num_tokens_all": 466, "is_greedy": true, "logits_per_token": -1.323084831237793, "logits_per_char": -0.6615424156188965, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 112, "native_id": 112, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4024964570999146, "incorrect_loss_raw": 1.3972622950871785, "correct_loss_per_char": 0.7012482285499573, "incorrect_loss_per_char": 0.6986311475435892, "correct_loss_per_token": 1.4024964570999146, "incorrect_loss_per_token": 1.3972622950871785, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.585503339767456, "num_tokens": 1, "num_tokens_all": 559, "is_greedy": false, "logits_per_token": -1.585503339767456, "logits_per_char": -0.792751669883728, "num_chars": 2}, {"sum_logits": -1.3212814331054688, "num_tokens": 1, "num_tokens_all": 559, "is_greedy": false, "logits_per_token": -1.3212814331054688, "logits_per_char": -0.6606407165527344, "num_chars": 2}, {"sum_logits": -1.4024964570999146, "num_tokens": 1, "num_tokens_all": 559, "is_greedy": false, "logits_per_token": -1.4024964570999146, "logits_per_char": -0.7012482285499573, "num_chars": 2}, {"sum_logits": -1.2850021123886108, "num_tokens": 1, "num_tokens_all": 559, "is_greedy": true, "logits_per_token": -1.2850021123886108, "logits_per_char": -0.6425010561943054, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 113, "native_id": 113, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.38670015335083, "incorrect_loss_raw": 1.3932985067367554, "correct_loss_per_char": 0.693350076675415, "incorrect_loss_per_char": 0.6966492533683777, "correct_loss_per_token": 1.38670015335083, "incorrect_loss_per_token": 1.3932985067367554, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.38670015335083, "num_tokens": 1, "num_tokens_all": 516, "is_greedy": false, "logits_per_token": -1.38670015335083, "logits_per_char": -0.693350076675415, "num_chars": 2}, {"sum_logits": -1.3438036441802979, "num_tokens": 1, "num_tokens_all": 516, "is_greedy": true, "logits_per_token": -1.3438036441802979, "logits_per_char": -0.6719018220901489, "num_chars": 2}, {"sum_logits": -1.4357537031173706, "num_tokens": 1, "num_tokens_all": 516, "is_greedy": false, "logits_per_token": -1.4357537031173706, "logits_per_char": -0.7178768515586853, "num_chars": 2}, {"sum_logits": -1.4003381729125977, "num_tokens": 1, "num_tokens_all": 516, "is_greedy": false, "logits_per_token": -1.4003381729125977, "logits_per_char": -0.7001690864562988, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 114, "native_id": 114, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2706040143966675, "incorrect_loss_raw": 1.437711517016093, "correct_loss_per_char": 0.6353020071983337, "incorrect_loss_per_char": 0.7188557585080465, "correct_loss_per_token": 1.2706040143966675, "incorrect_loss_per_token": 1.437711517016093, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3509886264801025, "num_tokens": 1, "num_tokens_all": 507, "is_greedy": false, "logits_per_token": -1.3509886264801025, "logits_per_char": -0.6754943132400513, "num_chars": 2}, {"sum_logits": -1.4669313430786133, "num_tokens": 1, "num_tokens_all": 507, "is_greedy": false, "logits_per_token": -1.4669313430786133, "logits_per_char": -0.7334656715393066, "num_chars": 2}, {"sum_logits": -1.495214581489563, "num_tokens": 1, "num_tokens_all": 507, "is_greedy": false, "logits_per_token": -1.495214581489563, "logits_per_char": -0.7476072907447815, "num_chars": 2}, {"sum_logits": -1.2706040143966675, "num_tokens": 1, "num_tokens_all": 507, "is_greedy": true, "logits_per_token": -1.2706040143966675, "logits_per_char": -0.6353020071983337, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 115, "native_id": 115, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4866902828216553, "incorrect_loss_raw": 1.3635017077128093, "correct_loss_per_char": 0.7433451414108276, "incorrect_loss_per_char": 0.6817508538564047, "correct_loss_per_token": 1.4866902828216553, "incorrect_loss_per_token": 1.3635017077128093, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3598312139511108, "num_tokens": 1, "num_tokens_all": 461, "is_greedy": false, "logits_per_token": -1.3598312139511108, "logits_per_char": -0.6799156069755554, "num_chars": 2}, {"sum_logits": -1.3901830911636353, "num_tokens": 1, "num_tokens_all": 461, "is_greedy": false, "logits_per_token": -1.3901830911636353, "logits_per_char": -0.6950915455818176, "num_chars": 2}, {"sum_logits": -1.4866902828216553, "num_tokens": 1, "num_tokens_all": 461, "is_greedy": false, "logits_per_token": -1.4866902828216553, "logits_per_char": -0.7433451414108276, "num_chars": 2}, {"sum_logits": -1.3404908180236816, "num_tokens": 1, "num_tokens_all": 461, "is_greedy": true, "logits_per_token": -1.3404908180236816, "logits_per_char": -0.6702454090118408, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 116, "native_id": 116, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5177230834960938, "incorrect_loss_raw": 1.3719716866811116, "correct_loss_per_char": 0.7588615417480469, "incorrect_loss_per_char": 0.6859858433405558, "correct_loss_per_token": 1.5177230834960938, "incorrect_loss_per_token": 1.3719716866811116, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2591636180877686, "num_tokens": 1, "num_tokens_all": 526, "is_greedy": false, "logits_per_token": -1.2591636180877686, "logits_per_char": -0.6295818090438843, "num_chars": 2}, {"sum_logits": -1.5177230834960938, "num_tokens": 1, "num_tokens_all": 526, "is_greedy": false, "logits_per_token": -1.5177230834960938, "logits_per_char": -0.7588615417480469, "num_chars": 2}, {"sum_logits": -1.6468623876571655, "num_tokens": 1, "num_tokens_all": 526, "is_greedy": false, "logits_per_token": -1.6468623876571655, "logits_per_char": -0.8234311938285828, "num_chars": 2}, {"sum_logits": -1.2098890542984009, "num_tokens": 1, "num_tokens_all": 526, "is_greedy": true, "logits_per_token": -1.2098890542984009, "logits_per_char": -0.6049445271492004, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 117, "native_id": 117, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2095355987548828, "incorrect_loss_raw": 1.4674972693125408, "correct_loss_per_char": 0.6047677993774414, "incorrect_loss_per_char": 0.7337486346562704, "correct_loss_per_token": 1.2095355987548828, "incorrect_loss_per_token": 1.4674972693125408, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2095355987548828, "num_tokens": 1, "num_tokens_all": 524, "is_greedy": true, "logits_per_token": -1.2095355987548828, "logits_per_char": -0.6047677993774414, "num_chars": 2}, {"sum_logits": -1.3820385932922363, "num_tokens": 1, "num_tokens_all": 524, "is_greedy": false, "logits_per_token": -1.3820385932922363, "logits_per_char": -0.6910192966461182, "num_chars": 2}, {"sum_logits": -1.6221199035644531, "num_tokens": 1, "num_tokens_all": 524, "is_greedy": false, "logits_per_token": -1.6221199035644531, "logits_per_char": -0.8110599517822266, "num_chars": 2}, {"sum_logits": -1.3983333110809326, "num_tokens": 1, "num_tokens_all": 524, "is_greedy": false, "logits_per_token": -1.3983333110809326, "logits_per_char": -0.6991666555404663, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 118, "native_id": 118, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3826277256011963, "incorrect_loss_raw": 1.3964767456054688, "correct_loss_per_char": 0.6913138628005981, "incorrect_loss_per_char": 0.6982383728027344, "correct_loss_per_token": 1.3826277256011963, "incorrect_loss_per_token": 1.3964767456054688, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3103632926940918, "num_tokens": 1, "num_tokens_all": 477, "is_greedy": true, "logits_per_token": -1.3103632926940918, "logits_per_char": -0.6551816463470459, "num_chars": 2}, {"sum_logits": -1.4290287494659424, "num_tokens": 1, "num_tokens_all": 477, "is_greedy": false, "logits_per_token": -1.4290287494659424, "logits_per_char": -0.7145143747329712, "num_chars": 2}, {"sum_logits": -1.450038194656372, "num_tokens": 1, "num_tokens_all": 477, "is_greedy": false, "logits_per_token": -1.450038194656372, "logits_per_char": -0.725019097328186, "num_chars": 2}, {"sum_logits": -1.3826277256011963, "num_tokens": 1, "num_tokens_all": 477, "is_greedy": false, "logits_per_token": -1.3826277256011963, "logits_per_char": -0.6913138628005981, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 119, "native_id": 119, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2250707149505615, "incorrect_loss_raw": 1.45546289285024, "correct_loss_per_char": 0.6125353574752808, "incorrect_loss_per_char": 0.72773144642512, "correct_loss_per_token": 1.2250707149505615, "incorrect_loss_per_token": 1.45546289285024, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2250707149505615, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": true, "logits_per_token": -1.2250707149505615, "logits_per_char": -0.6125353574752808, "num_chars": 2}, {"sum_logits": -1.39744234085083, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": false, "logits_per_token": -1.39744234085083, "logits_per_char": -0.698721170425415, "num_chars": 2}, {"sum_logits": -1.41957688331604, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": false, "logits_per_token": -1.41957688331604, "logits_per_char": -0.70978844165802, "num_chars": 2}, {"sum_logits": -1.54936945438385, "num_tokens": 1, "num_tokens_all": 476, "is_greedy": false, "logits_per_token": -1.54936945438385, "logits_per_char": -0.774684727191925, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 120, "native_id": 120, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3618850708007812, "incorrect_loss_raw": 1.4024434089660645, "correct_loss_per_char": 0.6809425354003906, "incorrect_loss_per_char": 0.7012217044830322, "correct_loss_per_token": 1.3618850708007812, "incorrect_loss_per_token": 1.4024434089660645, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4501190185546875, "num_tokens": 1, "num_tokens_all": 496, "is_greedy": false, "logits_per_token": -1.4501190185546875, "logits_per_char": -0.7250595092773438, "num_chars": 2}, {"sum_logits": -1.3618850708007812, "num_tokens": 1, "num_tokens_all": 496, "is_greedy": false, "logits_per_token": -1.3618850708007812, "logits_per_char": -0.6809425354003906, "num_chars": 2}, {"sum_logits": -1.436293363571167, "num_tokens": 1, "num_tokens_all": 496, "is_greedy": false, "logits_per_token": -1.436293363571167, "logits_per_char": -0.7181466817855835, "num_chars": 2}, {"sum_logits": -1.3209178447723389, "num_tokens": 1, "num_tokens_all": 496, "is_greedy": true, "logits_per_token": -1.3209178447723389, "logits_per_char": -0.6604589223861694, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 121, "native_id": 121, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.687347412109375, "incorrect_loss_raw": 1.3838911056518555, "correct_loss_per_char": 0.8436737060546875, "incorrect_loss_per_char": 0.6919455528259277, "correct_loss_per_token": 1.687347412109375, "incorrect_loss_per_token": 1.3838911056518555, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0277729034423828, "num_tokens": 1, "num_tokens_all": 521, "is_greedy": true, "logits_per_token": -1.0277729034423828, "logits_per_char": -0.5138864517211914, "num_chars": 2}, {"sum_logits": -1.2149970531463623, "num_tokens": 1, "num_tokens_all": 521, "is_greedy": false, "logits_per_token": -1.2149970531463623, "logits_per_char": -0.6074985265731812, "num_chars": 2}, {"sum_logits": -1.687347412109375, "num_tokens": 1, "num_tokens_all": 521, "is_greedy": false, "logits_per_token": -1.687347412109375, "logits_per_char": -0.8436737060546875, "num_chars": 2}, {"sum_logits": -1.9089033603668213, "num_tokens": 1, "num_tokens_all": 521, "is_greedy": false, "logits_per_token": -1.9089033603668213, "logits_per_char": -0.9544516801834106, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 122, "native_id": 122, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.347974181175232, "incorrect_loss_raw": 1.4085017840067546, "correct_loss_per_char": 0.673987090587616, "incorrect_loss_per_char": 0.7042508920033773, "correct_loss_per_token": 1.347974181175232, "incorrect_loss_per_token": 1.4085017840067546, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3543614149093628, "num_tokens": 1, "num_tokens_all": 487, "is_greedy": false, "logits_per_token": -1.3543614149093628, "logits_per_char": -0.6771807074546814, "num_chars": 2}, {"sum_logits": -1.347974181175232, "num_tokens": 1, "num_tokens_all": 487, "is_greedy": true, "logits_per_token": -1.347974181175232, "logits_per_char": -0.673987090587616, "num_chars": 2}, {"sum_logits": -1.4329779148101807, "num_tokens": 1, "num_tokens_all": 487, "is_greedy": false, "logits_per_token": -1.4329779148101807, "logits_per_char": -0.7164889574050903, "num_chars": 2}, {"sum_logits": -1.4381660223007202, "num_tokens": 1, "num_tokens_all": 487, "is_greedy": false, "logits_per_token": -1.4381660223007202, "logits_per_char": -0.7190830111503601, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 123, "native_id": 123, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2421962022781372, "incorrect_loss_raw": 1.454849084218343, "correct_loss_per_char": 0.6210981011390686, "incorrect_loss_per_char": 0.7274245421091715, "correct_loss_per_token": 1.2421962022781372, "incorrect_loss_per_token": 1.454849084218343, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3643498420715332, "num_tokens": 1, "num_tokens_all": 504, "is_greedy": false, "logits_per_token": -1.3643498420715332, "logits_per_char": -0.6821749210357666, "num_chars": 2}, {"sum_logits": -1.2421962022781372, "num_tokens": 1, "num_tokens_all": 504, "is_greedy": true, "logits_per_token": -1.2421962022781372, "logits_per_char": -0.6210981011390686, "num_chars": 2}, {"sum_logits": -1.4708396196365356, "num_tokens": 1, "num_tokens_all": 504, "is_greedy": false, "logits_per_token": -1.4708396196365356, "logits_per_char": -0.7354198098182678, "num_chars": 2}, {"sum_logits": -1.5293577909469604, "num_tokens": 1, "num_tokens_all": 504, "is_greedy": false, "logits_per_token": -1.5293577909469604, "logits_per_char": -0.7646788954734802, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 124, "native_id": 124, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.495184063911438, "incorrect_loss_raw": 1.3638898134231567, "correct_loss_per_char": 0.747592031955719, "incorrect_loss_per_char": 0.6819449067115784, "correct_loss_per_token": 1.495184063911438, "incorrect_loss_per_token": 1.3638898134231567, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.249393343925476, "num_tokens": 1, "num_tokens_all": 468, "is_greedy": true, "logits_per_token": -1.249393343925476, "logits_per_char": -0.624696671962738, "num_chars": 2}, {"sum_logits": -1.495184063911438, "num_tokens": 1, "num_tokens_all": 468, "is_greedy": false, "logits_per_token": -1.495184063911438, "logits_per_char": -0.747592031955719, "num_chars": 2}, {"sum_logits": -1.4505170583724976, "num_tokens": 1, "num_tokens_all": 468, "is_greedy": false, "logits_per_token": -1.4505170583724976, "logits_per_char": -0.7252585291862488, "num_chars": 2}, {"sum_logits": -1.3917590379714966, "num_tokens": 1, "num_tokens_all": 468, "is_greedy": false, "logits_per_token": -1.3917590379714966, "logits_per_char": -0.6958795189857483, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 125, "native_id": 125, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4068937301635742, "incorrect_loss_raw": 1.400842269261678, "correct_loss_per_char": 0.7034468650817871, "incorrect_loss_per_char": 0.700421134630839, "correct_loss_per_token": 1.4068937301635742, "incorrect_loss_per_token": 1.400842269261678, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4068937301635742, "num_tokens": 1, "num_tokens_all": 533, "is_greedy": false, "logits_per_token": -1.4068937301635742, "logits_per_char": -0.7034468650817871, "num_chars": 2}, {"sum_logits": -1.4924843311309814, "num_tokens": 1, "num_tokens_all": 533, "is_greedy": false, "logits_per_token": -1.4924843311309814, "logits_per_char": -0.7462421655654907, "num_chars": 2}, {"sum_logits": -1.536849021911621, "num_tokens": 1, "num_tokens_all": 533, "is_greedy": false, "logits_per_token": -1.536849021911621, "logits_per_char": -0.7684245109558105, "num_chars": 2}, {"sum_logits": -1.1731934547424316, "num_tokens": 1, "num_tokens_all": 533, "is_greedy": true, "logits_per_token": -1.1731934547424316, "logits_per_char": -0.5865967273712158, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 126, "native_id": 126, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4231280088424683, "incorrect_loss_raw": 1.3846617937088013, "correct_loss_per_char": 0.7115640044212341, "incorrect_loss_per_char": 0.6923308968544006, "correct_loss_per_token": 1.4231280088424683, "incorrect_loss_per_token": 1.3846617937088013, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.334031581878662, "num_tokens": 1, "num_tokens_all": 460, "is_greedy": true, "logits_per_token": -1.334031581878662, "logits_per_char": -0.667015790939331, "num_chars": 2}, {"sum_logits": -1.464371681213379, "num_tokens": 1, "num_tokens_all": 460, "is_greedy": false, "logits_per_token": -1.464371681213379, "logits_per_char": -0.7321858406066895, "num_chars": 2}, {"sum_logits": -1.4231280088424683, "num_tokens": 1, "num_tokens_all": 460, "is_greedy": false, "logits_per_token": -1.4231280088424683, "logits_per_char": -0.7115640044212341, "num_chars": 2}, {"sum_logits": -1.3555821180343628, "num_tokens": 1, "num_tokens_all": 460, "is_greedy": false, "logits_per_token": -1.3555821180343628, "logits_per_char": -0.6777910590171814, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 127, "native_id": 127, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5096204280853271, "incorrect_loss_raw": 1.3965788682301838, "correct_loss_per_char": 0.7548102140426636, "incorrect_loss_per_char": 0.6982894341150919, "correct_loss_per_token": 1.5096204280853271, "incorrect_loss_per_token": 1.3965788682301838, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4103189706802368, "num_tokens": 1, "num_tokens_all": 506, "is_greedy": false, "logits_per_token": -1.4103189706802368, "logits_per_char": -0.7051594853401184, "num_chars": 2}, {"sum_logits": -1.3867393732070923, "num_tokens": 1, "num_tokens_all": 506, "is_greedy": true, "logits_per_token": -1.3867393732070923, "logits_per_char": -0.6933696866035461, "num_chars": 2}, {"sum_logits": -1.5096204280853271, "num_tokens": 1, "num_tokens_all": 506, "is_greedy": false, "logits_per_token": -1.5096204280853271, "logits_per_char": -0.7548102140426636, "num_chars": 2}, {"sum_logits": -1.3926782608032227, "num_tokens": 1, "num_tokens_all": 506, "is_greedy": false, "logits_per_token": -1.3926782608032227, "logits_per_char": -0.6963391304016113, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 128, "native_id": 128, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4694297313690186, "incorrect_loss_raw": 1.369420846303304, "correct_loss_per_char": 0.7347148656845093, "incorrect_loss_per_char": 0.684710423151652, "correct_loss_per_token": 1.4694297313690186, "incorrect_loss_per_token": 1.369420846303304, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.253924012184143, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": true, "logits_per_token": -1.253924012184143, "logits_per_char": -0.6269620060920715, "num_chars": 2}, {"sum_logits": -1.4694297313690186, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.4694297313690186, "logits_per_char": -0.7347148656845093, "num_chars": 2}, {"sum_logits": -1.4540717601776123, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.4540717601776123, "logits_per_char": -0.7270358800888062, "num_chars": 2}, {"sum_logits": -1.4002667665481567, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.4002667665481567, "logits_per_char": -0.7001333832740784, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 129, "native_id": 129, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3437482118606567, "incorrect_loss_raw": 1.4086922407150269, "correct_loss_per_char": 0.6718741059303284, "incorrect_loss_per_char": 0.7043461203575134, "correct_loss_per_token": 1.3437482118606567, "incorrect_loss_per_token": 1.4086922407150269, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.450563907623291, "num_tokens": 1, "num_tokens_all": 521, "is_greedy": false, "logits_per_token": -1.450563907623291, "logits_per_char": -0.7252819538116455, "num_chars": 2}, {"sum_logits": -1.353861689567566, "num_tokens": 1, "num_tokens_all": 521, "is_greedy": false, "logits_per_token": -1.353861689567566, "logits_per_char": -0.676930844783783, "num_chars": 2}, {"sum_logits": -1.4216511249542236, "num_tokens": 1, "num_tokens_all": 521, "is_greedy": false, "logits_per_token": -1.4216511249542236, "logits_per_char": -0.7108255624771118, "num_chars": 2}, {"sum_logits": -1.3437482118606567, "num_tokens": 1, "num_tokens_all": 521, "is_greedy": true, "logits_per_token": -1.3437482118606567, "logits_per_char": -0.6718741059303284, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 130, "native_id": 130, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1884119510650635, "incorrect_loss_raw": 1.4698503812154133, "correct_loss_per_char": 0.5942059755325317, "incorrect_loss_per_char": 0.7349251906077067, "correct_loss_per_token": 1.1884119510650635, "incorrect_loss_per_token": 1.4698503812154133, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4090862274169922, "num_tokens": 1, "num_tokens_all": 570, "is_greedy": false, "logits_per_token": -1.4090862274169922, "logits_per_char": -0.7045431137084961, "num_chars": 2}, {"sum_logits": -1.1884119510650635, "num_tokens": 1, "num_tokens_all": 570, "is_greedy": true, "logits_per_token": -1.1884119510650635, "logits_per_char": -0.5942059755325317, "num_chars": 2}, {"sum_logits": -1.492836833000183, "num_tokens": 1, "num_tokens_all": 570, "is_greedy": false, "logits_per_token": -1.492836833000183, "logits_per_char": -0.7464184165000916, "num_chars": 2}, {"sum_logits": -1.507628083229065, "num_tokens": 1, "num_tokens_all": 570, "is_greedy": false, "logits_per_token": -1.507628083229065, "logits_per_char": -0.7538140416145325, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 131, "native_id": 131, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.445224404335022, "incorrect_loss_raw": 1.3770922025044758, "correct_loss_per_char": 0.722612202167511, "incorrect_loss_per_char": 0.6885461012522379, "correct_loss_per_token": 1.445224404335022, "incorrect_loss_per_token": 1.3770922025044758, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2847487926483154, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": true, "logits_per_token": -1.2847487926483154, "logits_per_char": -0.6423743963241577, "num_chars": 2}, {"sum_logits": -1.3727788925170898, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": false, "logits_per_token": -1.3727788925170898, "logits_per_char": -0.6863894462585449, "num_chars": 2}, {"sum_logits": -1.4737489223480225, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": false, "logits_per_token": -1.4737489223480225, "logits_per_char": -0.7368744611740112, "num_chars": 2}, {"sum_logits": -1.445224404335022, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": false, "logits_per_token": -1.445224404335022, "logits_per_char": -0.722612202167511, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 132, "native_id": 132, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2872806787490845, "incorrect_loss_raw": 1.4320712089538574, "correct_loss_per_char": 0.6436403393745422, "incorrect_loss_per_char": 0.7160356044769287, "correct_loss_per_token": 1.2872806787490845, "incorrect_loss_per_token": 1.4320712089538574, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.42312753200531, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": false, "logits_per_token": -1.42312753200531, "logits_per_char": -0.711563766002655, "num_chars": 2}, {"sum_logits": -1.357282280921936, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": false, "logits_per_token": -1.357282280921936, "logits_per_char": -0.678641140460968, "num_chars": 2}, {"sum_logits": -1.5158038139343262, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": false, "logits_per_token": -1.5158038139343262, "logits_per_char": -0.7579019069671631, "num_chars": 2}, {"sum_logits": -1.2872806787490845, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": true, "logits_per_token": -1.2872806787490845, "logits_per_char": -0.6436403393745422, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 133, "native_id": 133, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4573135375976562, "incorrect_loss_raw": 1.4390231370925903, "correct_loss_per_char": 0.7286567687988281, "incorrect_loss_per_char": 0.7195115685462952, "correct_loss_per_token": 1.4573135375976562, "incorrect_loss_per_token": 1.4390231370925903, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4573135375976562, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.4573135375976562, "logits_per_char": -0.7286567687988281, "num_chars": 2}, {"sum_logits": -1.4946353435516357, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.4946353435516357, "logits_per_char": -0.7473176717758179, "num_chars": 2}, {"sum_logits": -1.485732078552246, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.485732078552246, "logits_per_char": -0.742866039276123, "num_chars": 2}, {"sum_logits": -1.3367019891738892, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": true, "logits_per_token": -1.3367019891738892, "logits_per_char": -0.6683509945869446, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 134, "native_id": 134, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2492231130599976, "incorrect_loss_raw": 1.4474469025929768, "correct_loss_per_char": 0.6246115565299988, "incorrect_loss_per_char": 0.7237234512964884, "correct_loss_per_token": 1.2492231130599976, "incorrect_loss_per_token": 1.4474469025929768, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4972445964813232, "num_tokens": 1, "num_tokens_all": 498, "is_greedy": false, "logits_per_token": -1.4972445964813232, "logits_per_char": -0.7486222982406616, "num_chars": 2}, {"sum_logits": -1.3503644466400146, "num_tokens": 1, "num_tokens_all": 498, "is_greedy": false, "logits_per_token": -1.3503644466400146, "logits_per_char": -0.6751822233200073, "num_chars": 2}, {"sum_logits": -1.4947316646575928, "num_tokens": 1, "num_tokens_all": 498, "is_greedy": false, "logits_per_token": -1.4947316646575928, "logits_per_char": -0.7473658323287964, "num_chars": 2}, {"sum_logits": -1.2492231130599976, "num_tokens": 1, "num_tokens_all": 498, "is_greedy": true, "logits_per_token": -1.2492231130599976, "logits_per_char": -0.6246115565299988, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 135, "native_id": 135, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4982755184173584, "incorrect_loss_raw": 1.3817294041315715, "correct_loss_per_char": 0.7491377592086792, "incorrect_loss_per_char": 0.6908647020657858, "correct_loss_per_token": 1.4982755184173584, "incorrect_loss_per_token": 1.3817294041315715, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4982755184173584, "num_tokens": 1, "num_tokens_all": 452, "is_greedy": false, "logits_per_token": -1.4982755184173584, "logits_per_char": -0.7491377592086792, "num_chars": 2}, {"sum_logits": -1.6462600231170654, "num_tokens": 1, "num_tokens_all": 452, "is_greedy": false, "logits_per_token": -1.6462600231170654, "logits_per_char": -0.8231300115585327, "num_chars": 2}, {"sum_logits": -1.174777626991272, "num_tokens": 1, "num_tokens_all": 452, "is_greedy": true, "logits_per_token": -1.174777626991272, "logits_per_char": -0.587388813495636, "num_chars": 2}, {"sum_logits": -1.324150562286377, "num_tokens": 1, "num_tokens_all": 452, "is_greedy": false, "logits_per_token": -1.324150562286377, "logits_per_char": -0.6620752811431885, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 136, "native_id": 136, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3809937238693237, "incorrect_loss_raw": 1.3934775193532307, "correct_loss_per_char": 0.6904968619346619, "incorrect_loss_per_char": 0.6967387596766154, "correct_loss_per_token": 1.3809937238693237, "incorrect_loss_per_token": 1.3934775193532307, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.38814377784729, "num_tokens": 1, "num_tokens_all": 484, "is_greedy": false, "logits_per_token": -1.38814377784729, "logits_per_char": -0.694071888923645, "num_chars": 2}, {"sum_logits": -1.3725202083587646, "num_tokens": 1, "num_tokens_all": 484, "is_greedy": true, "logits_per_token": -1.3725202083587646, "logits_per_char": -0.6862601041793823, "num_chars": 2}, {"sum_logits": -1.3809937238693237, "num_tokens": 1, "num_tokens_all": 484, "is_greedy": false, "logits_per_token": -1.3809937238693237, "logits_per_char": -0.6904968619346619, "num_chars": 2}, {"sum_logits": -1.4197685718536377, "num_tokens": 1, "num_tokens_all": 484, "is_greedy": false, "logits_per_token": -1.4197685718536377, "logits_per_char": -0.7098842859268188, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 137, "native_id": 137, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4540411233901978, "incorrect_loss_raw": 1.3790416717529297, "correct_loss_per_char": 0.7270205616950989, "incorrect_loss_per_char": 0.6895208358764648, "correct_loss_per_token": 1.4540411233901978, "incorrect_loss_per_token": 1.3790416717529297, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2304195165634155, "num_tokens": 1, "num_tokens_all": 471, "is_greedy": true, "logits_per_token": -1.2304195165634155, "logits_per_char": -0.6152097582817078, "num_chars": 2}, {"sum_logits": -1.4540411233901978, "num_tokens": 1, "num_tokens_all": 471, "is_greedy": false, "logits_per_token": -1.4540411233901978, "logits_per_char": -0.7270205616950989, "num_chars": 2}, {"sum_logits": -1.5512620210647583, "num_tokens": 1, "num_tokens_all": 471, "is_greedy": false, "logits_per_token": -1.5512620210647583, "logits_per_char": -0.7756310105323792, "num_chars": 2}, {"sum_logits": -1.3554434776306152, "num_tokens": 1, "num_tokens_all": 471, "is_greedy": false, "logits_per_token": -1.3554434776306152, "logits_per_char": -0.6777217388153076, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 138, "native_id": 138, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.22114098072052, "incorrect_loss_raw": 1.4605447053909302, "correct_loss_per_char": 0.61057049036026, "incorrect_loss_per_char": 0.7302723526954651, "correct_loss_per_token": 1.22114098072052, "incorrect_loss_per_token": 1.4605447053909302, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5323090553283691, "num_tokens": 1, "num_tokens_all": 483, "is_greedy": false, "logits_per_token": -1.5323090553283691, "logits_per_char": -0.7661545276641846, "num_chars": 2}, {"sum_logits": -1.22114098072052, "num_tokens": 1, "num_tokens_all": 483, "is_greedy": true, "logits_per_token": -1.22114098072052, "logits_per_char": -0.61057049036026, "num_chars": 2}, {"sum_logits": -1.449199914932251, "num_tokens": 1, "num_tokens_all": 483, "is_greedy": false, "logits_per_token": -1.449199914932251, "logits_per_char": -0.7245999574661255, "num_chars": 2}, {"sum_logits": -1.4001251459121704, "num_tokens": 1, "num_tokens_all": 483, "is_greedy": false, "logits_per_token": -1.4001251459121704, "logits_per_char": -0.7000625729560852, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 139, "native_id": 139, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4431551694869995, "incorrect_loss_raw": 1.390837828318278, "correct_loss_per_char": 0.7215775847434998, "incorrect_loss_per_char": 0.695418914159139, "correct_loss_per_token": 1.4431551694869995, "incorrect_loss_per_token": 1.390837828318278, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1415601968765259, "num_tokens": 1, "num_tokens_all": 465, "is_greedy": true, "logits_per_token": -1.1415601968765259, "logits_per_char": -0.5707800984382629, "num_chars": 2}, {"sum_logits": -1.453462839126587, "num_tokens": 1, "num_tokens_all": 465, "is_greedy": false, "logits_per_token": -1.453462839126587, "logits_per_char": -0.7267314195632935, "num_chars": 2}, {"sum_logits": -1.4431551694869995, "num_tokens": 1, "num_tokens_all": 465, "is_greedy": false, "logits_per_token": -1.4431551694869995, "logits_per_char": -0.7215775847434998, "num_chars": 2}, {"sum_logits": -1.5774904489517212, "num_tokens": 1, "num_tokens_all": 465, "is_greedy": false, "logits_per_token": -1.5774904489517212, "logits_per_char": -0.7887452244758606, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 140, "native_id": 140, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8122776746749878, "incorrect_loss_raw": 1.7865512371063232, "correct_loss_per_char": 0.4061388373374939, "incorrect_loss_per_char": 0.8932756185531616, "correct_loss_per_token": 0.8122776746749878, "incorrect_loss_per_token": 1.7865512371063232, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8122776746749878, "num_tokens": 1, "num_tokens_all": 516, "is_greedy": true, "logits_per_token": -0.8122776746749878, "logits_per_char": -0.4061388373374939, "num_chars": 2}, {"sum_logits": -1.648895263671875, "num_tokens": 1, "num_tokens_all": 516, "is_greedy": false, "logits_per_token": -1.648895263671875, "logits_per_char": -0.8244476318359375, "num_chars": 2}, {"sum_logits": -1.6353905200958252, "num_tokens": 1, "num_tokens_all": 516, "is_greedy": false, "logits_per_token": -1.6353905200958252, "logits_per_char": -0.8176952600479126, "num_chars": 2}, {"sum_logits": -2.0753679275512695, "num_tokens": 1, "num_tokens_all": 516, "is_greedy": false, "logits_per_token": -2.0753679275512695, "logits_per_char": -1.0376839637756348, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 141, "native_id": 141, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.528014898300171, "incorrect_loss_raw": 1.3512379725774128, "correct_loss_per_char": 0.7640074491500854, "incorrect_loss_per_char": 0.6756189862887064, "correct_loss_per_token": 1.528014898300171, "incorrect_loss_per_token": 1.3512379725774128, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3878815174102783, "num_tokens": 1, "num_tokens_all": 547, "is_greedy": false, "logits_per_token": -1.3878815174102783, "logits_per_char": -0.6939407587051392, "num_chars": 2}, {"sum_logits": -1.4007376432418823, "num_tokens": 1, "num_tokens_all": 547, "is_greedy": false, "logits_per_token": -1.4007376432418823, "logits_per_char": -0.7003688216209412, "num_chars": 2}, {"sum_logits": -1.528014898300171, "num_tokens": 1, "num_tokens_all": 547, "is_greedy": false, "logits_per_token": -1.528014898300171, "logits_per_char": -0.7640074491500854, "num_chars": 2}, {"sum_logits": -1.2650947570800781, "num_tokens": 1, "num_tokens_all": 547, "is_greedy": true, "logits_per_token": -1.2650947570800781, "logits_per_char": -0.6325473785400391, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 142, "native_id": 142, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1471322774887085, "incorrect_loss_raw": 1.5180500348409016, "correct_loss_per_char": 0.5735661387443542, "incorrect_loss_per_char": 0.7590250174204508, "correct_loss_per_token": 1.1471322774887085, "incorrect_loss_per_token": 1.5180500348409016, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1471322774887085, "num_tokens": 1, "num_tokens_all": 517, "is_greedy": true, "logits_per_token": -1.1471322774887085, "logits_per_char": -0.5735661387443542, "num_chars": 2}, {"sum_logits": -1.6821155548095703, "num_tokens": 1, "num_tokens_all": 517, "is_greedy": false, "logits_per_token": -1.6821155548095703, "logits_per_char": -0.8410577774047852, "num_chars": 2}, {"sum_logits": -1.6633628606796265, "num_tokens": 1, "num_tokens_all": 517, "is_greedy": false, "logits_per_token": -1.6633628606796265, "logits_per_char": -0.8316814303398132, "num_chars": 2}, {"sum_logits": -1.2086716890335083, "num_tokens": 1, "num_tokens_all": 517, "is_greedy": false, "logits_per_token": -1.2086716890335083, "logits_per_char": -0.6043358445167542, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 143, "native_id": 143, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5271096229553223, "incorrect_loss_raw": 1.3534876108169556, "correct_loss_per_char": 0.7635548114776611, "incorrect_loss_per_char": 0.6767438054084778, "correct_loss_per_token": 1.5271096229553223, "incorrect_loss_per_token": 1.3534876108169556, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3987528085708618, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": false, "logits_per_token": -1.3987528085708618, "logits_per_char": -0.6993764042854309, "num_chars": 2}, {"sum_logits": -1.3935489654541016, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": false, "logits_per_token": -1.3935489654541016, "logits_per_char": -0.6967744827270508, "num_chars": 2}, {"sum_logits": -1.5271096229553223, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": false, "logits_per_token": -1.5271096229553223, "logits_per_char": -0.7635548114776611, "num_chars": 2}, {"sum_logits": -1.2681610584259033, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": true, "logits_per_token": -1.2681610584259033, "logits_per_char": -0.6340805292129517, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 144, "native_id": 144, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.351881742477417, "incorrect_loss_raw": 1.4070109128952026, "correct_loss_per_char": 0.6759408712387085, "incorrect_loss_per_char": 0.7035054564476013, "correct_loss_per_token": 1.351881742477417, "incorrect_loss_per_token": 1.4070109128952026, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.334950566291809, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": true, "logits_per_token": -1.334950566291809, "logits_per_char": -0.6674752831459045, "num_chars": 2}, {"sum_logits": -1.351881742477417, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": false, "logits_per_token": -1.351881742477417, "logits_per_char": -0.6759408712387085, "num_chars": 2}, {"sum_logits": -1.3872473239898682, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": false, "logits_per_token": -1.3872473239898682, "logits_per_char": -0.6936236619949341, "num_chars": 2}, {"sum_logits": -1.4988348484039307, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": false, "logits_per_token": -1.4988348484039307, "logits_per_char": -0.7494174242019653, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 145, "native_id": 145, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3856544494628906, "incorrect_loss_raw": 1.3959529002507527, "correct_loss_per_char": 0.6928272247314453, "incorrect_loss_per_char": 0.6979764501253763, "correct_loss_per_token": 1.3856544494628906, "incorrect_loss_per_token": 1.3959529002507527, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2816723585128784, "num_tokens": 1, "num_tokens_all": 469, "is_greedy": true, "logits_per_token": -1.2816723585128784, "logits_per_char": -0.6408361792564392, "num_chars": 2}, {"sum_logits": -1.4078563451766968, "num_tokens": 1, "num_tokens_all": 469, "is_greedy": false, "logits_per_token": -1.4078563451766968, "logits_per_char": -0.7039281725883484, "num_chars": 2}, {"sum_logits": -1.498329997062683, "num_tokens": 1, "num_tokens_all": 469, "is_greedy": false, "logits_per_token": -1.498329997062683, "logits_per_char": -0.7491649985313416, "num_chars": 2}, {"sum_logits": -1.3856544494628906, "num_tokens": 1, "num_tokens_all": 469, "is_greedy": false, "logits_per_token": -1.3856544494628906, "logits_per_char": -0.6928272247314453, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 146, "native_id": 146, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5288783311843872, "incorrect_loss_raw": 1.3607079982757568, "correct_loss_per_char": 0.7644391655921936, "incorrect_loss_per_char": 0.6803539991378784, "correct_loss_per_token": 1.5288783311843872, "incorrect_loss_per_token": 1.3607079982757568, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2715990543365479, "num_tokens": 1, "num_tokens_all": 457, "is_greedy": true, "logits_per_token": -1.2715990543365479, "logits_per_char": -0.6357995271682739, "num_chars": 2}, {"sum_logits": -1.5288783311843872, "num_tokens": 1, "num_tokens_all": 457, "is_greedy": false, "logits_per_token": -1.5288783311843872, "logits_per_char": -0.7644391655921936, "num_chars": 2}, {"sum_logits": -1.3225523233413696, "num_tokens": 1, "num_tokens_all": 457, "is_greedy": false, "logits_per_token": -1.3225523233413696, "logits_per_char": -0.6612761616706848, "num_chars": 2}, {"sum_logits": -1.487972617149353, "num_tokens": 1, "num_tokens_all": 457, "is_greedy": false, "logits_per_token": -1.487972617149353, "logits_per_char": -0.7439863085746765, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 147, "native_id": 147, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4791910648345947, "incorrect_loss_raw": 1.373311718304952, "correct_loss_per_char": 0.7395955324172974, "incorrect_loss_per_char": 0.686655859152476, "correct_loss_per_token": 1.4791910648345947, "incorrect_loss_per_token": 1.373311718304952, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1843458414077759, "num_tokens": 1, "num_tokens_all": 452, "is_greedy": true, "logits_per_token": -1.1843458414077759, "logits_per_char": -0.5921729207038879, "num_chars": 2}, {"sum_logits": -1.505616545677185, "num_tokens": 1, "num_tokens_all": 452, "is_greedy": false, "logits_per_token": -1.505616545677185, "logits_per_char": -0.7528082728385925, "num_chars": 2}, {"sum_logits": -1.429972767829895, "num_tokens": 1, "num_tokens_all": 452, "is_greedy": false, "logits_per_token": -1.429972767829895, "logits_per_char": -0.7149863839149475, "num_chars": 2}, {"sum_logits": -1.4791910648345947, "num_tokens": 1, "num_tokens_all": 452, "is_greedy": false, "logits_per_token": -1.4791910648345947, "logits_per_char": -0.7395955324172974, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 148, "native_id": 148, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3688076734542847, "incorrect_loss_raw": 1.4000028769175212, "correct_loss_per_char": 0.6844038367271423, "incorrect_loss_per_char": 0.7000014384587606, "correct_loss_per_token": 1.3688076734542847, "incorrect_loss_per_token": 1.4000028769175212, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3688076734542847, "num_tokens": 1, "num_tokens_all": 520, "is_greedy": true, "logits_per_token": -1.3688076734542847, "logits_per_char": -0.6844038367271423, "num_chars": 2}, {"sum_logits": -1.383797287940979, "num_tokens": 1, "num_tokens_all": 520, "is_greedy": false, "logits_per_token": -1.383797287940979, "logits_per_char": -0.6918986439704895, "num_chars": 2}, {"sum_logits": -1.4383130073547363, "num_tokens": 1, "num_tokens_all": 520, "is_greedy": false, "logits_per_token": -1.4383130073547363, "logits_per_char": -0.7191565036773682, "num_chars": 2}, {"sum_logits": -1.3778983354568481, "num_tokens": 1, "num_tokens_all": 520, "is_greedy": false, "logits_per_token": -1.3778983354568481, "logits_per_char": -0.6889491677284241, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 149, "native_id": 149, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3698687553405762, "incorrect_loss_raw": 1.4074780543645222, "correct_loss_per_char": 0.6849343776702881, "incorrect_loss_per_char": 0.7037390271822611, "correct_loss_per_token": 1.3698687553405762, "incorrect_loss_per_token": 1.4074780543645222, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2263498306274414, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": true, "logits_per_token": -1.2263498306274414, "logits_per_char": -0.6131749153137207, "num_chars": 2}, {"sum_logits": -1.3698687553405762, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.3698687553405762, "logits_per_char": -0.6849343776702881, "num_chars": 2}, {"sum_logits": -1.440962791442871, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.440962791442871, "logits_per_char": -0.7204813957214355, "num_chars": 2}, {"sum_logits": -1.5551215410232544, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.5551215410232544, "logits_per_char": -0.7775607705116272, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 150, "native_id": 150, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.482347846031189, "incorrect_loss_raw": 1.367419958114624, "correct_loss_per_char": 0.7411739230155945, "incorrect_loss_per_char": 0.683709979057312, "correct_loss_per_token": 1.482347846031189, "incorrect_loss_per_token": 1.367419958114624, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5015747547149658, "num_tokens": 1, "num_tokens_all": 508, "is_greedy": false, "logits_per_token": -1.5015747547149658, "logits_per_char": -0.7507873773574829, "num_chars": 2}, {"sum_logits": -1.347455382347107, "num_tokens": 1, "num_tokens_all": 508, "is_greedy": false, "logits_per_token": -1.347455382347107, "logits_per_char": -0.6737276911735535, "num_chars": 2}, {"sum_logits": -1.482347846031189, "num_tokens": 1, "num_tokens_all": 508, "is_greedy": false, "logits_per_token": -1.482347846031189, "logits_per_char": -0.7411739230155945, "num_chars": 2}, {"sum_logits": -1.2532297372817993, "num_tokens": 1, "num_tokens_all": 508, "is_greedy": true, "logits_per_token": -1.2532297372817993, "logits_per_char": -0.6266148686408997, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 151, "native_id": 151, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2234065532684326, "incorrect_loss_raw": 1.467342694600423, "correct_loss_per_char": 0.6117032766342163, "incorrect_loss_per_char": 0.7336713473002116, "correct_loss_per_token": 1.2234065532684326, "incorrect_loss_per_token": 1.467342694600423, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5024991035461426, "num_tokens": 1, "num_tokens_all": 541, "is_greedy": false, "logits_per_token": -1.5024991035461426, "logits_per_char": -0.7512495517730713, "num_chars": 2}, {"sum_logits": -1.5419061183929443, "num_tokens": 1, "num_tokens_all": 541, "is_greedy": false, "logits_per_token": -1.5419061183929443, "logits_per_char": -0.7709530591964722, "num_chars": 2}, {"sum_logits": -1.3576228618621826, "num_tokens": 1, "num_tokens_all": 541, "is_greedy": false, "logits_per_token": -1.3576228618621826, "logits_per_char": -0.6788114309310913, "num_chars": 2}, {"sum_logits": -1.2234065532684326, "num_tokens": 1, "num_tokens_all": 541, "is_greedy": true, "logits_per_token": -1.2234065532684326, "logits_per_char": -0.6117032766342163, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 152, "native_id": 152, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2218222618103027, "incorrect_loss_raw": 1.456200361251831, "correct_loss_per_char": 0.6109111309051514, "incorrect_loss_per_char": 0.7281001806259155, "correct_loss_per_token": 1.2218222618103027, "incorrect_loss_per_token": 1.456200361251831, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2218222618103027, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": true, "logits_per_token": -1.2218222618103027, "logits_per_char": -0.6109111309051514, "num_chars": 2}, {"sum_logits": -1.3894727230072021, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.3894727230072021, "logits_per_char": -0.6947363615036011, "num_chars": 2}, {"sum_logits": -1.4676272869110107, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.4676272869110107, "logits_per_char": -0.7338136434555054, "num_chars": 2}, {"sum_logits": -1.5115010738372803, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.5115010738372803, "logits_per_char": -0.7557505369186401, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 153, "native_id": 153, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3844611644744873, "incorrect_loss_raw": 1.3950168689092, "correct_loss_per_char": 0.6922305822372437, "incorrect_loss_per_char": 0.6975084344546, "correct_loss_per_token": 1.3844611644744873, "incorrect_loss_per_token": 1.3950168689092, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3589742183685303, "num_tokens": 1, "num_tokens_all": 513, "is_greedy": false, "logits_per_token": -1.3589742183685303, "logits_per_char": -0.6794871091842651, "num_chars": 2}, {"sum_logits": -1.3499218225479126, "num_tokens": 1, "num_tokens_all": 513, "is_greedy": true, "logits_per_token": -1.3499218225479126, "logits_per_char": -0.6749609112739563, "num_chars": 2}, {"sum_logits": -1.3844611644744873, "num_tokens": 1, "num_tokens_all": 513, "is_greedy": false, "logits_per_token": -1.3844611644744873, "logits_per_char": -0.6922305822372437, "num_chars": 2}, {"sum_logits": -1.4761545658111572, "num_tokens": 1, "num_tokens_all": 513, "is_greedy": false, "logits_per_token": -1.4761545658111572, "logits_per_char": -0.7380772829055786, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 154, "native_id": 154, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5255497694015503, "incorrect_loss_raw": 1.3550209204355876, "correct_loss_per_char": 0.7627748847007751, "incorrect_loss_per_char": 0.6775104602177938, "correct_loss_per_token": 1.5255497694015503, "incorrect_loss_per_token": 1.3550209204355876, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2485591173171997, "num_tokens": 1, "num_tokens_all": 456, "is_greedy": true, "logits_per_token": -1.2485591173171997, "logits_per_char": -0.6242795586585999, "num_chars": 2}, {"sum_logits": -1.4945887327194214, "num_tokens": 1, "num_tokens_all": 456, "is_greedy": false, "logits_per_token": -1.4945887327194214, "logits_per_char": -0.7472943663597107, "num_chars": 2}, {"sum_logits": -1.3219149112701416, "num_tokens": 1, "num_tokens_all": 456, "is_greedy": false, "logits_per_token": -1.3219149112701416, "logits_per_char": -0.6609574556350708, "num_chars": 2}, {"sum_logits": -1.5255497694015503, "num_tokens": 1, "num_tokens_all": 456, "is_greedy": false, "logits_per_token": -1.5255497694015503, "logits_per_char": -0.7627748847007751, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 155, "native_id": 155, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4124755859375, "incorrect_loss_raw": 1.4095298846562703, "correct_loss_per_char": 0.70623779296875, "incorrect_loss_per_char": 0.7047649423281351, "correct_loss_per_token": 1.4124755859375, "incorrect_loss_per_token": 1.4095298846562703, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4540801048278809, "num_tokens": 1, "num_tokens_all": 523, "is_greedy": false, "logits_per_token": -1.4540801048278809, "logits_per_char": -0.7270400524139404, "num_chars": 2}, {"sum_logits": -1.4124755859375, "num_tokens": 1, "num_tokens_all": 523, "is_greedy": false, "logits_per_token": -1.4124755859375, "logits_per_char": -0.70623779296875, "num_chars": 2}, {"sum_logits": -1.64347243309021, "num_tokens": 1, "num_tokens_all": 523, "is_greedy": false, "logits_per_token": -1.64347243309021, "logits_per_char": -0.821736216545105, "num_chars": 2}, {"sum_logits": -1.1310371160507202, "num_tokens": 1, "num_tokens_all": 523, "is_greedy": true, "logits_per_token": -1.1310371160507202, "logits_per_char": -0.5655185580253601, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 156, "native_id": 156, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5574414730072021, "incorrect_loss_raw": 1.3516608476638794, "correct_loss_per_char": 0.7787207365036011, "incorrect_loss_per_char": 0.6758304238319397, "correct_loss_per_token": 1.5574414730072021, "incorrect_loss_per_token": 1.3516608476638794, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1684938669204712, "num_tokens": 1, "num_tokens_all": 448, "is_greedy": true, "logits_per_token": -1.1684938669204712, "logits_per_char": -0.5842469334602356, "num_chars": 2}, {"sum_logits": -1.5574414730072021, "num_tokens": 1, "num_tokens_all": 448, "is_greedy": false, "logits_per_token": -1.5574414730072021, "logits_per_char": -0.7787207365036011, "num_chars": 2}, {"sum_logits": -1.3974621295928955, "num_tokens": 1, "num_tokens_all": 448, "is_greedy": false, "logits_per_token": -1.3974621295928955, "logits_per_char": -0.6987310647964478, "num_chars": 2}, {"sum_logits": -1.4890265464782715, "num_tokens": 1, "num_tokens_all": 448, "is_greedy": false, "logits_per_token": -1.4890265464782715, "logits_per_char": -0.7445132732391357, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 157, "native_id": 157, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2464172840118408, "incorrect_loss_raw": 1.4775516192118328, "correct_loss_per_char": 0.6232086420059204, "incorrect_loss_per_char": 0.7387758096059164, "correct_loss_per_token": 1.2464172840118408, "incorrect_loss_per_token": 1.4775516192118328, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2464172840118408, "num_tokens": 1, "num_tokens_all": 538, "is_greedy": true, "logits_per_token": -1.2464172840118408, "logits_per_char": -0.6232086420059204, "num_chars": 2}, {"sum_logits": -1.3980776071548462, "num_tokens": 1, "num_tokens_all": 538, "is_greedy": false, "logits_per_token": -1.3980776071548462, "logits_per_char": -0.6990388035774231, "num_chars": 2}, {"sum_logits": -1.526690125465393, "num_tokens": 1, "num_tokens_all": 538, "is_greedy": false, "logits_per_token": -1.526690125465393, "logits_per_char": -0.7633450627326965, "num_chars": 2}, {"sum_logits": -1.5078871250152588, "num_tokens": 1, "num_tokens_all": 538, "is_greedy": false, "logits_per_token": -1.5078871250152588, "logits_per_char": -0.7539435625076294, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 158, "native_id": 158, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5168625116348267, "incorrect_loss_raw": 1.3564890225728352, "correct_loss_per_char": 0.7584312558174133, "incorrect_loss_per_char": 0.6782445112864176, "correct_loss_per_token": 1.5168625116348267, "incorrect_loss_per_token": 1.3564890225728352, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4186840057373047, "num_tokens": 1, "num_tokens_all": 509, "is_greedy": false, "logits_per_token": -1.4186840057373047, "logits_per_char": -0.7093420028686523, "num_chars": 2}, {"sum_logits": -1.299961805343628, "num_tokens": 1, "num_tokens_all": 509, "is_greedy": true, "logits_per_token": -1.299961805343628, "logits_per_char": -0.649980902671814, "num_chars": 2}, {"sum_logits": -1.5168625116348267, "num_tokens": 1, "num_tokens_all": 509, "is_greedy": false, "logits_per_token": -1.5168625116348267, "logits_per_char": -0.7584312558174133, "num_chars": 2}, {"sum_logits": -1.3508212566375732, "num_tokens": 1, "num_tokens_all": 509, "is_greedy": false, "logits_per_token": -1.3508212566375732, "logits_per_char": -0.6754106283187866, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 159, "native_id": 159, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4592453241348267, "incorrect_loss_raw": 1.3707321087519329, "correct_loss_per_char": 0.7296226620674133, "incorrect_loss_per_char": 0.6853660543759664, "correct_loss_per_token": 1.4592453241348267, "incorrect_loss_per_token": 1.3707321087519329, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2651338577270508, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": true, "logits_per_token": -1.2651338577270508, "logits_per_char": -0.6325669288635254, "num_chars": 2}, {"sum_logits": -1.4300193786621094, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": false, "logits_per_token": -1.4300193786621094, "logits_per_char": -0.7150096893310547, "num_chars": 2}, {"sum_logits": -1.4592453241348267, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": false, "logits_per_token": -1.4592453241348267, "logits_per_char": -0.7296226620674133, "num_chars": 2}, {"sum_logits": -1.4170430898666382, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": false, "logits_per_token": -1.4170430898666382, "logits_per_char": -0.7085215449333191, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 160, "native_id": 160, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3581881523132324, "incorrect_loss_raw": 1.4096895456314087, "correct_loss_per_char": 0.6790940761566162, "incorrect_loss_per_char": 0.7048447728157043, "correct_loss_per_token": 1.3581881523132324, "incorrect_loss_per_token": 1.4096895456314087, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4640488624572754, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.4640488624572754, "logits_per_char": -0.7320244312286377, "num_chars": 2}, {"sum_logits": -1.4552593231201172, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.4552593231201172, "logits_per_char": -0.7276296615600586, "num_chars": 2}, {"sum_logits": -1.3097604513168335, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": true, "logits_per_token": -1.3097604513168335, "logits_per_char": -0.6548802256584167, "num_chars": 2}, {"sum_logits": -1.3581881523132324, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.3581881523132324, "logits_per_char": -0.6790940761566162, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 161, "native_id": 161, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3840985298156738, "incorrect_loss_raw": 1.4004029432932537, "correct_loss_per_char": 0.6920492649078369, "incorrect_loss_per_char": 0.7002014716466268, "correct_loss_per_token": 1.3840985298156738, "incorrect_loss_per_token": 1.4004029432932537, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4804823398590088, "num_tokens": 1, "num_tokens_all": 451, "is_greedy": false, "logits_per_token": -1.4804823398590088, "logits_per_char": -0.7402411699295044, "num_chars": 2}, {"sum_logits": -1.317429780960083, "num_tokens": 1, "num_tokens_all": 451, "is_greedy": true, "logits_per_token": -1.317429780960083, "logits_per_char": -0.6587148904800415, "num_chars": 2}, {"sum_logits": -1.3840985298156738, "num_tokens": 1, "num_tokens_all": 451, "is_greedy": false, "logits_per_token": -1.3840985298156738, "logits_per_char": -0.6920492649078369, "num_chars": 2}, {"sum_logits": -1.403296709060669, "num_tokens": 1, "num_tokens_all": 451, "is_greedy": false, "logits_per_token": -1.403296709060669, "logits_per_char": -0.7016483545303345, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 162, "native_id": 162, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4732398986816406, "incorrect_loss_raw": 1.3747048377990723, "correct_loss_per_char": 0.7366199493408203, "incorrect_loss_per_char": 0.6873524188995361, "correct_loss_per_token": 1.4732398986816406, "incorrect_loss_per_token": 1.3747048377990723, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.260348916053772, "num_tokens": 1, "num_tokens_all": 1364, "is_greedy": true, "logits_per_token": -1.260348916053772, "logits_per_char": -0.630174458026886, "num_chars": 2}, {"sum_logits": -1.3456422090530396, "num_tokens": 1, "num_tokens_all": 1364, "is_greedy": false, "logits_per_token": -1.3456422090530396, "logits_per_char": -0.6728211045265198, "num_chars": 2}, {"sum_logits": -1.5181233882904053, "num_tokens": 1, "num_tokens_all": 1364, "is_greedy": false, "logits_per_token": -1.5181233882904053, "logits_per_char": -0.7590616941452026, "num_chars": 2}, {"sum_logits": -1.4732398986816406, "num_tokens": 1, "num_tokens_all": 1364, "is_greedy": false, "logits_per_token": -1.4732398986816406, "logits_per_char": -0.7366199493408203, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 163, "native_id": 163, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4828941822052002, "incorrect_loss_raw": 1.3779438734054565, "correct_loss_per_char": 0.7414470911026001, "incorrect_loss_per_char": 0.6889719367027283, "correct_loss_per_token": 1.4828941822052002, "incorrect_loss_per_token": 1.3779438734054565, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1631540060043335, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": true, "logits_per_token": -1.1631540060043335, "logits_per_char": -0.5815770030021667, "num_chars": 2}, {"sum_logits": -1.38066565990448, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": false, "logits_per_token": -1.38066565990448, "logits_per_char": -0.69033282995224, "num_chars": 2}, {"sum_logits": -1.5900119543075562, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": false, "logits_per_token": -1.5900119543075562, "logits_per_char": -0.7950059771537781, "num_chars": 2}, {"sum_logits": -1.4828941822052002, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": false, "logits_per_token": -1.4828941822052002, "logits_per_char": -0.7414470911026001, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 164, "native_id": 164, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1876778602600098, "incorrect_loss_raw": 1.4708882570266724, "correct_loss_per_char": 0.5938389301300049, "incorrect_loss_per_char": 0.7354441285133362, "correct_loss_per_token": 1.1876778602600098, "incorrect_loss_per_token": 1.4708882570266724, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5311979055404663, "num_tokens": 1, "num_tokens_all": 535, "is_greedy": false, "logits_per_token": -1.5311979055404663, "logits_per_char": -0.7655989527702332, "num_chars": 2}, {"sum_logits": -1.374039649963379, "num_tokens": 1, "num_tokens_all": 535, "is_greedy": false, "logits_per_token": -1.374039649963379, "logits_per_char": -0.6870198249816895, "num_chars": 2}, {"sum_logits": -1.5074272155761719, "num_tokens": 1, "num_tokens_all": 535, "is_greedy": false, "logits_per_token": -1.5074272155761719, "logits_per_char": -0.7537136077880859, "num_chars": 2}, {"sum_logits": -1.1876778602600098, "num_tokens": 1, "num_tokens_all": 535, "is_greedy": true, "logits_per_token": -1.1876778602600098, "logits_per_char": -0.5938389301300049, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 165, "native_id": 165, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4645987749099731, "incorrect_loss_raw": 1.378304163614909, "correct_loss_per_char": 0.7322993874549866, "incorrect_loss_per_char": 0.6891520818074545, "correct_loss_per_token": 1.4645987749099731, "incorrect_loss_per_token": 1.378304163614909, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1928822994232178, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": true, "logits_per_token": -1.1928822994232178, "logits_per_char": -0.5964411497116089, "num_chars": 2}, {"sum_logits": -1.4645987749099731, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": false, "logits_per_token": -1.4645987749099731, "logits_per_char": -0.7322993874549866, "num_chars": 2}, {"sum_logits": -1.4568737745285034, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": false, "logits_per_token": -1.4568737745285034, "logits_per_char": -0.7284368872642517, "num_chars": 2}, {"sum_logits": -1.4851564168930054, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": false, "logits_per_token": -1.4851564168930054, "logits_per_char": -0.7425782084465027, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 166, "native_id": 166, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5651791095733643, "incorrect_loss_raw": 1.3469722668329875, "correct_loss_per_char": 0.7825895547866821, "incorrect_loss_per_char": 0.6734861334164938, "correct_loss_per_token": 1.5651791095733643, "incorrect_loss_per_token": 1.3469722668329875, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2505710124969482, "num_tokens": 1, "num_tokens_all": 479, "is_greedy": true, "logits_per_token": -1.2505710124969482, "logits_per_char": -0.6252855062484741, "num_chars": 2}, {"sum_logits": -1.5651791095733643, "num_tokens": 1, "num_tokens_all": 479, "is_greedy": false, "logits_per_token": -1.5651791095733643, "logits_per_char": -0.7825895547866821, "num_chars": 2}, {"sum_logits": -1.4648226499557495, "num_tokens": 1, "num_tokens_all": 479, "is_greedy": false, "logits_per_token": -1.4648226499557495, "logits_per_char": -0.7324113249778748, "num_chars": 2}, {"sum_logits": -1.3255231380462646, "num_tokens": 1, "num_tokens_all": 479, "is_greedy": false, "logits_per_token": -1.3255231380462646, "logits_per_char": -0.6627615690231323, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 167, "native_id": 167, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.334636926651001, "incorrect_loss_raw": 1.4138034582138062, "correct_loss_per_char": 0.6673184633255005, "incorrect_loss_per_char": 0.7069017291069031, "correct_loss_per_token": 1.334636926651001, "incorrect_loss_per_token": 1.4138034582138062, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2984750270843506, "num_tokens": 1, "num_tokens_all": 475, "is_greedy": true, "logits_per_token": -1.2984750270843506, "logits_per_char": -0.6492375135421753, "num_chars": 2}, {"sum_logits": -1.4796313047409058, "num_tokens": 1, "num_tokens_all": 475, "is_greedy": false, "logits_per_token": -1.4796313047409058, "logits_per_char": -0.7398156523704529, "num_chars": 2}, {"sum_logits": -1.463304042816162, "num_tokens": 1, "num_tokens_all": 475, "is_greedy": false, "logits_per_token": -1.463304042816162, "logits_per_char": -0.731652021408081, "num_chars": 2}, {"sum_logits": -1.334636926651001, "num_tokens": 1, "num_tokens_all": 475, "is_greedy": false, "logits_per_token": -1.334636926651001, "logits_per_char": -0.6673184633255005, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 168, "native_id": 168, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4549880027770996, "incorrect_loss_raw": 1.3763134479522705, "correct_loss_per_char": 0.7274940013885498, "incorrect_loss_per_char": 0.6881567239761353, "correct_loss_per_token": 1.4549880027770996, "incorrect_loss_per_token": 1.3763134479522705, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4549880027770996, "num_tokens": 1, "num_tokens_all": 471, "is_greedy": false, "logits_per_token": -1.4549880027770996, "logits_per_char": -0.7274940013885498, "num_chars": 2}, {"sum_logits": -1.446290373802185, "num_tokens": 1, "num_tokens_all": 471, "is_greedy": false, "logits_per_token": -1.446290373802185, "logits_per_char": -0.7231451869010925, "num_chars": 2}, {"sum_logits": -1.4562170505523682, "num_tokens": 1, "num_tokens_all": 471, "is_greedy": false, "logits_per_token": -1.4562170505523682, "logits_per_char": -0.7281085252761841, "num_chars": 2}, {"sum_logits": -1.2264329195022583, "num_tokens": 1, "num_tokens_all": 471, "is_greedy": true, "logits_per_token": -1.2264329195022583, "logits_per_char": -0.6132164597511292, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 169, "native_id": 169, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3960846662521362, "incorrect_loss_raw": 1.3914277950922649, "correct_loss_per_char": 0.6980423331260681, "incorrect_loss_per_char": 0.6957138975461324, "correct_loss_per_token": 1.3960846662521362, "incorrect_loss_per_token": 1.3914277950922649, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3960846662521362, "num_tokens": 1, "num_tokens_all": 509, "is_greedy": false, "logits_per_token": -1.3960846662521362, "logits_per_char": -0.6980423331260681, "num_chars": 2}, {"sum_logits": -1.3492118120193481, "num_tokens": 1, "num_tokens_all": 509, "is_greedy": true, "logits_per_token": -1.3492118120193481, "logits_per_char": -0.6746059060096741, "num_chars": 2}, {"sum_logits": -1.4575114250183105, "num_tokens": 1, "num_tokens_all": 509, "is_greedy": false, "logits_per_token": -1.4575114250183105, "logits_per_char": -0.7287557125091553, "num_chars": 2}, {"sum_logits": -1.3675601482391357, "num_tokens": 1, "num_tokens_all": 509, "is_greedy": false, "logits_per_token": -1.3675601482391357, "logits_per_char": -0.6837800741195679, "num_chars": 2}], "label": 0, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 170, "native_id": 170, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4748952388763428, "incorrect_loss_raw": 1.3712986310323079, "correct_loss_per_char": 0.7374476194381714, "incorrect_loss_per_char": 0.6856493155161539, "correct_loss_per_token": 1.4748952388763428, "incorrect_loss_per_token": 1.3712986310323079, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3288787603378296, "num_tokens": 1, "num_tokens_all": 492, "is_greedy": true, "logits_per_token": -1.3288787603378296, "logits_per_char": -0.6644393801689148, "num_chars": 2}, {"sum_logits": -1.3427354097366333, "num_tokens": 1, "num_tokens_all": 492, "is_greedy": false, "logits_per_token": -1.3427354097366333, "logits_per_char": -0.6713677048683167, "num_chars": 2}, {"sum_logits": -1.442281723022461, "num_tokens": 1, "num_tokens_all": 492, "is_greedy": false, "logits_per_token": -1.442281723022461, "logits_per_char": -0.7211408615112305, "num_chars": 2}, {"sum_logits": -1.4748952388763428, "num_tokens": 1, "num_tokens_all": 492, "is_greedy": false, "logits_per_token": -1.4748952388763428, "logits_per_char": -0.7374476194381714, "num_chars": 2}], "label": 3, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 171, "native_id": 171, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7096575498580933, "incorrect_loss_raw": 1.346509615580241, "correct_loss_per_char": 0.8548287749290466, "incorrect_loss_per_char": 0.6732548077901205, "correct_loss_per_token": 1.7096575498580933, "incorrect_loss_per_token": 1.346509615580241, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.042893886566162, "num_tokens": 1, "num_tokens_all": 507, "is_greedy": true, "logits_per_token": -1.042893886566162, "logits_per_char": -0.521446943283081, "num_chars": 2}, {"sum_logits": -1.2988396883010864, "num_tokens": 1, "num_tokens_all": 507, "is_greedy": false, "logits_per_token": -1.2988396883010864, "logits_per_char": -0.6494198441505432, "num_chars": 2}, {"sum_logits": -1.7096575498580933, "num_tokens": 1, "num_tokens_all": 507, "is_greedy": false, "logits_per_token": -1.7096575498580933, "logits_per_char": -0.8548287749290466, "num_chars": 2}, {"sum_logits": -1.6977952718734741, "num_tokens": 1, "num_tokens_all": 507, "is_greedy": false, "logits_per_token": -1.6977952718734741, "logits_per_char": -0.8488976359367371, "num_chars": 2}], "label": 2, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 172, "native_id": 172, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3361907005310059, "incorrect_loss_raw": 1.4187125364939372, "correct_loss_per_char": 0.6680953502655029, "incorrect_loss_per_char": 0.7093562682469686, "correct_loss_per_token": 1.3361907005310059, "incorrect_loss_per_token": 1.4187125364939372, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2977237701416016, "num_tokens": 1, "num_tokens_all": 461, "is_greedy": true, "logits_per_token": -1.2977237701416016, "logits_per_char": -0.6488618850708008, "num_chars": 2}, {"sum_logits": -1.3361907005310059, "num_tokens": 1, "num_tokens_all": 461, "is_greedy": false, "logits_per_token": -1.3361907005310059, "logits_per_char": -0.6680953502655029, "num_chars": 2}, {"sum_logits": -1.5219085216522217, "num_tokens": 1, "num_tokens_all": 461, "is_greedy": false, "logits_per_token": -1.5219085216522217, "logits_per_char": -0.7609542608261108, "num_chars": 2}, {"sum_logits": -1.4365053176879883, "num_tokens": 1, "num_tokens_all": 461, "is_greedy": false, "logits_per_token": -1.4365053176879883, "logits_per_char": -0.7182526588439941, "num_chars": 2}], "label": 1, "task_hash": "9b3c95bd3bbac8771701a5abc3ab28ba", "model_hash": "03418cf8091a9882619950ffb07429a5"}