{"doc_id": 0, "native_id": 29519, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 82.44107055664062, "incorrect_loss_raw": 116.86351521809895, "correct_loss_per_char": 0.35534944205448543, "incorrect_loss_per_char": 0.4861315026934611, "correct_loss_per_token": 1.616491579541973, "incorrect_loss_per_token": 2.447800822744647, "correct_loss_uncond": -30.13153076171875, "incorrect_loss_uncond": -16.47253672281901}, "model_output": [{"sum_logits": -142.19569396972656, "num_tokens": 59, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -147.9378204345703, "logits_per_token": -2.4100965079614673, "logits_per_char": -0.4739856465657552, "num_chars": 300}, {"sum_logits": -89.8965072631836, "num_tokens": 44, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -117.05452728271484, "logits_per_token": -2.043102437799627, "logits_per_char": -0.42807860601515996, "num_chars": 210}, {"sum_logits": -118.49834442138672, "num_tokens": 41, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -135.01580810546875, "logits_per_token": -2.890203522472847, "logits_per_char": -0.5563302554994681, "num_chars": 213}, {"sum_logits": -82.44107055664062, "num_tokens": 51, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -112.57260131835938, "logits_per_token": -1.616491579541973, "logits_per_char": -0.35534944205448543, "num_chars": 232}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 1, "native_id": 29688, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 17.27979850769043, "incorrect_loss_raw": 33.32557042439779, "correct_loss_per_char": 0.45473153967606394, "incorrect_loss_per_char": 0.6860311501703901, "correct_loss_per_token": 1.9199776119656033, "incorrect_loss_per_token": 3.04966883033213, "correct_loss_uncond": -24.036996841430664, "incorrect_loss_uncond": -16.176260630289715}, "model_output": [{"sum_logits": -34.49020004272461, "num_tokens": 10, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -48.49597930908203, "logits_per_token": -3.449020004272461, "logits_per_char": -0.6898040008544922, "num_chars": 50}, {"sum_logits": -32.046592712402344, "num_tokens": 11, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -43.556724548339844, "logits_per_token": -2.913326610218395, "logits_per_char": -0.7816242124976182, "num_chars": 41}, {"sum_logits": -33.439918518066406, "num_tokens": 12, "num_tokens_all": 417, "is_greedy": false, "sum_logits_uncond": -56.452789306640625, "logits_per_token": -2.7866598765055337, "logits_per_char": -0.5866652371590597, "num_chars": 57}, {"sum_logits": -17.27979850769043, "num_tokens": 9, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -41.316795349121094, "logits_per_token": -1.9199776119656033, "logits_per_char": -0.45473153967606394, "num_chars": 38}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 2, "native_id": 1755, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.482193470001221, "incorrect_loss_raw": 53.87667973836263, "correct_loss_per_char": 0.19643010515155215, "incorrect_loss_per_char": 0.9595174218103084, "correct_loss_per_token": 0.6482193470001221, "incorrect_loss_per_token": 3.8021852793516935, "correct_loss_uncond": -28.980891704559326, "incorrect_loss_uncond": -14.466120402018229}, "model_output": [{"sum_logits": -60.491275787353516, "num_tokens": 16, "num_tokens_all": 402, "is_greedy": false, "sum_logits_uncond": -79.39585876464844, "logits_per_token": -3.7807047367095947, "logits_per_char": -0.9756657385057018, "num_chars": 62}, {"sum_logits": -6.482193470001221, "num_tokens": 10, "num_tokens_all": 396, "is_greedy": false, "sum_logits_uncond": -35.46308517456055, "logits_per_token": -0.6482193470001221, "logits_per_char": -0.19643010515155215, "num_chars": 33}, {"sum_logits": -69.07546997070312, "num_tokens": 17, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -84.04166412353516, "logits_per_token": -4.063262939453125, "logits_per_char": -0.9867924281529018, "num_chars": 70}, {"sum_logits": -32.06329345703125, "num_tokens": 9, "num_tokens_all": 395, "is_greedy": false, "sum_logits_uncond": -41.590877532958984, "logits_per_token": -3.562588161892361, "logits_per_char": -0.9160940987723214, "num_chars": 35}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 3, "native_id": 22230, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 19.35793685913086, "incorrect_loss_raw": 47.501827239990234, "correct_loss_per_char": 0.4721448014422161, "incorrect_loss_per_char": 1.127662433041565, "correct_loss_per_token": 1.759812441739169, "incorrect_loss_per_token": 4.270206778040735, "correct_loss_uncond": -23.83692169189453, "incorrect_loss_uncond": -14.662123362223307}, "model_output": [{"sum_logits": -19.35793685913086, "num_tokens": 11, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -43.19485855102539, "logits_per_token": -1.759812441739169, "logits_per_char": -0.4721448014422161, "num_chars": 41}, {"sum_logits": -49.7335319519043, "num_tokens": 13, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -69.31004333496094, "logits_per_token": -3.825656303992638, "logits_per_char": -1.1303075443614612, "num_chars": 44}, {"sum_logits": -44.02339553833008, "num_tokens": 8, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -47.262298583984375, "logits_per_token": -5.50292444229126, "logits_per_char": -1.257811301095145, "num_chars": 35}, {"sum_logits": -48.74855422973633, "num_tokens": 14, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -69.91950988769531, "logits_per_token": -3.482039587838309, "logits_per_char": -0.9948684536680883, "num_chars": 49}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 4, "native_id": 46509, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 82.19734191894531, "incorrect_loss_raw": 114.31886037190755, "correct_loss_per_char": 0.48068621005231177, "incorrect_loss_per_char": 0.6254817260341397, "correct_loss_per_token": 2.8343911006532867, "incorrect_loss_per_token": 2.9320200746398903, "correct_loss_uncond": -46.4405517578125, "incorrect_loss_uncond": -23.75194040934245}, "model_output": [{"sum_logits": -149.47909545898438, "num_tokens": 55, "num_tokens_all": 498, "is_greedy": false, "sum_logits_uncond": -184.62306213378906, "logits_per_token": -2.717801735617898, "logits_per_char": -0.5376945879819582, "num_chars": 278}, {"sum_logits": -110.12818908691406, "num_tokens": 31, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -126.73207092285156, "logits_per_token": -3.552522228610131, "logits_per_char": -0.7391153629994233, "num_chars": 149}, {"sum_logits": -82.19734191894531, "num_tokens": 29, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -128.6378936767578, "logits_per_token": -2.8343911006532867, "logits_per_char": -0.48068621005231177, "num_chars": 171}, {"sum_logits": -83.34929656982422, "num_tokens": 33, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -102.85726928710938, "logits_per_token": -2.525736259691643, "logits_per_char": -0.5996352271210376, "num_chars": 139}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 5, "native_id": 7996, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 80.47647857666016, "incorrect_loss_raw": 41.62765630086263, "correct_loss_per_char": 0.5917388130636776, "incorrect_loss_per_char": 0.8129828262374513, "correct_loss_per_token": 2.51488995552063, "incorrect_loss_per_token": 3.341474717527955, "correct_loss_uncond": -24.71678924560547, "incorrect_loss_uncond": -14.521907806396484}, "model_output": [{"sum_logits": -37.72502517700195, "num_tokens": 13, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -51.569740295410156, "logits_per_token": -2.901925013615535, "logits_per_char": -0.6859095486727628, "num_chars": 55}, {"sum_logits": -80.47647857666016, "num_tokens": 32, "num_tokens_all": 434, "is_greedy": false, "sum_logits_uncond": -105.19326782226562, "logits_per_token": -2.51488995552063, "logits_per_char": -0.5917388130636776, "num_chars": 136}, {"sum_logits": -57.38530731201172, "num_tokens": 20, "num_tokens_all": 422, "is_greedy": false, "sum_logits_uncond": -72.8441162109375, "logits_per_token": -2.8692653656005858, "logits_per_char": -0.7263962950887559, "num_chars": 79}, {"sum_logits": -29.77263641357422, "num_tokens": 7, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -44.03483581542969, "logits_per_token": -4.253233773367746, "logits_per_char": -1.026642634950835, "num_chars": 29}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 6, "native_id": 20300, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 17.609710693359375, "incorrect_loss_raw": 30.53115431467692, "correct_loss_per_char": 0.9268268785978618, "incorrect_loss_per_char": 0.6613340552024262, "correct_loss_per_token": 3.521942138671875, "incorrect_loss_per_token": 2.647119156267277, "correct_loss_uncond": -9.55194091796875, "incorrect_loss_uncond": -24.485267957051594}, "model_output": [{"sum_logits": -19.780963897705078, "num_tokens": 7, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -39.860816955566406, "logits_per_token": -2.8258519853864397, "logits_per_char": -0.7912385559082031, "num_chars": 25}, {"sum_logits": -10.133299827575684, "num_tokens": 6, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -32.55173873901367, "logits_per_token": -1.6888833045959473, "logits_per_char": -0.44057825337285583, "num_chars": 23}, {"sum_logits": -17.609710693359375, "num_tokens": 5, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -27.161651611328125, "logits_per_token": -3.521942138671875, "logits_per_char": -0.9268268785978618, "num_chars": 19}, {"sum_logits": -61.67919921875, "num_tokens": 18, "num_tokens_all": 423, "is_greedy": false, "sum_logits_uncond": -92.63671112060547, "logits_per_token": -3.4266221788194446, "logits_per_char": -0.7521853563262195, "num_chars": 82}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 7, "native_id": 24469, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 30.640605926513672, "incorrect_loss_raw": 42.49734115600586, "correct_loss_per_char": 0.4642516049471768, "incorrect_loss_per_char": 0.5930438834770208, "correct_loss_per_token": 2.188614709036691, "incorrect_loss_per_token": 2.7374700273786274, "correct_loss_uncond": -32.2872200012207, "incorrect_loss_uncond": -25.38565190633138}, "model_output": [{"sum_logits": -41.72760772705078, "num_tokens": 20, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -67.02387237548828, "logits_per_token": -2.086380386352539, "logits_per_char": -0.4535609535548998, "num_chars": 92}, {"sum_logits": -43.6112174987793, "num_tokens": 14, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -68.56132507324219, "logits_per_token": -3.1150869641985213, "logits_per_char": -0.6230173928397043, "num_chars": 70}, {"sum_logits": -30.640605926513672, "num_tokens": 14, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -62.927825927734375, "logits_per_token": -2.188614709036691, "logits_per_char": -0.4642516049471768, "num_chars": 66}, {"sum_logits": -42.1531982421875, "num_tokens": 14, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -68.06378173828125, "logits_per_token": -3.0109427315848216, "logits_per_char": -0.7025533040364583, "num_chars": 60}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 8, "native_id": 19077, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 107.12481689453125, "incorrect_loss_raw": 123.11056264241536, "correct_loss_per_char": 0.4302201481708082, "incorrect_loss_per_char": 0.6359696853416046, "correct_loss_per_token": 1.9477239435369318, "incorrect_loss_per_token": 2.717010237793515, "correct_loss_uncond": -24.411361694335938, "incorrect_loss_uncond": -15.974876403808594}, "model_output": [{"sum_logits": -201.85772705078125, "num_tokens": 63, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -214.25636291503906, "logits_per_token": -3.2040909055679565, "logits_per_char": -0.66182861328125, "num_chars": 305}, {"sum_logits": -107.12481689453125, "num_tokens": 55, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -131.5361785888672, "logits_per_token": -1.9477239435369318, "logits_per_char": -0.4302201481708082, "num_chars": 249}, {"sum_logits": -99.29785919189453, "num_tokens": 41, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -116.58973693847656, "logits_per_token": -2.4218990046803546, "logits_per_char": -0.6206116199493408, "num_chars": 160}, {"sum_logits": -68.17610168457031, "num_tokens": 27, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -86.41021728515625, "logits_per_token": -2.525040803132234, "logits_per_char": -0.625468822794223, "num_chars": 109}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 9, "native_id": 4929, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 75.46485900878906, "incorrect_loss_raw": 75.30235926310222, "correct_loss_per_char": 0.6037188720703125, "incorrect_loss_per_char": 0.6889062937277215, "correct_loss_per_token": 2.6022365175444504, "incorrect_loss_per_token": 2.9897434894359054, "correct_loss_uncond": -29.546142578125, "incorrect_loss_uncond": -18.230175018310547}, "model_output": [{"sum_logits": -105.10609436035156, "num_tokens": 23, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -108.6274642944336, "logits_per_token": -4.569830189580503, "logits_per_char": -1.0835679830964078, "num_chars": 97}, {"sum_logits": -55.90512466430664, "num_tokens": 25, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -73.41207885742188, "logits_per_token": -2.2362049865722655, "logits_per_char": -0.512891051966116, "num_chars": 109}, {"sum_logits": -64.89585876464844, "num_tokens": 30, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -98.55805969238281, "logits_per_token": -2.1631952921549478, "logits_per_char": -0.47025984612064087, "num_chars": 138}, {"sum_logits": -75.46485900878906, "num_tokens": 29, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -105.01100158691406, "logits_per_token": -2.6022365175444504, "logits_per_char": -0.6037188720703125, "num_chars": 125}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 10, "native_id": 4008, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 98.85967254638672, "incorrect_loss_raw": 58.64485549926758, "correct_loss_per_char": 0.6771210448382652, "incorrect_loss_per_char": 0.693693035503329, "correct_loss_per_token": 3.089364767074585, "incorrect_loss_per_token": 3.09328341657708, "correct_loss_uncond": -34.027137756347656, "incorrect_loss_uncond": -24.588581085205078}, "model_output": [{"sum_logits": -95.16801452636719, "num_tokens": 35, "num_tokens_all": 436, "is_greedy": false, "sum_logits_uncond": -134.54339599609375, "logits_per_token": -2.719086129324777, "logits_per_char": -0.5838528498550134, "num_chars": 163}, {"sum_logits": -21.53650665283203, "num_tokens": 7, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -35.15953826904297, "logits_per_token": -3.076643807547433, "logits_per_char": -0.717883555094401, "num_chars": 30}, {"sum_logits": -59.230045318603516, "num_tokens": 17, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -79.99737548828125, "logits_per_token": -3.48412031285903, "logits_per_char": -0.7793427015605726, "num_chars": 76}, {"sum_logits": -98.85967254638672, "num_tokens": 32, "num_tokens_all": 433, "is_greedy": false, "sum_logits_uncond": -132.88681030273438, "logits_per_token": -3.089364767074585, "logits_per_char": -0.6771210448382652, "num_chars": 146}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 11, "native_id": 7060, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.621476173400879, "incorrect_loss_raw": 36.47395388285319, "correct_loss_per_char": 0.581073808670044, "incorrect_loss_per_char": 0.8131907363327183, "correct_loss_per_token": 2.324295234680176, "incorrect_loss_per_token": 3.837503433227539, "correct_loss_uncond": -18.860554695129395, "incorrect_loss_uncond": -12.447128931681315}, "model_output": [{"sum_logits": -32.18955612182617, "num_tokens": 11, "num_tokens_all": 398, "is_greedy": false, "sum_logits_uncond": -48.541175842285156, "logits_per_token": -2.9263232838023794, "logits_per_char": -0.6569297167719627, "num_chars": 49}, {"sum_logits": -11.621476173400879, "num_tokens": 5, "num_tokens_all": 392, "is_greedy": false, "sum_logits_uncond": -30.482030868530273, "logits_per_token": -2.324295234680176, "logits_per_char": -0.581073808670044, "num_chars": 20}, {"sum_logits": -47.104740142822266, "num_tokens": 11, "num_tokens_all": 398, "is_greedy": false, "sum_logits_uncond": -62.95536422729492, "logits_per_token": -4.282249103892934, "logits_per_char": -0.8411560739789691, "num_chars": 56}, {"sum_logits": -30.127565383911133, "num_tokens": 7, "num_tokens_all": 394, "is_greedy": false, "sum_logits_uncond": -35.26670837402344, "logits_per_token": -4.303937911987305, "logits_per_char": -0.9414864182472229, "num_chars": 32}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 12, "native_id": 3623, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 38.28396987915039, "incorrect_loss_raw": 66.47479756673177, "correct_loss_per_char": 0.46687768145305353, "incorrect_loss_per_char": 0.6886540998492325, "correct_loss_per_token": 2.3927481174468994, "incorrect_loss_per_token": 3.2122836340041387, "correct_loss_uncond": -38.58992385864258, "incorrect_loss_uncond": -26.465375264485676}, "model_output": [{"sum_logits": -76.81626892089844, "num_tokens": 21, "num_tokens_all": 434, "is_greedy": false, "sum_logits_uncond": -103.99964904785156, "logits_per_token": -3.6579175676618303, "logits_per_char": -0.8085923044305099, "num_chars": 95}, {"sum_logits": -58.989524841308594, "num_tokens": 20, "num_tokens_all": 433, "is_greedy": false, "sum_logits_uncond": -80.09005737304688, "logits_per_token": -2.9494762420654297, "logits_per_char": -0.6628036499023438, "num_chars": 89}, {"sum_logits": -38.28396987915039, "num_tokens": 16, "num_tokens_all": 429, "is_greedy": false, "sum_logits_uncond": -76.87389373779297, "logits_per_token": -2.3927481174468994, "logits_per_char": -0.46687768145305353, "num_chars": 82}, {"sum_logits": -63.61859893798828, "num_tokens": 21, "num_tokens_all": 434, "is_greedy": false, "sum_logits_uncond": -94.7308120727539, "logits_per_token": -3.0294570922851562, "logits_per_char": -0.5945663452148438, "num_chars": 107}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 13, "native_id": 18097, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 89.71109771728516, "incorrect_loss_raw": 121.41436258951823, "correct_loss_per_char": 0.6900853670560396, "incorrect_loss_per_char": 0.7394969620411821, "correct_loss_per_token": 2.803471803665161, "incorrect_loss_per_token": 3.29303490645396, "correct_loss_uncond": -20.033775329589844, "incorrect_loss_uncond": -18.925760904947918}, "model_output": [{"sum_logits": -144.83799743652344, "num_tokens": 48, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -162.87339782714844, "logits_per_token": -3.0174582799275718, "logits_per_char": -0.7134876721011006, "num_chars": 203}, {"sum_logits": -89.71109771728516, "num_tokens": 32, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -109.744873046875, "logits_per_token": -2.803471803665161, "logits_per_char": -0.6900853670560396, "num_chars": 130}, {"sum_logits": -80.56715393066406, "num_tokens": 29, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -95.84181213378906, "logits_per_token": -2.7781777217470367, "logits_per_char": -0.6150164422188097, "num_chars": 131}, {"sum_logits": -138.8379364013672, "num_tokens": 34, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -162.30516052246094, "logits_per_token": -4.08346871768727, "logits_per_char": -0.8899867718036358, "num_chars": 156}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 14, "native_id": 34712, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 102.08633422851562, "incorrect_loss_raw": 75.69716898600261, "correct_loss_per_char": 0.537296495939556, "incorrect_loss_per_char": 0.5771714843647343, "correct_loss_per_token": 2.6175983135516825, "incorrect_loss_per_token": 2.5636547765590243, "correct_loss_uncond": -30.410140991210938, "incorrect_loss_uncond": -22.023167928059895}, "model_output": [{"sum_logits": -102.08633422851562, "num_tokens": 39, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -132.49647521972656, "logits_per_token": -2.6175983135516825, "logits_per_char": -0.537296495939556, "num_chars": 190}, {"sum_logits": -126.84789276123047, "num_tokens": 38, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -151.5490264892578, "logits_per_token": -3.338102441085012, "logits_per_char": -0.7126286110181487, "num_chars": 178}, {"sum_logits": -43.1026496887207, "num_tokens": 20, "num_tokens_all": 433, "is_greedy": false, "sum_logits_uncond": -69.55381774902344, "logits_per_token": -2.1551324844360353, "logits_per_char": -0.48980283737182617, "num_chars": 88}, {"sum_logits": -57.14096450805664, "num_tokens": 26, "num_tokens_all": 439, "is_greedy": false, "sum_logits_uncond": -72.05816650390625, "logits_per_token": -2.1977294041560245, "logits_per_char": -0.5290830047042282, "num_chars": 108}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 15, "native_id": 32285, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 75.30406951904297, "incorrect_loss_raw": 92.38731384277344, "correct_loss_per_char": 0.690863023110486, "incorrect_loss_per_char": 0.6836457789550963, "correct_loss_per_token": 2.8963103661170373, "incorrect_loss_per_token": 2.732653418761043, "correct_loss_uncond": -26.444564819335938, "incorrect_loss_uncond": -16.977340698242188}, "model_output": [{"sum_logits": -75.30406951904297, "num_tokens": 26, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -101.7486343383789, "logits_per_token": -2.8963103661170373, "logits_per_char": -0.690863023110486, "num_chars": 109}, {"sum_logits": -78.88667297363281, "num_tokens": 31, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -87.95980834960938, "logits_per_token": -2.54473138624622, "logits_per_char": -0.6573889414469402, "num_chars": 120}, {"sum_logits": -133.65440368652344, "num_tokens": 41, "num_tokens_all": 498, "is_greedy": false, "sum_logits_uncond": -161.28530883789062, "logits_per_token": -3.2598635045493523, "logits_per_char": -0.8459139473830597, "num_chars": 158}, {"sum_logits": -64.62086486816406, "num_tokens": 27, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -78.84884643554688, "logits_per_token": -2.3933653654875577, "logits_per_char": -0.5476344480352887, "num_chars": 118}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 16, "native_id": 37475, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 19.768808364868164, "incorrect_loss_raw": 18.577405611673992, "correct_loss_per_char": 0.4492910992015492, "incorrect_loss_per_char": 0.5910630148329657, "correct_loss_per_token": 2.4711010456085205, "incorrect_loss_per_token": 2.457766281233894, "correct_loss_uncond": -23.590158462524414, "incorrect_loss_uncond": -21.504624366760254}, "model_output": [{"sum_logits": -17.80434226989746, "num_tokens": 6, "num_tokens_all": 399, "is_greedy": false, "sum_logits_uncond": -39.29850387573242, "logits_per_token": -2.9673903783162436, "logits_per_char": -0.6847823949960562, "num_chars": 26}, {"sum_logits": -13.802412986755371, "num_tokens": 8, "num_tokens_all": 401, "is_greedy": false, "sum_logits_uncond": -37.99455261230469, "logits_per_token": -1.7253016233444214, "logits_per_char": -0.418254938992587, "num_chars": 33}, {"sum_logits": -19.768808364868164, "num_tokens": 8, "num_tokens_all": 401, "is_greedy": false, "sum_logits_uncond": -43.35896682739258, "logits_per_token": -2.4711010456085205, "logits_per_char": -0.4492910992015492, "num_chars": 44}, {"sum_logits": -24.12546157836914, "num_tokens": 9, "num_tokens_all": 402, "is_greedy": false, "sum_logits_uncond": -42.953033447265625, "logits_per_token": -2.6806068420410156, "logits_per_char": -0.6701517105102539, "num_chars": 36}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 17, "native_id": 22475, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 37.63185119628906, "incorrect_loss_raw": 30.76700719197591, "correct_loss_per_char": 0.5973309713696676, "incorrect_loss_per_char": 0.8267727798773757, "correct_loss_per_token": 2.894757784329928, "incorrect_loss_per_token": 3.985895538330078, "correct_loss_uncond": -21.373985290527344, "incorrect_loss_uncond": -17.833507537841797}, "model_output": [{"sum_logits": -37.63185119628906, "num_tokens": 13, "num_tokens_all": 401, "is_greedy": false, "sum_logits_uncond": -59.005836486816406, "logits_per_token": -2.894757784329928, "logits_per_char": -0.5973309713696676, "num_chars": 63}, {"sum_logits": -30.977886199951172, "num_tokens": 6, "num_tokens_all": 394, "is_greedy": false, "sum_logits_uncond": -38.67931365966797, "logits_per_token": -5.162981033325195, "logits_per_char": -1.0682029724121094, "num_chars": 29}, {"sum_logits": -34.82745361328125, "num_tokens": 10, "num_tokens_all": 398, "is_greedy": false, "sum_logits_uncond": -56.78413391113281, "logits_per_token": -3.482745361328125, "logits_per_char": -0.8099407817042151, "num_chars": 43}, {"sum_logits": -26.495681762695312, "num_tokens": 8, "num_tokens_all": 396, "is_greedy": false, "sum_logits_uncond": -50.338096618652344, "logits_per_token": -3.311960220336914, "logits_per_char": -0.6021745855158026, "num_chars": 44}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 18, "native_id": 45869, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 31.226125717163086, "incorrect_loss_raw": 45.574143727620445, "correct_loss_per_char": 0.5677477403120561, "incorrect_loss_per_char": 0.9673211820541866, "correct_loss_per_token": 2.8387387015602807, "incorrect_loss_per_token": 3.839054939074394, "correct_loss_uncond": -16.2877140045166, "incorrect_loss_uncond": -17.202768961588543}, "model_output": [{"sum_logits": -31.226125717163086, "num_tokens": 11, "num_tokens_all": 398, "is_greedy": false, "sum_logits_uncond": -47.51383972167969, "logits_per_token": -2.8387387015602807, "logits_per_char": -0.5677477403120561, "num_chars": 55}, {"sum_logits": -37.580142974853516, "num_tokens": 8, "num_tokens_all": 395, "is_greedy": false, "sum_logits_uncond": -51.44755172729492, "logits_per_token": -4.6975178718566895, "logits_per_char": -1.3421479633876257, "num_chars": 28}, {"sum_logits": -43.21227264404297, "num_tokens": 13, "num_tokens_all": 400, "is_greedy": false, "sum_logits_uncond": -66.07799530029297, "logits_per_token": -3.32402097261869, "logits_per_char": -0.6859090895879836, "num_chars": 63}, {"sum_logits": -55.930015563964844, "num_tokens": 16, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -70.80519104003906, "logits_per_token": -3.4956259727478027, "logits_per_char": -0.8739064931869507, "num_chars": 64}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 19, "native_id": 28965, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 98.01995086669922, "incorrect_loss_raw": 156.30268351236978, "correct_loss_per_char": 0.7779361179896763, "incorrect_loss_per_char": 0.7543471999395462, "correct_loss_per_token": 3.2673316955566407, "incorrect_loss_per_token": 3.6758407984024437, "correct_loss_uncond": -29.899871826171875, "incorrect_loss_uncond": -15.912127176920572}, "model_output": [{"sum_logits": -240.05616760253906, "num_tokens": 65, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -259.0322265625, "logits_per_token": -3.6931718092698316, "logits_per_char": -0.7794031415666852, "num_chars": 308}, {"sum_logits": -64.52214050292969, "num_tokens": 20, "num_tokens_all": 431, "is_greedy": false, "sum_logits_uncond": -73.99152374267578, "logits_per_token": -3.2261070251464843, "logits_per_char": -0.6452214050292969, "num_chars": 100}, {"sum_logits": -164.32974243164062, "num_tokens": 40, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -183.6206817626953, "logits_per_token": -4.108243560791015, "logits_per_char": -0.8384170532226562, "num_chars": 196}, {"sum_logits": -98.01995086669922, "num_tokens": 30, "num_tokens_all": 441, "is_greedy": false, "sum_logits_uncond": -127.9198226928711, "logits_per_token": -3.2673316955566407, "logits_per_char": -0.7779361179896763, "num_chars": 126}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 20, "native_id": 3049, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 56.04977035522461, "incorrect_loss_raw": 46.305468241373696, "correct_loss_per_char": 0.709490763990185, "incorrect_loss_per_char": 0.6156458601875133, "correct_loss_per_token": 4.003555025373187, "incorrect_loss_per_token": 3.0535187191433377, "correct_loss_uncond": -19.18075180053711, "incorrect_loss_uncond": -22.79784393310547}, "model_output": [{"sum_logits": -32.7244873046875, "num_tokens": 14, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -55.657997131347656, "logits_per_token": -2.33746337890625, "logits_per_char": -0.4545067681206597, "num_chars": 72}, {"sum_logits": -33.24750518798828, "num_tokens": 12, "num_tokens_all": 407, "is_greedy": false, "sum_logits_uncond": -53.685523986816406, "logits_per_token": -2.770625432332357, "logits_per_char": -0.5635170370845471, "num_chars": 59}, {"sum_logits": -72.94441223144531, "num_tokens": 18, "num_tokens_all": 413, "is_greedy": false, "sum_logits_uncond": -97.96641540527344, "logits_per_token": -4.052467346191406, "logits_per_char": -0.8289137753573331, "num_chars": 88}, {"sum_logits": -56.04977035522461, "num_tokens": 14, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -75.23052215576172, "logits_per_token": -4.003555025373187, "logits_per_char": -0.709490763990185, "num_chars": 79}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 21, "native_id": 36821, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 100.2749252319336, "incorrect_loss_raw": 137.40631612141928, "correct_loss_per_char": 0.651135878129439, "incorrect_loss_per_char": 0.8509751085096527, "correct_loss_per_token": 2.8649978637695312, "incorrect_loss_per_token": 3.4163894799390455, "correct_loss_uncond": -22.661712646484375, "incorrect_loss_uncond": -24.46929931640625}, "model_output": [{"sum_logits": -181.43026733398438, "num_tokens": 54, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -210.939697265625, "logits_per_token": -3.359819765444155, "logits_per_char": -0.9209658240303775, "num_chars": 197}, {"sum_logits": -113.82273864746094, "num_tokens": 33, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -137.22581481933594, "logits_per_token": -3.4491738984079072, "logits_per_char": -0.8308229098354813, "num_chars": 137}, {"sum_logits": -116.9659423828125, "num_tokens": 34, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -137.46133422851562, "logits_per_token": -3.4401747759650734, "logits_per_char": -0.8011365916630994, "num_chars": 146}, {"sum_logits": -100.2749252319336, "num_tokens": 35, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -122.93663787841797, "logits_per_token": -2.8649978637695312, "logits_per_char": -0.651135878129439, "num_chars": 154}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 22, "native_id": 34993, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 86.20411682128906, "incorrect_loss_raw": 132.38372294108072, "correct_loss_per_char": 0.4537058780067845, "incorrect_loss_per_char": 0.7012426046598748, "correct_loss_per_token": 2.004746902820676, "incorrect_loss_per_token": 2.706080991035682, "correct_loss_uncond": -23.478759765625, "incorrect_loss_uncond": -19.676966349283855}, "model_output": [{"sum_logits": -129.8015899658203, "num_tokens": 59, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -147.21664428710938, "logits_per_token": -2.2000269485732256, "logits_per_char": -0.5341629216700424, "num_chars": 243}, {"sum_logits": -86.20411682128906, "num_tokens": 43, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -109.68287658691406, "logits_per_token": -2.004746902820676, "logits_per_char": -0.4537058780067845, "num_chars": 190}, {"sum_logits": -156.06735229492188, "num_tokens": 59, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -181.5590057373047, "logits_per_token": -2.6452093609308793, "logits_per_char": -0.6344201312801703, "num_chars": 246}, {"sum_logits": -111.2822265625, "num_tokens": 34, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -127.40641784667969, "logits_per_token": -3.273006663602941, "logits_per_char": -0.9351447610294118, "num_chars": 119}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 23, "native_id": 6498, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 81.2359619140625, "incorrect_loss_raw": 117.52601623535156, "correct_loss_per_char": 0.4538321894640363, "incorrect_loss_per_char": 0.632123672287462, "correct_loss_per_token": 1.9813649247332317, "incorrect_loss_per_token": 3.1520659013227985, "correct_loss_uncond": -21.59881591796875, "incorrect_loss_uncond": -23.08203887939453}, "model_output": [{"sum_logits": -78.673828125, "num_tokens": 30, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -111.18205261230469, "logits_per_token": -2.6224609375, "logits_per_char": -0.5043194110576923, "num_chars": 156}, {"sum_logits": -81.2359619140625, "num_tokens": 41, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -102.83477783203125, "logits_per_token": -1.9813649247332317, "logits_per_char": -0.4538321894640363, "num_chars": 179}, {"sum_logits": -65.39332580566406, "num_tokens": 22, "num_tokens_all": 441, "is_greedy": false, "sum_logits_uncond": -87.16458892822266, "logits_per_token": -2.9724239002574575, "logits_per_char": -0.6169181679779628, "num_chars": 106}, {"sum_logits": -208.51089477539062, "num_tokens": 54, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -223.47752380371094, "logits_per_token": -3.8613128662109375, "logits_per_char": -0.7751334378267309, "num_chars": 269}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 24, "native_id": 15115, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 42.509883880615234, "incorrect_loss_raw": 49.03297233581543, "correct_loss_per_char": 0.5313735485076905, "incorrect_loss_per_char": 0.655434418915692, "correct_loss_per_token": 2.125494194030762, "incorrect_loss_per_token": 2.6122417813255674, "correct_loss_uncond": -31.686481475830078, "incorrect_loss_uncond": -22.92557716369629}, "model_output": [{"sum_logits": -42.509883880615234, "num_tokens": 20, "num_tokens_all": 421, "is_greedy": false, "sum_logits_uncond": -74.19636535644531, "logits_per_token": -2.125494194030762, "logits_per_char": -0.5313735485076905, "num_chars": 80}, {"sum_logits": -34.63312530517578, "num_tokens": 14, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -61.04974365234375, "logits_per_token": -2.4737946646554128, "logits_per_char": -0.577218755086263, "num_chars": 60}, {"sum_logits": -23.408098220825195, "num_tokens": 13, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -50.43910217285156, "logits_per_token": -1.8006229400634766, "logits_per_char": -0.4416622305816075, "num_chars": 53}, {"sum_logits": -89.05769348144531, "num_tokens": 25, "num_tokens_all": 426, "is_greedy": false, "sum_logits_uncond": -104.38680267333984, "logits_per_token": -3.5623077392578124, "logits_per_char": -0.9474222710792054, "num_chars": 94}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 25, "native_id": 41644, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 69.5885009765625, "incorrect_loss_raw": 106.79284922281902, "correct_loss_per_char": 0.5042644998301631, "incorrect_loss_per_char": 0.5878867183887168, "correct_loss_per_token": 2.5773518880208335, "incorrect_loss_per_token": 2.942677492409933, "correct_loss_uncond": -13.411422729492188, "incorrect_loss_uncond": -14.544665018717447}, "model_output": [{"sum_logits": -69.5885009765625, "num_tokens": 27, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -82.99992370605469, "logits_per_token": -2.5773518880208335, "logits_per_char": -0.5042644998301631, "num_chars": 138}, {"sum_logits": -68.96663665771484, "num_tokens": 25, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -81.92752075195312, "logits_per_token": -2.7586654663085937, "logits_per_char": -0.5305125896747296, "num_chars": 130}, {"sum_logits": -114.20243835449219, "num_tokens": 37, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -131.0643310546875, "logits_per_token": -3.0865523879592485, "logits_per_char": -0.5979185254161895, "num_chars": 191}, {"sum_logits": -137.20947265625, "num_tokens": 46, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -151.02069091796875, "logits_per_token": -2.9828146229619565, "logits_per_char": -0.6352290400752315, "num_chars": 216}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 26, "native_id": 32493, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 41.94902801513672, "incorrect_loss_raw": 125.4673080444336, "correct_loss_per_char": 0.4510648173670615, "incorrect_loss_per_char": 0.7166667796792713, "correct_loss_per_token": 1.9067740006880327, "incorrect_loss_per_token": 3.2443235902111938, "correct_loss_uncond": -30.827728271484375, "incorrect_loss_uncond": -18.46869150797526}, "model_output": [{"sum_logits": -172.09405517578125, "num_tokens": 45, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -185.50140380859375, "logits_per_token": -3.8243123372395833, "logits_per_char": -0.8041778279242114, "num_chars": 214}, {"sum_logits": -73.25408172607422, "num_tokens": 25, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -94.43408203125, "logits_per_token": -2.9301632690429686, "logits_per_char": -0.605405634099787, "num_chars": 121}, {"sum_logits": -131.0537872314453, "num_tokens": 44, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -151.8725128173828, "logits_per_token": -2.97849516435103, "logits_per_char": -0.7404168770138153, "num_chars": 177}, {"sum_logits": -41.94902801513672, "num_tokens": 22, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -72.7767562866211, "logits_per_token": -1.9067740006880327, "logits_per_char": -0.4510648173670615, "num_chars": 93}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 27, "native_id": 16051, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 14.242087364196777, "incorrect_loss_raw": 53.695643742879234, "correct_loss_per_char": 0.4315784049756599, "incorrect_loss_per_char": 0.704054034044877, "correct_loss_per_token": 1.7802609205245972, "incorrect_loss_per_token": 3.0389030383534408, "correct_loss_uncond": -30.11684513092041, "incorrect_loss_uncond": -24.837917963663738}, "model_output": [{"sum_logits": -101.84420013427734, "num_tokens": 20, "num_tokens_all": 431, "is_greedy": false, "sum_logits_uncond": -119.78811645507812, "logits_per_token": -5.092210006713867, "logits_per_char": -1.0499402075698696, "num_chars": 97}, {"sum_logits": -35.56144714355469, "num_tokens": 19, "num_tokens_all": 430, "is_greedy": false, "sum_logits_uncond": -67.97396087646484, "logits_per_token": -1.8716551128186678, "logits_per_char": -0.4390302116488233, "num_chars": 81}, {"sum_logits": -23.681283950805664, "num_tokens": 11, "num_tokens_all": 422, "is_greedy": false, "sum_logits_uncond": -47.83860778808594, "logits_per_token": -2.1528439955277876, "logits_per_char": -0.6231916829159385, "num_chars": 38}, {"sum_logits": -14.242087364196777, "num_tokens": 8, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -44.35893249511719, "logits_per_token": -1.7802609205245972, "logits_per_char": -0.4315784049756599, "num_chars": 33}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 28, "native_id": 49083, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 126.24391174316406, "incorrect_loss_raw": 101.96336364746094, "correct_loss_per_char": 0.4950741636986826, "incorrect_loss_per_char": 0.5965807560810626, "correct_loss_per_token": 2.8054202609592016, "incorrect_loss_per_token": 2.8645036293368036, "correct_loss_uncond": -34.07170104980469, "incorrect_loss_uncond": -23.054718017578125}, "model_output": [{"sum_logits": -73.22666931152344, "num_tokens": 29, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -93.49513244628906, "logits_per_token": -2.5250575624663254, "logits_per_char": -0.5811640421549479, "num_chars": 126}, {"sum_logits": -121.60533142089844, "num_tokens": 42, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -144.14999389648438, "logits_per_token": -2.8953650338309154, "logits_per_char": -0.6050016488601913, "num_chars": 201}, {"sum_logits": -126.24391174316406, "num_tokens": 45, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -160.31561279296875, "logits_per_token": -2.8054202609592016, "logits_per_char": -0.4950741636986826, "num_chars": 255}, {"sum_logits": -111.05809020996094, "num_tokens": 35, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -137.40911865234375, "logits_per_token": -3.1730882917131695, "logits_per_char": -0.6035765772280486, "num_chars": 184}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 29, "native_id": 21710, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 54.02104187011719, "incorrect_loss_raw": 54.09217198689779, "correct_loss_per_char": 0.5936378227485405, "incorrect_loss_per_char": 0.8595774568270098, "correct_loss_per_token": 3.0011689927842884, "incorrect_loss_per_token": 3.4861504509335473, "correct_loss_uncond": -30.245880126953125, "incorrect_loss_uncond": -15.30374526977539}, "model_output": [{"sum_logits": -51.02413558959961, "num_tokens": 14, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -70.09559631347656, "logits_per_token": -3.6445811135428294, "logits_per_char": -0.6713702051263106, "num_chars": 76}, {"sum_logits": -34.974334716796875, "num_tokens": 10, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -46.727088928222656, "logits_per_token": -3.4974334716796873, "logits_per_char": -1.0598283247514204, "num_chars": 33}, {"sum_logits": -76.27804565429688, "num_tokens": 23, "num_tokens_all": 421, "is_greedy": false, "sum_logits_uncond": -91.36506652832031, "logits_per_token": -3.316436767578125, "logits_per_char": -0.8475338406032986, "num_chars": 90}, {"sum_logits": -54.02104187011719, "num_tokens": 18, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -84.26692199707031, "logits_per_token": -3.0011689927842884, "logits_per_char": -0.5936378227485405, "num_chars": 91}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 30, "native_id": 38297, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 55.97454833984375, "incorrect_loss_raw": 96.26733144124348, "correct_loss_per_char": 0.4997727530343192, "incorrect_loss_per_char": 0.5351292125347276, "correct_loss_per_token": 2.7987274169921874, "incorrect_loss_per_token": 2.4687321618439584, "correct_loss_uncond": -23.893898010253906, "incorrect_loss_uncond": -30.503000895182293}, "model_output": [{"sum_logits": -96.12741088867188, "num_tokens": 34, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -126.5818099975586, "logits_per_token": -2.8272767908432903, "logits_per_char": -0.5430927168851518, "num_chars": 177}, {"sum_logits": -99.40526580810547, "num_tokens": 50, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -129.42791748046875, "logits_per_token": -1.9881053161621094, "logits_per_char": -0.49702632904052735, "num_chars": 200}, {"sum_logits": -55.97454833984375, "num_tokens": 20, "num_tokens_all": 436, "is_greedy": false, "sum_logits_uncond": -79.86844635009766, "logits_per_token": -2.7987274169921874, "logits_per_char": -0.4997727530343192, "num_chars": 112}, {"sum_logits": -93.26931762695312, "num_tokens": 36, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -124.30126953125, "logits_per_token": -2.590814378526476, "logits_per_char": -0.5652685916785037, "num_chars": 165}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 31, "native_id": 46128, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 42.006141662597656, "incorrect_loss_raw": 123.15913645426433, "correct_loss_per_char": 0.39258076320184726, "incorrect_loss_per_char": 0.6554818701969404, "correct_loss_per_token": 2.100307083129883, "incorrect_loss_per_token": 3.1779494207958847, "correct_loss_uncond": -44.152252197265625, "incorrect_loss_uncond": -25.26428476969401}, "model_output": [{"sum_logits": -137.91506958007812, "num_tokens": 43, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -169.3096160888672, "logits_per_token": -3.2073271995367008, "logits_per_char": -0.6505427810381044, "num_chars": 212}, {"sum_logits": -145.56832885742188, "num_tokens": 40, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -165.16085815429688, "logits_per_token": -3.639208221435547, "logits_per_char": -0.7784402612696357, "num_chars": 187}, {"sum_logits": -42.006141662597656, "num_tokens": 20, "num_tokens_all": 445, "is_greedy": false, "sum_logits_uncond": -86.15839385986328, "logits_per_token": -2.100307083129883, "logits_per_char": -0.39258076320184726, "num_chars": 107}, {"sum_logits": -85.99401092529297, "num_tokens": 32, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -110.79978942871094, "logits_per_token": -2.6873128414154053, "logits_per_char": -0.537462568283081, "num_chars": 160}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 32, "native_id": 10607, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 22.294723510742188, "incorrect_loss_raw": 44.60842514038086, "correct_loss_per_char": 0.5184819421102834, "incorrect_loss_per_char": 0.9239613674715815, "correct_loss_per_token": 2.4771915011935763, "incorrect_loss_per_token": 3.750402846531561, "correct_loss_uncond": -25.793567657470703, "incorrect_loss_uncond": -15.966196695963541}, "model_output": [{"sum_logits": -24.333171844482422, "num_tokens": 6, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -35.77069091796875, "logits_per_token": -4.05552864074707, "logits_per_char": -0.9012285868326823, "num_chars": 27}, {"sum_logits": -22.294723510742188, "num_tokens": 9, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -48.08829116821289, "logits_per_token": -2.4771915011935763, "logits_per_char": -0.5184819421102834, "num_chars": 43}, {"sum_logits": -62.819278717041016, "num_tokens": 19, "num_tokens_all": 424, "is_greedy": false, "sum_logits_uncond": -90.77901458740234, "logits_per_token": -3.306277827212685, "logits_per_char": -0.7852409839630127, "num_chars": 80}, {"sum_logits": -46.67282485961914, "num_tokens": 12, "num_tokens_all": 417, "is_greedy": false, "sum_logits_uncond": -55.17416000366211, "logits_per_token": -3.8894020716349282, "logits_per_char": -1.0854145316190498, "num_chars": 43}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 33, "native_id": 8919, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 85.87931823730469, "incorrect_loss_raw": 127.90266927083333, "correct_loss_per_char": 0.7403389503215921, "incorrect_loss_per_char": 0.7300385214009409, "correct_loss_per_token": 3.733883401621943, "incorrect_loss_per_token": 3.1733475238826507, "correct_loss_uncond": -16.188934326171875, "incorrect_loss_uncond": -14.341771443684896}, "model_output": [{"sum_logits": -127.66001892089844, "num_tokens": 43, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -136.37704467773438, "logits_per_token": -2.9688376493232194, "logits_per_char": -0.7053039719386655, "num_chars": 181}, {"sum_logits": -85.87931823730469, "num_tokens": 23, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -102.06825256347656, "logits_per_token": -3.733883401621943, "logits_per_char": -0.7403389503215921, "num_chars": 116}, {"sum_logits": -114.00395202636719, "num_tokens": 38, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -132.44032287597656, "logits_per_token": -3.0001040006938733, "logits_per_char": -0.717005987587215, "num_chars": 159}, {"sum_logits": -142.04403686523438, "num_tokens": 40, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -157.91595458984375, "logits_per_token": -3.5511009216308596, "logits_per_char": -0.7678056046769426, "num_chars": 185}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 34, "native_id": 43449, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 57.3451042175293, "incorrect_loss_raw": 131.22632853190103, "correct_loss_per_char": 0.49012909587631875, "incorrect_loss_per_char": 0.5302974951185301, "correct_loss_per_token": 2.048039436340332, "incorrect_loss_per_token": 2.6422952400157658, "correct_loss_uncond": -19.727100372314453, "incorrect_loss_uncond": -25.939280192057293}, "model_output": [{"sum_logits": -57.3451042175293, "num_tokens": 28, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -77.07220458984375, "logits_per_token": -2.048039436340332, "logits_per_char": -0.49012909587631875, "num_chars": 117}, {"sum_logits": -122.09442138671875, "num_tokens": 48, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -150.06617736816406, "logits_per_token": -2.543633778889974, "logits_per_char": -0.49430939832679655, "num_chars": 247}, {"sum_logits": -123.52032470703125, "num_tokens": 51, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -146.75914001464844, "logits_per_token": -2.4219671511182597, "logits_per_char": -0.4443177147734937, "num_chars": 278}, {"sum_logits": -148.06423950195312, "num_tokens": 50, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -174.6715087890625, "logits_per_token": -2.9612847900390626, "logits_per_char": -0.6522653722553001, "num_chars": 227}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 35, "native_id": 14876, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 13.788455963134766, "incorrect_loss_raw": 55.44438616434733, "correct_loss_per_char": 0.22980759938557943, "incorrect_loss_per_char": 0.8116509049261748, "correct_loss_per_token": 0.9848897116524833, "incorrect_loss_per_token": 3.6925515268363203, "correct_loss_uncond": -32.27078628540039, "incorrect_loss_uncond": -18.723804473876953}, "model_output": [{"sum_logits": -81.00989532470703, "num_tokens": 20, "num_tokens_all": 417, "is_greedy": false, "sum_logits_uncond": -95.06462097167969, "logits_per_token": -4.050494766235351, "logits_per_char": -0.9419755270314771, "num_chars": 86}, {"sum_logits": -71.09890747070312, "num_tokens": 17, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -96.41122436523438, "logits_per_token": -4.182288674747243, "logits_per_char": -0.8464155651274181, "num_chars": 84}, {"sum_logits": -13.788455963134766, "num_tokens": 14, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -46.059242248535156, "logits_per_token": -0.9848897116524833, "logits_per_char": -0.22980759938557943, "num_chars": 60}, {"sum_logits": -14.224355697631836, "num_tokens": 5, "num_tokens_all": 402, "is_greedy": false, "sum_logits_uncond": -31.02872657775879, "logits_per_token": -2.8448711395263673, "logits_per_char": -0.6465616226196289, "num_chars": 22}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 36, "native_id": 12887, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 48.75580978393555, "incorrect_loss_raw": 138.81621551513672, "correct_loss_per_char": 0.54781808745995, "incorrect_loss_per_char": 0.6793093038788923, "correct_loss_per_token": 2.3217052278064547, "incorrect_loss_per_token": 2.939533769345348, "correct_loss_uncond": -26.55777359008789, "incorrect_loss_uncond": -16.923978169759113}, "model_output": [{"sum_logits": -113.2765121459961, "num_tokens": 35, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -128.12411499023438, "logits_per_token": -3.236471775599888, "logits_per_char": -0.7355617671817928, "num_chars": 154}, {"sum_logits": -136.62635803222656, "num_tokens": 56, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -152.61465454101562, "logits_per_token": -2.439756393432617, "logits_per_char": -0.5813887575839428, "num_chars": 235}, {"sum_logits": -48.75580978393555, "num_tokens": 21, "num_tokens_all": 445, "is_greedy": false, "sum_logits_uncond": -75.31358337402344, "logits_per_token": -2.3217052278064547, "logits_per_char": -0.54781808745995, "num_chars": 89}, {"sum_logits": -166.5457763671875, "num_tokens": 53, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -186.4818115234375, "logits_per_token": -3.1423731390035377, "logits_per_char": -0.7209773868709416, "num_chars": 231}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 37, "native_id": 34268, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 90.38654327392578, "incorrect_loss_raw": 85.4454345703125, "correct_loss_per_char": 0.5049527557202558, "incorrect_loss_per_char": 0.6579276983033421, "correct_loss_per_token": 2.3176036736904044, "incorrect_loss_per_token": 2.940573197736338, "correct_loss_uncond": -27.137069702148438, "incorrect_loss_uncond": -23.08374277750651}, "model_output": [{"sum_logits": -89.11241912841797, "num_tokens": 26, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -109.50384521484375, "logits_per_token": -3.4274007357083836, "logits_per_char": -0.7128993530273438, "num_chars": 125}, {"sum_logits": -90.38654327392578, "num_tokens": 39, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -117.52361297607422, "logits_per_token": -2.3176036736904044, "logits_per_char": -0.5049527557202558, "num_chars": 179}, {"sum_logits": -92.892333984375, "num_tokens": 31, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -108.19207000732422, "logits_per_token": -2.996526902721774, "logits_per_char": -0.636248862906678, "num_chars": 146}, {"sum_logits": -74.33155059814453, "num_tokens": 31, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -107.89161682128906, "logits_per_token": -2.3977919547788558, "logits_per_char": -0.6246348789760044, "num_chars": 119}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 38, "native_id": 19186, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 188.4366455078125, "incorrect_loss_raw": 180.0181427001953, "correct_loss_per_char": 0.6431284829618175, "incorrect_loss_per_char": 0.6422382029149275, "correct_loss_per_token": 3.3649400983537947, "incorrect_loss_per_token": 3.2609623980321003, "correct_loss_uncond": -34.20930480957031, "incorrect_loss_uncond": -17.02679697672526}, "model_output": [{"sum_logits": -188.4366455078125, "num_tokens": 56, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -222.6459503173828, "logits_per_token": -3.3649400983537947, "logits_per_char": -0.6431284829618175, "num_chars": 293}, {"sum_logits": -235.71713256835938, "num_tokens": 63, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -249.90560913085938, "logits_per_token": -3.7415417867993552, "logits_per_char": -0.7297744042364067, "num_chars": 323}, {"sum_logits": -195.80282592773438, "num_tokens": 63, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -219.23565673828125, "logits_per_token": -3.1079813639322915, "logits_per_char": -0.6196291959738429, "num_chars": 316}, {"sum_logits": -108.53446960449219, "num_tokens": 37, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -121.9935531616211, "logits_per_token": -2.9333640433646537, "logits_per_char": -0.5773110085345329, "num_chars": 188}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 39, "native_id": 8510, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 72.55391693115234, "incorrect_loss_raw": 117.67668151855469, "correct_loss_per_char": 0.6717955271402994, "incorrect_loss_per_char": 0.7652224507045604, "correct_loss_per_token": 2.9021566772460936, "incorrect_loss_per_token": 3.683060544376694, "correct_loss_uncond": -25.27350616455078, "incorrect_loss_uncond": -12.498527526855469}, "model_output": [{"sum_logits": -109.34451293945312, "num_tokens": 40, "num_tokens_all": 490, "is_greedy": false, "sum_logits_uncond": -126.12163543701172, "logits_per_token": -2.733612823486328, "logits_per_char": -0.6212756417014382, "num_chars": 176}, {"sum_logits": -72.55391693115234, "num_tokens": 25, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -97.82742309570312, "logits_per_token": -2.9021566772460936, "logits_per_char": -0.6717955271402994, "num_chars": 108}, {"sum_logits": -105.12727355957031, "num_tokens": 23, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -102.16781616210938, "logits_per_token": -4.570751024329144, "logits_per_char": -0.8688204426410769, "num_chars": 121}, {"sum_logits": -138.55825805664062, "num_tokens": 37, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -162.23617553710938, "logits_per_token": -3.7448177853146114, "logits_per_char": -0.8055712677711664, "num_chars": 172}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 40, "native_id": 6423, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 51.679222106933594, "incorrect_loss_raw": 67.72361246744792, "correct_loss_per_char": 0.3270836842210987, "incorrect_loss_per_char": 0.40774169305973557, "correct_loss_per_token": 1.4765492030552456, "incorrect_loss_per_token": 1.9044996508845575, "correct_loss_uncond": -23.487693786621094, "incorrect_loss_uncond": -16.773351033528645}, "model_output": [{"sum_logits": -84.77606201171875, "num_tokens": 36, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -103.85198974609375, "logits_per_token": -2.354890611436632, "logits_per_char": -0.4709781222873264, "num_chars": 180}, {"sum_logits": -54.59654235839844, "num_tokens": 40, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -73.65737915039062, "logits_per_token": -1.364913558959961, "logits_per_char": -0.32692540334370324, "num_chars": 167}, {"sum_logits": -51.679222106933594, "num_tokens": 35, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -75.16691589355469, "logits_per_token": -1.4765492030552456, "logits_per_char": -0.3270836842210987, "num_chars": 158}, {"sum_logits": -63.79823303222656, "num_tokens": 32, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -75.98152160644531, "logits_per_token": -1.99369478225708, "logits_per_char": -0.4253215535481771, "num_chars": 150}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 41, "native_id": 296, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 71.72764587402344, "incorrect_loss_raw": 96.38549296061198, "correct_loss_per_char": 0.5517511221078726, "incorrect_loss_per_char": 0.5506239049574908, "correct_loss_per_token": 2.2414889335632324, "incorrect_loss_per_token": 2.2407873470369988, "correct_loss_uncond": -15.407707214355469, "incorrect_loss_uncond": -36.46465810139974}, "model_output": [{"sum_logits": -71.72764587402344, "num_tokens": 32, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -87.1353530883789, "logits_per_token": -2.2414889335632324, "logits_per_char": -0.5517511221078726, "num_chars": 130}, {"sum_logits": -117.55975341796875, "num_tokens": 51, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -153.15011596679688, "logits_per_token": -2.305093204273897, "logits_per_char": -0.54678955078125, "num_chars": 215}, {"sum_logits": -94.35000610351562, "num_tokens": 36, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -126.08598327636719, "logits_per_token": -2.620833502875434, "logits_per_char": -0.650689697265625, "num_chars": 145}, {"sum_logits": -77.24671936035156, "num_tokens": 43, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -119.3143539428711, "logits_per_token": -1.7964353339616643, "logits_per_char": -0.4543924668255974, "num_chars": 170}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 42, "native_id": 31143, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 49.15155792236328, "incorrect_loss_raw": 105.76847585042317, "correct_loss_per_char": 0.4200987856612246, "incorrect_loss_per_char": 0.5492815157076021, "correct_loss_per_token": 2.3405503772553944, "incorrect_loss_per_token": 2.962780057466947, "correct_loss_uncond": -10.125545501708984, "incorrect_loss_uncond": -11.007520039876303}, "model_output": [{"sum_logits": -117.18257141113281, "num_tokens": 39, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -122.8885498046875, "logits_per_token": -3.0046813182341747, "logits_per_char": -0.5660993787977431, "num_chars": 207}, {"sum_logits": -49.15155792236328, "num_tokens": 21, "num_tokens_all": 441, "is_greedy": false, "sum_logits_uncond": -59.277103424072266, "logits_per_token": -2.3405503772553944, "logits_per_char": -0.4200987856612246, "num_chars": 117}, {"sum_logits": -94.45236206054688, "num_tokens": 40, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -106.40034484863281, "logits_per_token": -2.361309051513672, "logits_per_char": -0.5105533084353885, "num_chars": 185}, {"sum_logits": -105.67049407958984, "num_tokens": 30, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -121.03909301757812, "logits_per_token": -3.522349802652995, "logits_per_char": -0.5711918598896748, "num_chars": 185}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 43, "native_id": 40282, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 107.70649719238281, "incorrect_loss_raw": 168.51780700683594, "correct_loss_per_char": 0.4642521430706156, "incorrect_loss_per_char": 0.7737011502887551, "correct_loss_per_token": 2.243885358174642, "incorrect_loss_per_token": 3.579845416798354, "correct_loss_uncond": -50.95018005371094, "incorrect_loss_uncond": -16.043930053710938}, "model_output": [{"sum_logits": -176.9749755859375, "num_tokens": 49, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -183.95091247558594, "logits_per_token": -3.6117341956313775, "logits_per_char": -0.7728164872748362, "num_chars": 229}, {"sum_logits": -107.70649719238281, "num_tokens": 48, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -158.65667724609375, "logits_per_token": -2.243885358174642, "logits_per_char": -0.4642521430706156, "num_chars": 232}, {"sum_logits": -144.63565063476562, "num_tokens": 45, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -171.23489379882812, "logits_per_token": -3.214125569661458, "logits_per_char": -0.688741193498884, "num_chars": 210}, {"sum_logits": -183.9427947998047, "num_tokens": 47, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -198.49940490722656, "logits_per_token": -3.9136764851022274, "logits_per_char": -0.8595457700925453, "num_chars": 214}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 44, "native_id": 4430, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 96.05430603027344, "incorrect_loss_raw": 97.63607533772786, "correct_loss_per_char": 0.6197052001953125, "incorrect_loss_per_char": 0.8344816097241834, "correct_loss_per_token": 2.6681751675075955, "incorrect_loss_per_token": 3.803962967612527, "correct_loss_uncond": -55.558135986328125, "incorrect_loss_uncond": -28.619061787923176}, "model_output": [{"sum_logits": -77.99337768554688, "num_tokens": 22, "num_tokens_all": 442, "is_greedy": false, "sum_logits_uncond": -103.88593292236328, "logits_per_token": -3.5451535311612217, "logits_per_char": -0.795850792709662, "num_chars": 98}, {"sum_logits": -119.16645050048828, "num_tokens": 24, "num_tokens_all": 444, "is_greedy": false, "sum_logits_uncond": -158.8853759765625, "logits_per_token": -4.965268770853679, "logits_per_char": -0.9930537541707357, "num_chars": 120}, {"sum_logits": -95.74839782714844, "num_tokens": 33, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -115.99410247802734, "logits_per_token": -2.9014666008226797, "logits_per_char": -0.7145402822921525, "num_chars": 134}, {"sum_logits": -96.05430603027344, "num_tokens": 36, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -151.61244201660156, "logits_per_token": -2.6681751675075955, "logits_per_char": -0.6197052001953125, "num_chars": 155}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 45, "native_id": 37463, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 75.15345764160156, "incorrect_loss_raw": 65.60297775268555, "correct_loss_per_char": 0.46106415731043904, "incorrect_loss_per_char": 0.5033860797765678, "correct_loss_per_token": 2.0875960456000433, "incorrect_loss_per_token": 2.271335570017497, "correct_loss_uncond": -29.549346923828125, "incorrect_loss_uncond": -25.75625991821289}, "model_output": [{"sum_logits": -75.15345764160156, "num_tokens": 36, "num_tokens_all": 447, "is_greedy": false, "sum_logits_uncond": -104.70280456542969, "logits_per_token": -2.0875960456000433, "logits_per_char": -0.46106415731043904, "num_chars": 163}, {"sum_logits": -61.34653091430664, "num_tokens": 25, "num_tokens_all": 436, "is_greedy": false, "sum_logits_uncond": -70.04130554199219, "logits_per_token": -2.4538612365722656, "logits_per_char": -0.6134653091430664, "num_chars": 100}, {"sum_logits": -60.93379211425781, "num_tokens": 30, "num_tokens_all": 441, "is_greedy": false, "sum_logits_uncond": -99.2520523071289, "logits_per_token": -2.0311264038085937, "logits_per_char": -0.3931212394468246, "num_chars": 155}, {"sum_logits": -74.52861022949219, "num_tokens": 32, "num_tokens_all": 443, "is_greedy": false, "sum_logits_uncond": -104.78435516357422, "logits_per_token": -2.329019069671631, "logits_per_char": -0.5035716907398121, "num_chars": 148}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 46, "native_id": 16457, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.464608192443848, "incorrect_loss_raw": 60.782876332600914, "correct_loss_per_char": 0.3488202730814616, "incorrect_loss_per_char": 0.8048791942930565, "correct_loss_per_token": 1.7441013654073079, "incorrect_loss_per_token": 3.7307937339500143, "correct_loss_uncond": -23.87918758392334, "incorrect_loss_uncond": -11.946215311686197}, "model_output": [{"sum_logits": -10.464608192443848, "num_tokens": 6, "num_tokens_all": 420, "is_greedy": false, "sum_logits_uncond": -34.34379577636719, "logits_per_token": -1.7441013654073079, "logits_per_char": -0.3488202730814616, "num_chars": 30}, {"sum_logits": -81.33341217041016, "num_tokens": 20, "num_tokens_all": 434, "is_greedy": false, "sum_logits_uncond": -95.92394256591797, "logits_per_token": -4.066670608520508, "logits_per_char": -0.8299327772490832, "num_chars": 98}, {"sum_logits": -54.495147705078125, "num_tokens": 12, "num_tokens_all": 426, "is_greedy": false, "sum_logits_uncond": -63.47769546508789, "logits_per_token": -4.541262308756511, "logits_per_char": -0.9560552228961074, "num_chars": 57}, {"sum_logits": -46.52006912231445, "num_tokens": 18, "num_tokens_all": 432, "is_greedy": false, "sum_logits_uncond": -58.78563690185547, "logits_per_token": -2.5844482845730252, "logits_per_char": -0.6286495827339791, "num_chars": 74}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 47, "native_id": 1200, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 25.749752044677734, "incorrect_loss_raw": 72.62773895263672, "correct_loss_per_char": 0.4598170007978167, "incorrect_loss_per_char": 0.848632408549204, "correct_loss_per_token": 2.145812670389811, "incorrect_loss_per_token": 3.9064206512380033, "correct_loss_uncond": -26.305286407470703, "incorrect_loss_uncond": -14.776799519856771}, "model_output": [{"sum_logits": -43.996116638183594, "num_tokens": 10, "num_tokens_all": 413, "is_greedy": false, "sum_logits_uncond": -50.57691955566406, "logits_per_token": -4.3996116638183596, "logits_per_char": -0.9776914808485243, "num_chars": 45}, {"sum_logits": -25.749752044677734, "num_tokens": 12, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -52.05503845214844, "logits_per_token": -2.145812670389811, "logits_per_char": -0.4598170007978167, "num_chars": 56}, {"sum_logits": -83.55615997314453, "num_tokens": 26, "num_tokens_all": 429, "is_greedy": false, "sum_logits_uncond": -102.29544067382812, "logits_per_token": -3.213698460505559, "logits_per_char": -0.6738399997834237, "num_chars": 124}, {"sum_logits": -90.33094024658203, "num_tokens": 22, "num_tokens_all": 425, "is_greedy": false, "sum_logits_uncond": -109.34125518798828, "logits_per_token": -4.105951829390093, "logits_per_char": -0.8943657450156637, "num_chars": 101}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 48, "native_id": 16649, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 107.75828552246094, "incorrect_loss_raw": 108.64518737792969, "correct_loss_per_char": 0.7136310299500724, "incorrect_loss_per_char": 0.6787027926687331, "correct_loss_per_token": 2.912386095201647, "incorrect_loss_per_token": 2.9029885520292, "correct_loss_uncond": -18.254226684570312, "incorrect_loss_uncond": -24.929039001464844}, "model_output": [{"sum_logits": -94.82576751708984, "num_tokens": 43, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -121.9271011352539, "logits_per_token": -2.2052504073741823, "logits_per_char": -0.5327290309948868, "num_chars": 178}, {"sum_logits": -107.75828552246094, "num_tokens": 37, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -126.01251220703125, "logits_per_token": -2.912386095201647, "logits_per_char": -0.7136310299500724, "num_chars": 151}, {"sum_logits": -105.8384017944336, "num_tokens": 35, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -130.02944946289062, "logits_per_token": -3.023954336983817, "logits_per_char": -0.6453561085026439, "num_chars": 164}, {"sum_logits": -125.27139282226562, "num_tokens": 36, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -148.76612854003906, "logits_per_token": -3.479760911729601, "logits_per_char": -0.8580232385086687, "num_chars": 146}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 49, "native_id": 36242, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 29.13253402709961, "incorrect_loss_raw": 31.07917022705078, "correct_loss_per_char": 0.51109708819473, "incorrect_loss_per_char": 0.6105674791559926, "correct_loss_per_token": 1.8207833766937256, "incorrect_loss_per_token": 2.6750517161205565, "correct_loss_uncond": -27.638511657714844, "incorrect_loss_uncond": -22.84820810953776}, "model_output": [{"sum_logits": -28.983156204223633, "num_tokens": 15, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -55.68488311767578, "logits_per_token": -1.9322104136149088, "logits_per_char": -0.40821346766512157, "num_chars": 71}, {"sum_logits": -29.13253402709961, "num_tokens": 16, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -56.77104568481445, "logits_per_token": -1.8207833766937256, "logits_per_char": -0.51109708819473, "num_chars": 57}, {"sum_logits": -36.573978424072266, "num_tokens": 11, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -54.472259521484375, "logits_per_token": -3.324907129461115, "logits_per_char": -0.7314795684814454, "num_chars": 50}, {"sum_logits": -27.680376052856445, "num_tokens": 10, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -51.62499237060547, "logits_per_token": -2.7680376052856444, "logits_per_char": -0.6920094013214111, "num_chars": 40}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 50, "native_id": 33325, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 133.6472625732422, "incorrect_loss_raw": 111.40464782714844, "correct_loss_per_char": 0.6130608374919366, "incorrect_loss_per_char": 0.8597736160257433, "correct_loss_per_token": 3.0374377857555044, "incorrect_loss_per_token": 4.258574417342743, "correct_loss_uncond": -17.224594116210938, "incorrect_loss_uncond": -11.836659749348959}, "model_output": [{"sum_logits": -138.54623413085938, "num_tokens": 27, "num_tokens_all": 431, "is_greedy": false, "sum_logits_uncond": -154.06137084960938, "logits_per_token": -5.131342004846643, "logits_per_char": -1.041701008502702, "num_chars": 133}, {"sum_logits": -95.9268798828125, "num_tokens": 29, "num_tokens_all": 433, "is_greedy": false, "sum_logits_uncond": -105.38856506347656, "logits_per_token": -3.3078234442349137, "logits_per_char": -0.7267187869910038, "num_chars": 132}, {"sum_logits": -99.74082946777344, "num_tokens": 23, "num_tokens_all": 427, "is_greedy": false, "sum_logits_uncond": -110.27398681640625, "logits_per_token": -4.336557802946671, "logits_per_char": -0.8109010525835239, "num_chars": 123}, {"sum_logits": -133.6472625732422, "num_tokens": 44, "num_tokens_all": 448, "is_greedy": false, "sum_logits_uncond": -150.87185668945312, "logits_per_token": -3.0374377857555044, "logits_per_char": -0.6130608374919366, "num_chars": 218}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 51, "native_id": 21837, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 82.44768524169922, "incorrect_loss_raw": 149.9601084391276, "correct_loss_per_char": 0.36003356000742015, "incorrect_loss_per_char": 0.7970137059432375, "correct_loss_per_token": 1.5855324084942157, "incorrect_loss_per_token": 3.401167874055328, "correct_loss_uncond": -40.384788513183594, "incorrect_loss_uncond": -20.56988525390625}, "model_output": [{"sum_logits": -128.7489013671875, "num_tokens": 44, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -149.60366821289062, "logits_per_token": -2.9261113947088067, "logits_per_char": -0.7442133027005058, "num_chars": 173}, {"sum_logits": -178.0309295654297, "num_tokens": 46, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -201.20962524414062, "logits_per_token": -3.8702375992484717, "logits_per_char": -0.9129791259765625, "num_chars": 195}, {"sum_logits": -82.44768524169922, "num_tokens": 52, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -122.83247375488281, "logits_per_token": -1.5855324084942157, "logits_per_char": -0.36003356000742015, "num_chars": 229}, {"sum_logits": -143.10049438476562, "num_tokens": 42, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -160.7766876220703, "logits_per_token": -3.4071546282087053, "logits_per_char": -0.7338486891526442, "num_chars": 195}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 52, "native_id": 41876, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 24.510379791259766, "incorrect_loss_raw": 31.502920786539715, "correct_loss_per_char": 0.9427069150484525, "incorrect_loss_per_char": 0.8478051845843976, "correct_loss_per_token": 4.085063298543294, "incorrect_loss_per_token": 3.2817660030136757, "correct_loss_uncond": -14.599845886230469, "incorrect_loss_uncond": -15.4775021870931}, "model_output": [{"sum_logits": -24.510379791259766, "num_tokens": 6, "num_tokens_all": 401, "is_greedy": false, "sum_logits_uncond": -39.110225677490234, "logits_per_token": -4.085063298543294, "logits_per_char": -0.9427069150484525, "num_chars": 26}, {"sum_logits": -35.182151794433594, "num_tokens": 8, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -45.445369720458984, "logits_per_token": -4.397768974304199, "logits_per_char": -0.8795537948608398, "num_chars": 40}, {"sum_logits": -33.47126007080078, "num_tokens": 13, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -52.77619552612305, "logits_per_token": -2.5747123131385217, "logits_per_char": -0.6694252014160156, "num_chars": 50}, {"sum_logits": -25.855350494384766, "num_tokens": 9, "num_tokens_all": 404, "is_greedy": false, "sum_logits_uncond": -42.719703674316406, "logits_per_token": -2.872816721598307, "logits_per_char": -0.9944365574763372, "num_chars": 26}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 53, "native_id": 12293, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.17245101928711, "incorrect_loss_raw": 45.99030685424805, "correct_loss_per_char": 0.36689804077148436, "incorrect_loss_per_char": 0.8117889781221767, "correct_loss_per_token": 1.3103501456124442, "incorrect_loss_per_token": 3.8004369944434377, "correct_loss_uncond": -25.70742416381836, "incorrect_loss_uncond": -13.528739929199219}, "model_output": [{"sum_logits": -68.51251220703125, "num_tokens": 18, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -84.23796081542969, "logits_per_token": -3.8062506781684027, "logits_per_char": -0.7785512750799005, "num_chars": 88}, {"sum_logits": -37.56601333618164, "num_tokens": 8, "num_tokens_all": 400, "is_greedy": false, "sum_logits_uncond": -45.76849365234375, "logits_per_token": -4.695751667022705, "logits_per_char": -1.04350037044949, "num_chars": 36}, {"sum_logits": -9.17245101928711, "num_tokens": 7, "num_tokens_all": 399, "is_greedy": false, "sum_logits_uncond": -34.87987518310547, "logits_per_token": -1.3103501456124442, "logits_per_char": -0.36689804077148436, "num_chars": 25}, {"sum_logits": -31.89239501953125, "num_tokens": 11, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -48.55068588256836, "logits_per_token": -2.8993086381392046, "logits_per_char": -0.6133152888371394, "num_chars": 52}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 54, "native_id": 18208, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 42.35536575317383, "incorrect_loss_raw": 62.373799641927086, "correct_loss_per_char": 0.7563458170209613, "incorrect_loss_per_char": 0.7972995098366534, "correct_loss_per_token": 3.529613812764486, "incorrect_loss_per_token": 3.809962765114586, "correct_loss_uncond": -22.588191986083984, "incorrect_loss_uncond": -21.40570831298828}, "model_output": [{"sum_logits": -72.12197875976562, "num_tokens": 19, "num_tokens_all": 425, "is_greedy": false, "sum_logits_uncond": -93.34429931640625, "logits_per_token": -3.795893618935033, "logits_per_char": -1.0452460689821106, "num_chars": 69}, {"sum_logits": -66.96932983398438, "num_tokens": 17, "num_tokens_all": 423, "is_greedy": false, "sum_logits_uncond": -92.75306701660156, "logits_per_token": -3.9393723431755516, "logits_per_char": -0.6975971857706705, "num_chars": 96}, {"sum_logits": -48.03009033203125, "num_tokens": 13, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -65.24115753173828, "logits_per_token": -3.694622333233173, "logits_per_char": -0.6490552747571791, "num_chars": 74}, {"sum_logits": -42.35536575317383, "num_tokens": 12, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -64.94355773925781, "logits_per_token": -3.529613812764486, "logits_per_char": -0.7563458170209613, "num_chars": 56}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 55, "native_id": 47214, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 100.80012512207031, "incorrect_loss_raw": 72.50318145751953, "correct_loss_per_char": 0.5760007149832589, "incorrect_loss_per_char": 0.5206336081362467, "correct_loss_per_token": 2.4585396371236663, "incorrect_loss_per_token": 2.4342178376782844, "correct_loss_uncond": -20.972747802734375, "incorrect_loss_uncond": -28.33393096923828}, "model_output": [{"sum_logits": -100.80012512207031, "num_tokens": 41, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -121.77287292480469, "logits_per_token": -2.4585396371236663, "logits_per_char": -0.5760007149832589, "num_chars": 175}, {"sum_logits": -73.86639404296875, "num_tokens": 34, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -103.81556701660156, "logits_per_token": -2.1725410012637867, "logits_per_char": -0.4559653953269676, "num_chars": 162}, {"sum_logits": -74.89044189453125, "num_tokens": 28, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -101.12804412841797, "logits_per_token": -2.674658639090402, "logits_per_char": -0.5850815773010254, "num_chars": 128}, {"sum_logits": -68.7527084350586, "num_tokens": 28, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -97.5677261352539, "logits_per_token": -2.455453872680664, "logits_per_char": -0.520853851780747, "num_chars": 132}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 56, "native_id": 36066, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 71.12550354003906, "incorrect_loss_raw": 88.14259084065755, "correct_loss_per_char": 0.49392710791693795, "incorrect_loss_per_char": 0.7282335862903205, "correct_loss_per_token": 2.370850118001302, "incorrect_loss_per_token": 2.8334717042867807, "correct_loss_uncond": -13.080650329589844, "incorrect_loss_uncond": -12.892974853515625}, "model_output": [{"sum_logits": -92.48347473144531, "num_tokens": 38, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -103.34171295166016, "logits_per_token": -2.433775650827508, "logits_per_char": -0.581657073782675, "num_chars": 159}, {"sum_logits": -103.010009765625, "num_tokens": 37, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -115.19241333007812, "logits_per_token": -2.784054317989865, "logits_per_char": -0.6960135794974662, "num_chars": 148}, {"sum_logits": -68.93428802490234, "num_tokens": 21, "num_tokens_all": 443, "is_greedy": false, "sum_logits_uncond": -84.57257080078125, "logits_per_token": -3.2825851440429688, "logits_per_char": -0.9070301055908203, "num_chars": 76}, {"sum_logits": -71.12550354003906, "num_tokens": 30, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -84.2061538696289, "logits_per_token": -2.370850118001302, "logits_per_char": -0.49392710791693795, "num_chars": 144}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 57, "native_id": 40788, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 39.884307861328125, "incorrect_loss_raw": 54.10745366414388, "correct_loss_per_char": 0.6330842517671131, "incorrect_loss_per_char": 0.7935929611874529, "correct_loss_per_token": 3.3236923217773438, "incorrect_loss_per_token": 3.6063193103723363, "correct_loss_uncond": -26.29058837890625, "incorrect_loss_uncond": -16.52832285563151}, "model_output": [{"sum_logits": -61.397239685058594, "num_tokens": 14, "num_tokens_all": 435, "is_greedy": false, "sum_logits_uncond": -86.73886108398438, "logits_per_token": -4.385517120361328, "logits_per_char": -0.8898150678993999, "num_chars": 69}, {"sum_logits": -64.39325714111328, "num_tokens": 19, "num_tokens_all": 440, "is_greedy": false, "sum_logits_uncond": -79.96367645263672, "logits_per_token": -3.389118796900699, "logits_per_char": -0.8820994128919628, "num_chars": 73}, {"sum_logits": -36.531864166259766, "num_tokens": 12, "num_tokens_all": 433, "is_greedy": false, "sum_logits_uncond": -45.20479202270508, "logits_per_token": -3.0443220138549805, "logits_per_char": -0.6088644027709961, "num_chars": 60}, {"sum_logits": -39.884307861328125, "num_tokens": 12, "num_tokens_all": 433, "is_greedy": false, "sum_logits_uncond": -66.17489624023438, "logits_per_token": -3.3236923217773438, "logits_per_char": -0.6330842517671131, "num_chars": 63}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 58, "native_id": 18133, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 24.787403106689453, "incorrect_loss_raw": 44.983175913492836, "correct_loss_per_char": 0.4348667211699904, "incorrect_loss_per_char": 0.7343893139963069, "correct_loss_per_token": 1.9067233158991888, "incorrect_loss_per_token": 3.3978561456413323, "correct_loss_uncond": -31.746963500976562, "incorrect_loss_uncond": -31.219071706136067}, "model_output": [{"sum_logits": -24.787403106689453, "num_tokens": 13, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -56.534366607666016, "logits_per_token": -1.9067233158991888, "logits_per_char": -0.4348667211699904, "num_chars": 57}, {"sum_logits": -40.424442291259766, "num_tokens": 11, "num_tokens_all": 413, "is_greedy": false, "sum_logits_uncond": -75.36644744873047, "logits_per_token": -3.674949299205433, "logits_per_char": -0.6851600388349113, "num_chars": 59}, {"sum_logits": -48.96626281738281, "num_tokens": 15, "num_tokens_all": 417, "is_greedy": false, "sum_logits_uncond": -82.79390716552734, "logits_per_token": -3.264417521158854, "logits_per_char": -0.689665673484265, "num_chars": 71}, {"sum_logits": -45.55882263183594, "num_tokens": 14, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -70.4463882446289, "logits_per_token": -3.2542016165597096, "logits_per_char": -0.8283422296697444, "num_chars": 55}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 59, "native_id": 43540, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 74.96666717529297, "incorrect_loss_raw": 75.53369267781575, "correct_loss_per_char": 0.4119047646994119, "incorrect_loss_per_char": 0.621143575216811, "correct_loss_per_token": 1.6659259372287327, "incorrect_loss_per_token": 2.771488578992012, "correct_loss_uncond": -40.234169006347656, "incorrect_loss_uncond": -11.56545639038086}, "model_output": [{"sum_logits": -74.96666717529297, "num_tokens": 45, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -115.20083618164062, "logits_per_token": -1.6659259372287327, "logits_per_char": -0.4119047646994119, "num_chars": 182}, {"sum_logits": -61.55563735961914, "num_tokens": 26, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -77.77116394042969, "logits_per_token": -2.3675245138315053, "logits_per_char": -0.5647306179781573, "num_chars": 109}, {"sum_logits": -66.81398010253906, "num_tokens": 25, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -74.8514404296875, "logits_per_token": -2.6725592041015624, "logits_per_char": -0.5710596589960604, "num_chars": 117}, {"sum_logits": -98.23146057128906, "num_tokens": 30, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -108.67484283447266, "logits_per_token": -3.2743820190429687, "logits_per_char": -0.7276404486762152, "num_chars": 135}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 60, "native_id": 2195, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 107.13554382324219, "incorrect_loss_raw": 142.07268778483072, "correct_loss_per_char": 0.49830485499182414, "incorrect_loss_per_char": 0.6883388937018928, "correct_loss_per_token": 2.142710876464844, "incorrect_loss_per_token": 3.1717291442518794, "correct_loss_uncond": -16.790542602539062, "incorrect_loss_uncond": -15.352640787760416}, "model_output": [{"sum_logits": -120.02671813964844, "num_tokens": 33, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -131.19384765625, "logits_per_token": -3.6371732769590435, "logits_per_char": -0.794878928077142, "num_chars": 151}, {"sum_logits": -145.34921264648438, "num_tokens": 56, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -162.13516235351562, "logits_per_token": -2.5955216544015065, "logits_per_char": -0.5677703619003296, "num_chars": 256}, {"sum_logits": -107.13554382324219, "num_tokens": 50, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -123.92608642578125, "logits_per_token": -2.142710876464844, "logits_per_char": -0.49830485499182414, "num_chars": 215}, {"sum_logits": -160.84213256835938, "num_tokens": 49, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -178.9469757080078, "logits_per_token": -3.2824925013950894, "logits_per_char": -0.7023673911282069, "num_chars": 229}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 61, "native_id": 36438, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 27.015335083007812, "incorrect_loss_raw": 35.01867802937826, "correct_loss_per_char": 0.5403067016601563, "incorrect_loss_per_char": 0.732418808541874, "correct_loss_per_token": 3.001703898111979, "incorrect_loss_per_token": 2.654120763142904, "correct_loss_uncond": -25.200111389160156, "incorrect_loss_uncond": -23.356908162434895}, "model_output": [{"sum_logits": -27.015335083007812, "num_tokens": 9, "num_tokens_all": 398, "is_greedy": false, "sum_logits_uncond": -52.21544647216797, "logits_per_token": -3.001703898111979, "logits_per_char": -0.5403067016601563, "num_chars": 50}, {"sum_logits": -38.90875244140625, "num_tokens": 12, "num_tokens_all": 401, "is_greedy": false, "sum_logits_uncond": -59.6852912902832, "logits_per_token": -3.242396036783854, "logits_per_char": -0.9263988676525298, "num_chars": 42}, {"sum_logits": -28.116535186767578, "num_tokens": 12, "num_tokens_all": 401, "is_greedy": false, "sum_logits_uncond": -48.959434509277344, "logits_per_token": -2.3430445988972983, "logits_per_char": -0.6857691508967702, "num_chars": 41}, {"sum_logits": -38.03074645996094, "num_tokens": 16, "num_tokens_all": 405, "is_greedy": false, "sum_logits_uncond": -66.4820327758789, "logits_per_token": -2.3769216537475586, "logits_per_char": -0.5850884070763221, "num_chars": 65}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 62, "native_id": 6302, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.611673831939697, "incorrect_loss_raw": 12.637253761291504, "correct_loss_per_char": 0.27184549399784635, "incorrect_loss_per_char": 0.5427070681590462, "correct_loss_per_token": 1.268612305323283, "incorrect_loss_per_token": 2.0887082266429116, "correct_loss_uncond": -17.354098796844482, "incorrect_loss_uncond": -19.270678520202637}, "model_output": [{"sum_logits": -10.780303955078125, "num_tokens": 6, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -27.909698486328125, "logits_per_token": -1.7967173258463542, "logits_per_char": -0.5133478073846727, "num_chars": 21}, {"sum_logits": -7.611673831939697, "num_tokens": 6, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -24.96577262878418, "logits_per_token": -1.268612305323283, "logits_per_char": -0.27184549399784635, "num_chars": 28}, {"sum_logits": -10.385985374450684, "num_tokens": 5, "num_tokens_all": 402, "is_greedy": false, "sum_logits_uncond": -25.785629272460938, "logits_per_token": -2.0771970748901367, "logits_per_char": -0.4945707321166992, "num_chars": 21}, {"sum_logits": -16.745471954345703, "num_tokens": 7, "num_tokens_all": 404, "is_greedy": false, "sum_logits_uncond": -42.02846908569336, "logits_per_token": -2.3922102791922435, "logits_per_char": -0.6202026649757668, "num_chars": 27}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 63, "native_id": 18099, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 72.1075668334961, "incorrect_loss_raw": 88.76924133300781, "correct_loss_per_char": 0.5077997664330711, "incorrect_loss_per_char": 0.676427914989007, "correct_loss_per_token": 2.4035855611165364, "incorrect_loss_per_token": 2.8749362958683027, "correct_loss_uncond": -22.45610809326172, "incorrect_loss_uncond": -13.621976216634115}, "model_output": [{"sum_logits": -79.089599609375, "num_tokens": 22, "num_tokens_all": 445, "is_greedy": false, "sum_logits_uncond": -94.61778259277344, "logits_per_token": -3.5949818004261362, "logits_per_char": -0.7323111074942129, "num_chars": 108}, {"sum_logits": -78.26990509033203, "num_tokens": 35, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -97.83161926269531, "logits_per_token": -2.2362830025809153, "logits_per_char": -0.5017301608354617, "num_chars": 156}, {"sum_logits": -72.1075668334961, "num_tokens": 30, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -94.56367492675781, "logits_per_token": -2.4035855611165364, "logits_per_char": -0.5077997664330711, "num_chars": 142}, {"sum_logits": -108.9482192993164, "num_tokens": 39, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -114.72425079345703, "logits_per_token": -2.7935440845978565, "logits_per_char": -0.795242476637346, "num_chars": 137}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 64, "native_id": 27897, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 38.468170166015625, "incorrect_loss_raw": 41.71207618713379, "correct_loss_per_char": 0.4525667078354779, "incorrect_loss_per_char": 0.6535905966247441, "correct_loss_per_token": 2.4042606353759766, "incorrect_loss_per_token": 2.8195922896516845, "correct_loss_uncond": -42.670318603515625, "incorrect_loss_uncond": -20.188325881958008}, "model_output": [{"sum_logits": -38.468170166015625, "num_tokens": 16, "num_tokens_all": 427, "is_greedy": false, "sum_logits_uncond": -81.13848876953125, "logits_per_token": -2.4042606353759766, "logits_per_char": -0.4525667078354779, "num_chars": 85}, {"sum_logits": -71.3423843383789, "num_tokens": 22, "num_tokens_all": 433, "is_greedy": false, "sum_logits_uncond": -96.79393005371094, "logits_per_token": -3.2428356517444956, "logits_per_char": -0.6994351405723422, "num_chars": 102}, {"sum_logits": -26.392351150512695, "num_tokens": 9, "num_tokens_all": 420, "is_greedy": false, "sum_logits_uncond": -43.464351654052734, "logits_per_token": -2.9324834611680775, "logits_per_char": -0.7133067878516944, "num_chars": 37}, {"sum_logits": -27.401493072509766, "num_tokens": 12, "num_tokens_all": 423, "is_greedy": false, "sum_logits_uncond": -45.44292449951172, "logits_per_token": -2.2834577560424805, "logits_per_char": -0.5480298614501953, "num_chars": 50}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 65, "native_id": 40212, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 100.35111999511719, "incorrect_loss_raw": 96.31593068440755, "correct_loss_per_char": 0.4079313820939723, "incorrect_loss_per_char": 0.6664921611718472, "correct_loss_per_token": 1.8245658180930397, "incorrect_loss_per_token": 3.0537587857190824, "correct_loss_uncond": -29.961196899414062, "incorrect_loss_uncond": -14.916422526041666}, "model_output": [{"sum_logits": -100.35111999511719, "num_tokens": 55, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -130.31231689453125, "logits_per_token": -1.8245658180930397, "logits_per_char": -0.4079313820939723, "num_chars": 246}, {"sum_logits": -129.3678436279297, "num_tokens": 39, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -147.97984313964844, "logits_per_token": -3.3171241955879407, "logits_per_char": -0.7609873154584099, "num_chars": 170}, {"sum_logits": -66.55414581298828, "num_tokens": 22, "num_tokens_all": 445, "is_greedy": false, "sum_logits_uncond": -80.17118072509766, "logits_per_token": -3.025188446044922, "logits_per_char": -0.5787317027216372, "num_chars": 115}, {"sum_logits": -93.02580261230469, "num_tokens": 33, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -105.54603576660156, "logits_per_token": -2.8189637155243843, "logits_per_char": -0.6597574653354943, "num_chars": 141}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 66, "native_id": 35710, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 26.492141723632812, "incorrect_loss_raw": 38.2104237874349, "correct_loss_per_char": 0.3895903194651884, "incorrect_loss_per_char": 0.5790642092288684, "correct_loss_per_token": 1.7661427815755208, "incorrect_loss_per_token": 2.6408009635077585, "correct_loss_uncond": -26.64902114868164, "incorrect_loss_uncond": -24.916097005208332}, "model_output": [{"sum_logits": -28.699966430664062, "num_tokens": 12, "num_tokens_all": 405, "is_greedy": false, "sum_logits_uncond": -52.231689453125, "logits_per_token": -2.3916638692220054, "logits_per_char": -0.5314808598271122, "num_chars": 54}, {"sum_logits": -24.683475494384766, "num_tokens": 10, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -48.34716033935547, "logits_per_token": -2.4683475494384766, "logits_per_char": -0.525180329667761, "num_chars": 47}, {"sum_logits": -26.492141723632812, "num_tokens": 15, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -53.14116287231445, "logits_per_token": -1.7661427815755208, "logits_per_char": -0.3895903194651884, "num_chars": 68}, {"sum_logits": -61.24782943725586, "num_tokens": 20, "num_tokens_all": 413, "is_greedy": false, "sum_logits_uncond": -88.80071258544922, "logits_per_token": -3.062391471862793, "logits_per_char": -0.6805314381917318, "num_chars": 90}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 67, "native_id": 13274, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 78.10247039794922, "incorrect_loss_raw": 127.20269521077473, "correct_loss_per_char": 0.4244699478149414, "incorrect_loss_per_char": 0.6533661611099985, "correct_loss_per_token": 2.1108775783229516, "incorrect_loss_per_token": 2.912847062718799, "correct_loss_uncond": -31.928016662597656, "incorrect_loss_uncond": -26.831703186035156}, "model_output": [{"sum_logits": -140.69613647460938, "num_tokens": 45, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -170.03857421875, "logits_per_token": -3.126580810546875, "logits_per_char": -0.7366289867780595, "num_chars": 191}, {"sum_logits": -78.10247039794922, "num_tokens": 37, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -110.03048706054688, "logits_per_token": -2.1108775783229516, "logits_per_char": -0.4244699478149414, "num_chars": 184}, {"sum_logits": -132.7557830810547, "num_tokens": 48, "num_tokens_all": 494, "is_greedy": false, "sum_logits_uncond": -146.34767150878906, "logits_per_token": -2.765745480855306, "logits_per_char": -0.6572068469359142, "num_chars": 202}, {"sum_logits": -108.15616607666016, "num_tokens": 38, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -145.71694946289062, "logits_per_token": -2.8462148967542147, "logits_per_char": -0.5662626496160218, "num_chars": 191}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 68, "native_id": 31218, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 77.62388610839844, "incorrect_loss_raw": 123.77731831868489, "correct_loss_per_char": 0.42886124921767094, "incorrect_loss_per_char": 0.7012130616598656, "correct_loss_per_token": 1.9903560540614984, "incorrect_loss_per_token": 3.3606811912816483, "correct_loss_uncond": -31.756149291992188, "incorrect_loss_uncond": -20.035125732421875}, "model_output": [{"sum_logits": -126.52630615234375, "num_tokens": 40, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -140.95431518554688, "logits_per_token": -3.163157653808594, "logits_per_char": -0.6589911778767904, "num_chars": 192}, {"sum_logits": -119.539306640625, "num_tokens": 33, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -145.046142578125, "logits_per_token": -3.622403231534091, "logits_per_char": -0.6568093771462912, "num_chars": 182}, {"sum_logits": -77.62388610839844, "num_tokens": 39, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -109.38003540039062, "logits_per_token": -1.9903560540614984, "logits_per_char": -0.42886124921767094, "num_chars": 181}, {"sum_logits": -125.26634216308594, "num_tokens": 38, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -145.43687438964844, "logits_per_token": -3.2964826885022616, "logits_per_char": -0.7878386299565153, "num_chars": 159}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 69, "native_id": 8955, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 33.13738250732422, "incorrect_loss_raw": 20.037034034729004, "correct_loss_per_char": 0.5713341811607624, "incorrect_loss_per_char": 0.4573438824896993, "correct_loss_per_token": 2.366955893380301, "incorrect_loss_per_token": 1.7427701552708943, "correct_loss_uncond": -27.366928100585938, "incorrect_loss_uncond": -28.57181453704834}, "model_output": [{"sum_logits": -36.569236755371094, "num_tokens": 16, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -64.88776397705078, "logits_per_token": -2.2855772972106934, "logits_per_char": -0.5224176679338728, "num_chars": 70}, {"sum_logits": -12.65260124206543, "num_tokens": 8, "num_tokens_all": 404, "is_greedy": false, "sum_logits_uncond": -40.86672592163086, "logits_per_token": -1.5815751552581787, "logits_per_char": -0.48663850931020886, "num_chars": 26}, {"sum_logits": -33.13738250732422, "num_tokens": 14, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -60.504310607910156, "logits_per_token": -2.366955893380301, "logits_per_char": -0.5713341811607624, "num_chars": 58}, {"sum_logits": -10.889264106750488, "num_tokens": 8, "num_tokens_all": 404, "is_greedy": false, "sum_logits_uncond": -40.07205581665039, "logits_per_token": -1.361158013343811, "logits_per_char": -0.36297547022501625, "num_chars": 30}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 70, "native_id": 26410, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 38.78484344482422, "incorrect_loss_raw": 74.74362055460612, "correct_loss_per_char": 0.49724158262595153, "incorrect_loss_per_char": 0.4998270377003409, "correct_loss_per_token": 2.0413075497275903, "incorrect_loss_per_token": 2.333952920593088, "correct_loss_uncond": -28.74498748779297, "incorrect_loss_uncond": -19.23080317179362}, "model_output": [{"sum_logits": -114.98655700683594, "num_tokens": 40, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -125.78684997558594, "logits_per_token": -2.8746639251708985, "logits_per_char": -0.592714211375443, "num_chars": 194}, {"sum_logits": -52.305450439453125, "num_tokens": 27, "num_tokens_all": 445, "is_greedy": false, "sum_logits_uncond": -70.37614440917969, "logits_per_token": -1.9372389051649306, "logits_per_char": -0.43227644991283576, "num_chars": 121}, {"sum_logits": -38.78484344482422, "num_tokens": 19, "num_tokens_all": 437, "is_greedy": false, "sum_logits_uncond": -67.52983093261719, "logits_per_token": -2.0413075497275903, "logits_per_char": -0.49724158262595153, "num_chars": 78}, {"sum_logits": -56.9388542175293, "num_tokens": 26, "num_tokens_all": 444, "is_greedy": false, "sum_logits_uncond": -85.7602767944336, "logits_per_token": -2.1899559314434347, "logits_per_char": -0.47449045181274413, "num_chars": 120}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 71, "native_id": 9926, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 63.63478469848633, "incorrect_loss_raw": 130.76007588704428, "correct_loss_per_char": 0.4644874795509951, "incorrect_loss_per_char": 0.7475758729421068, "correct_loss_per_token": 1.9885870218276978, "incorrect_loss_per_token": 3.3770079953933387, "correct_loss_uncond": -35.01809310913086, "incorrect_loss_uncond": -22.047398885091145}, "model_output": [{"sum_logits": -63.63478469848633, "num_tokens": 32, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -98.65287780761719, "logits_per_token": -1.9885870218276978, "logits_per_char": -0.4644874795509951, "num_chars": 137}, {"sum_logits": -135.20909118652344, "num_tokens": 44, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -161.72161865234375, "logits_per_token": -3.072933890602805, "logits_per_char": -0.6469334506532222, "num_chars": 209}, {"sum_logits": -141.75177001953125, "num_tokens": 46, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -168.83346557617188, "logits_per_token": -3.081560217815897, "logits_per_char": -0.6948616177428002, "num_chars": 204}, {"sum_logits": -115.31936645507812, "num_tokens": 29, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -127.86734008789062, "logits_per_token": -3.9765298777613145, "logits_per_char": -0.9009325504302979, "num_chars": 128}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 72, "native_id": 9059, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 36.19591522216797, "incorrect_loss_raw": 53.992493311564125, "correct_loss_per_char": 0.8043536716037326, "incorrect_loss_per_char": 0.8405062143007914, "correct_loss_per_token": 3.2905377474698154, "incorrect_loss_per_token": 3.9019766726525003, "correct_loss_uncond": -12.241706848144531, "incorrect_loss_uncond": -18.164948145548504}, "model_output": [{"sum_logits": -36.19591522216797, "num_tokens": 11, "num_tokens_all": 395, "is_greedy": false, "sum_logits_uncond": -48.4376220703125, "logits_per_token": -3.2905377474698154, "logits_per_char": -0.8043536716037326, "num_chars": 45}, {"sum_logits": -20.194543838500977, "num_tokens": 6, "num_tokens_all": 390, "is_greedy": false, "sum_logits_uncond": -37.54422378540039, "logits_per_token": -3.3657573064168296, "logits_per_char": -0.6310794949531555, "num_chars": 32}, {"sum_logits": -88.16232299804688, "num_tokens": 17, "num_tokens_all": 401, "is_greedy": false, "sum_logits_uncond": -105.39436340332031, "logits_per_token": -5.1860189998851105, "logits_per_char": -1.1754976399739583, "num_chars": 75}, {"sum_logits": -53.62061309814453, "num_tokens": 17, "num_tokens_all": 401, "is_greedy": false, "sum_logits_uncond": -73.53373718261719, "logits_per_token": -3.1541537116555607, "logits_per_char": -0.7149415079752605, "num_chars": 75}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 73, "native_id": 44435, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 16.827068328857422, "incorrect_loss_raw": 41.58229637145996, "correct_loss_per_char": 0.5609022776285807, "incorrect_loss_per_char": 0.7964777559062819, "correct_loss_per_token": 2.4038669041224887, "incorrect_loss_per_token": 3.5783393683256928, "correct_loss_uncond": -19.61963653564453, "incorrect_loss_uncond": -14.682353337605795}, "model_output": [{"sum_logits": -55.217369079589844, "num_tokens": 16, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -75.25282287597656, "logits_per_token": -3.4510855674743652, "logits_per_char": -0.756402316158765, "num_chars": 73}, {"sum_logits": -16.827068328857422, "num_tokens": 7, "num_tokens_all": 401, "is_greedy": false, "sum_logits_uncond": -36.44670486450195, "logits_per_token": -2.4038669041224887, "logits_per_char": -0.5609022776285807, "num_chars": 30}, {"sum_logits": -39.74127197265625, "num_tokens": 10, "num_tokens_all": 404, "is_greedy": false, "sum_logits_uncond": -51.610008239746094, "logits_per_token": -3.974127197265625, "logits_per_char": -0.8279431660970052, "num_chars": 48}, {"sum_logits": -29.78824806213379, "num_tokens": 9, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -41.93111801147461, "logits_per_token": -3.3098053402370877, "logits_per_char": -0.8050877854630754, "num_chars": 37}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 74, "native_id": 9197, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 86.82876586914062, "incorrect_loss_raw": 86.60644785563152, "correct_loss_per_char": 0.47447413043246245, "incorrect_loss_per_char": 0.5724993996875675, "correct_loss_per_token": 2.411910163031684, "incorrect_loss_per_token": 2.4993626518122896, "correct_loss_uncond": -20.353683471679688, "incorrect_loss_uncond": -15.257553100585938}, "model_output": [{"sum_logits": -101.53173828125, "num_tokens": 37, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -119.83800506591797, "logits_per_token": -2.7441010346283785, "logits_per_char": -0.6267391251929012, "num_chars": 162}, {"sum_logits": -83.86461639404297, "num_tokens": 30, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -90.62751770019531, "logits_per_token": -2.7954872131347654, "logits_per_char": -0.6451124338003306, "num_chars": 130}, {"sum_logits": -86.82876586914062, "num_tokens": 36, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -107.18244934082031, "logits_per_token": -2.411910163031684, "logits_per_char": -0.47447413043246245, "num_chars": 183}, {"sum_logits": -74.42298889160156, "num_tokens": 38, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -95.12648010253906, "logits_per_token": -1.9584997076737254, "logits_per_char": -0.4456466400694704, "num_chars": 167}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 75, "native_id": 38889, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.612127780914307, "incorrect_loss_raw": 26.73736826578776, "correct_loss_per_char": 0.28193065855238175, "incorrect_loss_per_char": 0.6997446660202732, "correct_loss_per_token": 1.2686879634857178, "incorrect_loss_per_token": 3.1046690582598324, "correct_loss_uncond": -25.652276515960693, "incorrect_loss_uncond": -18.026508967081707}, "model_output": [{"sum_logits": -7.612127780914307, "num_tokens": 6, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -33.264404296875, "logits_per_token": -1.2686879634857178, "logits_per_char": -0.28193065855238175, "num_chars": 27}, {"sum_logits": -20.055606842041016, "num_tokens": 7, "num_tokens_all": 404, "is_greedy": false, "sum_logits_uncond": -30.471643447875977, "logits_per_token": -2.865086691720145, "logits_per_char": -0.7162716729300362, "num_chars": 28}, {"sum_logits": -38.994361877441406, "num_tokens": 9, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -52.681846618652344, "logits_per_token": -4.332706875271267, "logits_per_char": -0.951081997010766, "num_chars": 41}, {"sum_logits": -21.16213607788086, "num_tokens": 10, "num_tokens_all": 407, "is_greedy": false, "sum_logits_uncond": -51.13814163208008, "logits_per_token": -2.1162136077880858, "logits_per_char": -0.43188032812001753, "num_chars": 49}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 76, "native_id": 12658, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 109.59986114501953, "incorrect_loss_raw": 113.17984008789062, "correct_loss_per_char": 0.6192082550566075, "incorrect_loss_per_char": 0.6763752470060158, "correct_loss_per_token": 2.5488339801167332, "incorrect_loss_per_token": 3.198499730024389, "correct_loss_uncond": -28.941627502441406, "incorrect_loss_uncond": -17.55456797281901}, "model_output": [{"sum_logits": -109.59986114501953, "num_tokens": 43, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -138.54148864746094, "logits_per_token": -2.5488339801167332, "logits_per_char": -0.6192082550566075, "num_chars": 177}, {"sum_logits": -106.73191833496094, "num_tokens": 36, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -120.80133056640625, "logits_per_token": -2.9647755093044705, "logits_per_char": -0.6241632651167306, "num_chars": 171}, {"sum_logits": -141.4420166015625, "num_tokens": 42, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -156.2977294921875, "logits_per_token": -3.3676670619419644, "logits_per_char": -0.7523511521359707, "num_chars": 188}, {"sum_logits": -91.36558532714844, "num_tokens": 28, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -115.10416412353516, "logits_per_token": -3.26305661882673, "logits_per_char": -0.652611323765346, "num_chars": 140}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 77, "native_id": 32342, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 125.9122085571289, "incorrect_loss_raw": 112.93318176269531, "correct_loss_per_char": 0.5427250368841763, "incorrect_loss_per_char": 0.5650536789624733, "correct_loss_per_token": 2.997909727550688, "incorrect_loss_per_token": 2.6393482109661996, "correct_loss_uncond": -22.48993682861328, "incorrect_loss_uncond": -14.4097900390625}, "model_output": [{"sum_logits": -131.0159912109375, "num_tokens": 53, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -147.48736572265625, "logits_per_token": -2.4719998341686322, "logits_per_char": -0.4981596623990019, "num_chars": 263}, {"sum_logits": -130.15933227539062, "num_tokens": 47, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -145.72564697265625, "logits_per_token": -2.7693474952210773, "logits_per_char": -0.5659101403277853, "num_chars": 230}, {"sum_logits": -125.9122085571289, "num_tokens": 42, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -148.4021453857422, "logits_per_token": -2.997909727550688, "logits_per_char": -0.5427250368841763, "num_chars": 232}, {"sum_logits": -77.62422180175781, "num_tokens": 29, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -88.81590270996094, "logits_per_token": -2.67669730350889, "logits_per_char": -0.6310912341606326, "num_chars": 123}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 78, "native_id": 9393, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 32.86695861816406, "incorrect_loss_raw": 58.40999539693197, "correct_loss_per_char": 0.4324599818179482, "incorrect_loss_per_char": 0.6372199377519387, "correct_loss_per_token": 2.054184913635254, "incorrect_loss_per_token": 3.1534787976843677, "correct_loss_uncond": -44.46482849121094, "incorrect_loss_uncond": -24.71205457051595}, "model_output": [{"sum_logits": -32.86695861816406, "num_tokens": 16, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -77.331787109375, "logits_per_token": -2.054184913635254, "logits_per_char": -0.4324599818179482, "num_chars": 76}, {"sum_logits": -58.59283447265625, "num_tokens": 15, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -68.51631164550781, "logits_per_token": -3.90618896484375, "logits_per_char": -0.751190185546875, "num_chars": 78}, {"sum_logits": -92.83148193359375, "num_tokens": 26, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -126.55693054199219, "logits_per_token": -3.5704416128305287, "logits_per_char": -0.7196238909580911, "num_chars": 129}, {"sum_logits": -23.8056697845459, "num_tokens": 12, "num_tokens_all": 405, "is_greedy": false, "sum_logits_uncond": -54.29290771484375, "logits_per_token": -1.983805815378825, "logits_per_char": -0.44084573675085, "num_chars": 54}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 79, "native_id": 44198, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 163.5299835205078, "incorrect_loss_raw": 113.6859130859375, "correct_loss_per_char": 0.4150507195951975, "incorrect_loss_per_char": 0.5882604582957105, "correct_loss_per_token": 1.8582952672784978, "incorrect_loss_per_token": 2.870005979465566, "correct_loss_uncond": -22.5408935546875, "incorrect_loss_uncond": -20.544815063476562}, "model_output": [{"sum_logits": -163.5299835205078, "num_tokens": 88, "num_tokens_all": 503, "is_greedy": false, "sum_logits_uncond": -186.0708770751953, "logits_per_token": -1.8582952672784978, "logits_per_char": -0.4150507195951975, "num_chars": 394}, {"sum_logits": -46.5460205078125, "num_tokens": 23, "num_tokens_all": 438, "is_greedy": false, "sum_logits_uncond": -65.52116394042969, "logits_per_token": -2.023740022078804, "logits_per_char": -0.39114302947741597, "num_chars": 119}, {"sum_logits": -151.4073486328125, "num_tokens": 42, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -162.9088134765625, "logits_per_token": -3.6049368722098216, "logits_per_char": -0.7646835789535985, "num_chars": 198}, {"sum_logits": -143.1043701171875, "num_tokens": 48, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -174.26220703125, "logits_per_token": -2.9813410441080728, "logits_per_char": -0.608954766456117, "num_chars": 235}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 80, "native_id": 22925, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 73.63715362548828, "incorrect_loss_raw": 117.13337961832683, "correct_loss_per_char": 0.46605793433853343, "incorrect_loss_per_char": 0.5479839984577971, "correct_loss_per_token": 1.990193341229413, "incorrect_loss_per_token": 2.464105334891236, "correct_loss_uncond": -30.14354705810547, "incorrect_loss_uncond": -21.29755401611328}, "model_output": [{"sum_logits": -95.84056854248047, "num_tokens": 45, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -120.56997680664062, "logits_per_token": -2.1297904120551214, "logits_per_char": -0.4675149684999047, "num_chars": 205}, {"sum_logits": -137.53060913085938, "num_tokens": 51, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -152.23367309570312, "logits_per_token": -2.6966786104090072, "logits_per_char": -0.6223104485559248, "num_chars": 221}, {"sum_logits": -118.02896118164062, "num_tokens": 46, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -142.48915100097656, "logits_per_token": -2.5658469822095786, "logits_per_char": -0.5541265783175616, "num_chars": 213}, {"sum_logits": -73.63715362548828, "num_tokens": 37, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -103.78070068359375, "logits_per_token": -1.990193341229413, "logits_per_char": -0.46605793433853343, "num_chars": 158}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 81, "native_id": 18466, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 78.931396484375, "incorrect_loss_raw": 92.53705596923828, "correct_loss_per_char": 0.5227244800289735, "incorrect_loss_per_char": 0.5064004136350188, "correct_loss_per_token": 2.391860499526515, "incorrect_loss_per_token": 2.2355253587642028, "correct_loss_uncond": -24.878524780273438, "incorrect_loss_uncond": -13.451438903808594}, "model_output": [{"sum_logits": -78.931396484375, "num_tokens": 33, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -103.80992126464844, "logits_per_token": -2.391860499526515, "logits_per_char": -0.5227244800289735, "num_chars": 151}, {"sum_logits": -71.11441040039062, "num_tokens": 37, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -87.71510314941406, "logits_per_token": -1.9220110919024493, "logits_per_char": -0.43362445366091845, "num_chars": 164}, {"sum_logits": -116.45822143554688, "num_tokens": 51, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -125.81193542480469, "logits_per_token": -2.2834945379518996, "logits_per_char": -0.46030917563457263, "num_chars": 253}, {"sum_logits": -90.03853607177734, "num_tokens": 36, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -104.43844604492188, "logits_per_token": -2.5010704464382596, "logits_per_char": -0.6252676116095649, "num_chars": 144}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 82, "native_id": 36004, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 74.39213562011719, "incorrect_loss_raw": 108.29463195800781, "correct_loss_per_char": 0.46787506679318985, "incorrect_loss_per_char": 0.5980081620530265, "correct_loss_per_token": 2.066448211669922, "incorrect_loss_per_token": 2.9133721667168864, "correct_loss_uncond": -14.110626220703125, "incorrect_loss_uncond": -14.659037272135416}, "model_output": [{"sum_logits": -74.39213562011719, "num_tokens": 36, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -88.50276184082031, "logits_per_token": -2.066448211669922, "logits_per_char": -0.46787506679318985, "num_chars": 159}, {"sum_logits": -109.0361328125, "num_tokens": 37, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -126.8480224609375, "logits_per_token": -2.946922508445946, "logits_per_char": -0.6230636160714286, "num_chars": 175}, {"sum_logits": -129.92718505859375, "num_tokens": 43, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -142.61439514160156, "logits_per_token": -3.0215624432231105, "logits_per_char": -0.6595288581654505, "num_chars": 197}, {"sum_logits": -85.92057800292969, "num_tokens": 31, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -99.39859008789062, "logits_per_token": -2.771631548481603, "logits_per_char": -0.5114320119222006, "num_chars": 168}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 83, "native_id": 17573, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 62.89407730102539, "incorrect_loss_raw": 111.76759084065755, "correct_loss_per_char": 0.5241173108418783, "incorrect_loss_per_char": 0.659637584494044, "correct_loss_per_token": 2.329410270408348, "incorrect_loss_per_token": 2.961758389714161, "correct_loss_uncond": -23.187175750732422, "incorrect_loss_uncond": -14.195137023925781}, "model_output": [{"sum_logits": -66.8675308227539, "num_tokens": 26, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -73.98927307128906, "logits_per_token": -2.571828108567458, "logits_per_char": -0.6134635855298524, "num_chars": 109}, {"sum_logits": -128.2947540283203, "num_tokens": 53, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -152.92288208007812, "logits_per_token": -2.4206557363834023, "logits_per_char": -0.5553885455771442, "num_chars": 231}, {"sum_logits": -140.14048767089844, "num_tokens": 36, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -150.9760284423828, "logits_per_token": -3.892791324191623, "logits_per_char": -0.8100606223751354, "num_chars": 173}, {"sum_logits": -62.89407730102539, "num_tokens": 27, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -86.08125305175781, "logits_per_token": -2.329410270408348, "logits_per_char": -0.5241173108418783, "num_chars": 120}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 84, "native_id": 46569, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 99.88037109375, "incorrect_loss_raw": 137.93148295084634, "correct_loss_per_char": 0.51220703125, "incorrect_loss_per_char": 0.8046822113898123, "correct_loss_per_token": 2.1251142785904253, "incorrect_loss_per_token": 3.520519196163495, "correct_loss_uncond": -24.56743621826172, "incorrect_loss_uncond": -11.294677734375}, "model_output": [{"sum_logits": -96.1790771484375, "num_tokens": 28, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -124.74371337890625, "logits_per_token": -3.434967041015625, "logits_per_char": -0.8664781725084459, "num_chars": 111}, {"sum_logits": -99.88037109375, "num_tokens": 47, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -124.44780731201172, "logits_per_token": -2.1251142785904253, "logits_per_char": -0.51220703125, "num_chars": 195}, {"sum_logits": -146.31170654296875, "num_tokens": 43, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -152.66070556640625, "logits_per_token": -3.4025978265806684, "logits_per_char": -0.7995175220927254, "num_chars": 183}, {"sum_logits": -171.3036651611328, "num_tokens": 46, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -170.27406311035156, "logits_per_token": -3.7239927208941914, "logits_per_char": -0.7480509395682655, "num_chars": 229}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 85, "native_id": 32736, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 116.8642349243164, "incorrect_loss_raw": 88.39400482177734, "correct_loss_per_char": 0.5756858863266818, "incorrect_loss_per_char": 0.6553771138629864, "correct_loss_per_token": 2.4864730834960938, "incorrect_loss_per_token": 2.939200459556782, "correct_loss_uncond": -23.657676696777344, "incorrect_loss_uncond": -16.64801279703776}, "model_output": [{"sum_logits": -116.8642349243164, "num_tokens": 47, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -140.52191162109375, "logits_per_token": -2.4864730834960938, "logits_per_char": -0.5756858863266818, "num_chars": 203}, {"sum_logits": -80.23373413085938, "num_tokens": 30, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -97.44322967529297, "logits_per_token": -2.674457804361979, "logits_per_char": -0.5650262966961928, "num_chars": 142}, {"sum_logits": -79.59297180175781, "num_tokens": 29, "num_tokens_all": 448, "is_greedy": false, "sum_logits_uncond": -105.34522247314453, "logits_per_token": -2.744585234543373, "logits_per_char": -0.6745167101843882, "num_chars": 118}, {"sum_logits": -105.35530853271484, "num_tokens": 31, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -112.33760070800781, "logits_per_token": -3.3985583397649948, "logits_per_char": -0.7265883347083782, "num_chars": 145}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 86, "native_id": 33716, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 93.425537109375, "incorrect_loss_raw": 106.0000394185384, "correct_loss_per_char": 0.47184614701704547, "incorrect_loss_per_char": 0.7255690846629456, "correct_loss_per_token": 2.458566766036184, "incorrect_loss_per_token": 3.442527494677858, "correct_loss_uncond": -19.306045532226562, "incorrect_loss_uncond": -18.879833221435547}, "model_output": [{"sum_logits": -93.425537109375, "num_tokens": 38, "num_tokens_all": 490, "is_greedy": false, "sum_logits_uncond": -112.73158264160156, "logits_per_token": -2.458566766036184, "logits_per_char": -0.47184614701704547, "num_chars": 198}, {"sum_logits": -115.73907470703125, "num_tokens": 31, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -134.08445739746094, "logits_per_token": -3.733518538936492, "logits_per_char": -0.6538930774408546, "num_chars": 177}, {"sum_logits": -141.32456970214844, "num_tokens": 34, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -161.0316162109375, "logits_per_token": -4.15660499123966, "logits_per_char": -0.9882837042108282, "num_chars": 143}, {"sum_logits": -60.93647384643555, "num_tokens": 25, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -79.52354431152344, "logits_per_token": -2.437458953857422, "logits_per_char": -0.5345304723371539, "num_chars": 114}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 87, "native_id": 10442, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 20.928775787353516, "incorrect_loss_raw": 28.165339787801106, "correct_loss_per_char": 0.6155522290398093, "incorrect_loss_per_char": 0.9577940213248305, "correct_loss_per_token": 3.4881292978922525, "incorrect_loss_per_token": 3.6704709416344055, "correct_loss_uncond": -16.369083404541016, "incorrect_loss_uncond": -11.828435897827148}, "model_output": [{"sum_logits": -25.166982650756836, "num_tokens": 7, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -40.12742614746094, "logits_per_token": -3.595283235822405, "logits_per_char": -0.8678269879571323, "num_chars": 29}, {"sum_logits": -25.155384063720703, "num_tokens": 8, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -35.938846588134766, "logits_per_token": -3.144423007965088, "logits_per_char": -0.739864237168256, "num_chars": 34}, {"sum_logits": -34.17365264892578, "num_tokens": 8, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -43.91505432128906, "logits_per_token": -4.271706581115723, "logits_per_char": -1.265690838849103, "num_chars": 27}, {"sum_logits": -20.928775787353516, "num_tokens": 6, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -37.29785919189453, "logits_per_token": -3.4881292978922525, "logits_per_char": -0.6155522290398093, "num_chars": 34}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 88, "native_id": 33087, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 84.90196990966797, "incorrect_loss_raw": 122.05319213867188, "correct_loss_per_char": 0.5273414280103601, "incorrect_loss_per_char": 0.6116234613921415, "correct_loss_per_token": 2.425770568847656, "incorrect_loss_per_token": 2.769880778815157, "correct_loss_uncond": -30.77825927734375, "incorrect_loss_uncond": -19.897694905598957}, "model_output": [{"sum_logits": -84.39744567871094, "num_tokens": 31, "num_tokens_all": 446, "is_greedy": false, "sum_logits_uncond": -107.14624786376953, "logits_per_token": -2.722498247700353, "logits_per_char": -0.6298316841694847, "num_chars": 134}, {"sum_logits": -174.8291015625, "num_tokens": 60, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -193.32354736328125, "logits_per_token": -2.913818359375, "logits_per_char": -0.6622314453125, "num_chars": 264}, {"sum_logits": -84.90196990966797, "num_tokens": 35, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -115.68022918701172, "logits_per_token": -2.425770568847656, "logits_per_char": -0.5273414280103601, "num_chars": 161}, {"sum_logits": -106.93302917480469, "num_tokens": 40, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -125.38286590576172, "logits_per_token": -2.6733257293701174, "logits_per_char": -0.54280725469444, "num_chars": 197}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 89, "native_id": 48229, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 67.25555419921875, "incorrect_loss_raw": 63.941898345947266, "correct_loss_per_char": 0.490916454008896, "incorrect_loss_per_char": 0.4470452334802906, "correct_loss_per_token": 2.1695340064264115, "incorrect_loss_per_token": 1.8140783905844515, "correct_loss_uncond": -18.68706512451172, "incorrect_loss_uncond": -20.67220687866211}, "model_output": [{"sum_logits": -55.98099136352539, "num_tokens": 35, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -77.70439147949219, "logits_per_token": -1.5994568961007254, "logits_per_char": -0.4998302800314767, "num_chars": 112}, {"sum_logits": -67.25555419921875, "num_tokens": 31, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -85.94261932373047, "logits_per_token": -2.1695340064264115, "logits_per_char": -0.490916454008896, "num_chars": 137}, {"sum_logits": -76.63777923583984, "num_tokens": 41, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -95.61669921875, "logits_per_token": -1.8692141277034109, "logits_per_char": -0.43298180359231553, "num_chars": 177}, {"sum_logits": -59.20692443847656, "num_tokens": 30, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -80.52122497558594, "logits_per_token": -1.9735641479492188, "logits_per_char": -0.40832361681707974, "num_chars": 145}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 90, "native_id": 42456, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 13.77196216583252, "incorrect_loss_raw": 31.53830846150716, "correct_loss_per_char": 0.31299914013255725, "incorrect_loss_per_char": 0.6238238034573366, "correct_loss_per_token": 1.5302180184258356, "incorrect_loss_per_token": 2.872275765736898, "correct_loss_uncond": -26.26081371307373, "incorrect_loss_uncond": -22.79875310262044}, "model_output": [{"sum_logits": -37.353431701660156, "num_tokens": 16, "num_tokens_all": 407, "is_greedy": false, "sum_logits_uncond": -66.9498519897461, "logits_per_token": -2.3345894813537598, "logits_per_char": -0.5493151720832375, "num_chars": 68}, {"sum_logits": -13.77196216583252, "num_tokens": 9, "num_tokens_all": 400, "is_greedy": false, "sum_logits_uncond": -40.03277587890625, "logits_per_token": -1.5302180184258356, "logits_per_char": -0.31299914013255725, "num_chars": 44}, {"sum_logits": -35.0179557800293, "num_tokens": 10, "num_tokens_all": 401, "is_greedy": false, "sum_logits_uncond": -54.27527618408203, "logits_per_token": -3.5017955780029295, "logits_per_char": -0.686626583922143, "num_chars": 51}, {"sum_logits": -22.24353790283203, "num_tokens": 8, "num_tokens_all": 399, "is_greedy": false, "sum_logits_uncond": -41.78605651855469, "logits_per_token": -2.780442237854004, "logits_per_char": -0.6355296543666294, "num_chars": 35}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 91, "native_id": 29396, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 83.44448852539062, "incorrect_loss_raw": 108.2266108194987, "correct_loss_per_char": 0.389927516473788, "incorrect_loss_per_char": 0.6100033675089515, "correct_loss_per_token": 1.8140106201171875, "incorrect_loss_per_token": 2.8337348374126545, "correct_loss_uncond": -28.62224578857422, "incorrect_loss_uncond": -14.270342508951822}, "model_output": [{"sum_logits": -93.87841033935547, "num_tokens": 39, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -114.80799865722656, "logits_per_token": -2.4071387266501403, "logits_per_char": -0.4864166338826708, "num_chars": 193}, {"sum_logits": -83.44448852539062, "num_tokens": 46, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -112.06673431396484, "logits_per_token": -1.8140106201171875, "logits_per_char": -0.389927516473788, "num_chars": 214}, {"sum_logits": -83.19577026367188, "num_tokens": 26, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -92.097900390625, "logits_per_token": -3.1998373178335338, "logits_per_char": -0.6875683492865444, "num_chars": 121}, {"sum_logits": -147.60565185546875, "num_tokens": 51, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -160.5849609375, "logits_per_token": -2.894228467754289, "logits_per_char": -0.6560251193576389, "num_chars": 225}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 92, "native_id": 38912, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 116.33331298828125, "incorrect_loss_raw": 97.48844655354817, "correct_loss_per_char": 0.3984017568091824, "incorrect_loss_per_char": 0.5376050259172547, "correct_loss_per_token": 1.907103491611168, "incorrect_loss_per_token": 2.4609690319507247, "correct_loss_uncond": -29.767425537109375, "incorrect_loss_uncond": -18.484092712402344}, "model_output": [{"sum_logits": -116.33331298828125, "num_tokens": 61, "num_tokens_all": 494, "is_greedy": false, "sum_logits_uncond": -146.10073852539062, "logits_per_token": -1.907103491611168, "logits_per_char": -0.3984017568091824, "num_chars": 292}, {"sum_logits": -94.40092468261719, "num_tokens": 40, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -110.07240295410156, "logits_per_token": -2.3600231170654298, "logits_per_char": -0.4866027045495731, "num_chars": 194}, {"sum_logits": -73.64656829833984, "num_tokens": 31, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -96.61372375488281, "logits_per_token": -2.3756957515593498, "logits_per_char": -0.5009970632540125, "num_chars": 147}, {"sum_logits": -124.4178466796875, "num_tokens": 47, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -141.2314910888672, "logits_per_token": -2.6471882272273937, "logits_per_char": -0.6252153099481784, "num_chars": 199}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 93, "native_id": 29210, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 78.52899169921875, "incorrect_loss_raw": 114.85304768880208, "correct_loss_per_char": 0.6384470869855183, "incorrect_loss_per_char": 0.6714989956985148, "correct_loss_per_token": 3.414303986922554, "incorrect_loss_per_token": 3.213111419860038, "correct_loss_uncond": -25.959030151367188, "incorrect_loss_uncond": -11.582290649414062}, "model_output": [{"sum_logits": -94.86088562011719, "num_tokens": 31, "num_tokens_all": 444, "is_greedy": false, "sum_logits_uncond": -117.4241943359375, "logits_per_token": -3.060028568390877, "logits_per_char": -0.6587561501397027, "num_chars": 144}, {"sum_logits": -128.43252563476562, "num_tokens": 40, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -128.2937469482422, "logits_per_token": -3.2108131408691407, "logits_per_char": -0.7175001432109811, "num_chars": 179}, {"sum_logits": -78.52899169921875, "num_tokens": 23, "num_tokens_all": 436, "is_greedy": false, "sum_logits_uncond": -104.48802185058594, "logits_per_token": -3.414303986922554, "logits_per_char": -0.6384470869855183, "num_chars": 123}, {"sum_logits": -121.26573181152344, "num_tokens": 36, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -133.58807373046875, "logits_per_token": -3.3684925503200955, "logits_per_char": -0.6382406937448601, "num_chars": 190}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 94, "native_id": 12246, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 39.130393981933594, "incorrect_loss_raw": 119.01192728678386, "correct_loss_per_char": 0.3623184627956814, "incorrect_loss_per_char": 0.743380896306503, "correct_loss_per_token": 1.9565196990966798, "incorrect_loss_per_token": 3.6446240246625963, "correct_loss_uncond": -45.46661376953125, "incorrect_loss_uncond": -14.164983113606771}, "model_output": [{"sum_logits": -104.60758209228516, "num_tokens": 28, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -118.2960433959961, "logits_per_token": -3.73598507472447, "logits_per_char": -0.7525725330380227, "num_chars": 139}, {"sum_logits": -39.130393981933594, "num_tokens": 20, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -84.59700775146484, "logits_per_token": -1.9565196990966798, "logits_per_char": -0.3623184627956814, "num_chars": 108}, {"sum_logits": -180.32595825195312, "num_tokens": 43, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -195.68316650390625, "logits_per_token": -4.193626936091933, "logits_per_char": -0.8505941426978921, "num_chars": 212}, {"sum_logits": -72.10224151611328, "num_tokens": 24, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -85.55152130126953, "logits_per_token": -3.0042600631713867, "logits_per_char": -0.6269760131835938, "num_chars": 115}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 95, "native_id": 9715, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 52.49232482910156, "incorrect_loss_raw": 49.11660130818685, "correct_loss_per_char": 0.4199385986328125, "incorrect_loss_per_char": 0.40577148259608015, "correct_loss_per_token": 1.7497441609700521, "incorrect_loss_per_token": 1.6527871018364315, "correct_loss_uncond": -64.56048583984375, "incorrect_loss_uncond": -54.33272171020508}, "model_output": [{"sum_logits": -47.595176696777344, "num_tokens": 32, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -94.36190795898438, "logits_per_token": -1.487349271774292, "logits_per_char": -0.34996453453512755, "num_chars": 136}, {"sum_logits": -61.26027297973633, "num_tokens": 28, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -115.34449768066406, "logits_per_token": -2.18786689213344, "logits_per_char": -0.5569115725430576, "num_chars": 110}, {"sum_logits": -38.494354248046875, "num_tokens": 30, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -100.64156341552734, "logits_per_token": -1.2831451416015625, "logits_per_char": -0.31043834071005544, "num_chars": 124}, {"sum_logits": -52.49232482910156, "num_tokens": 30, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -117.05281066894531, "logits_per_token": -1.7497441609700521, "logits_per_char": -0.4199385986328125, "num_chars": 125}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 96, "native_id": 31253, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 38.67295455932617, "incorrect_loss_raw": 109.40701802571614, "correct_loss_per_char": 0.44968551813169966, "incorrect_loss_per_char": 0.839508552271528, "correct_loss_per_token": 2.1484974755181208, "incorrect_loss_per_token": 3.7369056247090757, "correct_loss_uncond": -29.753528594970703, "incorrect_loss_uncond": -26.826889038085938}, "model_output": [{"sum_logits": -75.86805725097656, "num_tokens": 27, "num_tokens_all": 429, "is_greedy": false, "sum_logits_uncond": -106.21292114257812, "logits_per_token": -2.809928046332465, "logits_per_char": -0.6270087376113765, "num_chars": 121}, {"sum_logits": -163.758056640625, "num_tokens": 36, "num_tokens_all": 438, "is_greedy": false, "sum_logits_uncond": -191.90484619140625, "logits_per_token": -4.548834906684028, "logits_per_char": -1.153225750990317, "num_chars": 142}, {"sum_logits": -38.67295455932617, "num_tokens": 18, "num_tokens_all": 420, "is_greedy": false, "sum_logits_uncond": -68.42648315429688, "logits_per_token": -2.1484974755181208, "logits_per_char": -0.44968551813169966, "num_chars": 86}, {"sum_logits": -88.59494018554688, "num_tokens": 23, "num_tokens_all": 425, "is_greedy": false, "sum_logits_uncond": -110.58395385742188, "logits_per_token": -3.851953921110734, "logits_per_char": -0.7382911682128906, "num_chars": 120}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 97, "native_id": 46480, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 26.636260986328125, "incorrect_loss_raw": 28.158652623494465, "correct_loss_per_char": 0.9184917581492457, "incorrect_loss_per_char": 0.6398899159829295, "correct_loss_per_token": 3.805180140904018, "incorrect_loss_per_token": 2.659607039557563, "correct_loss_uncond": -20.00218963623047, "incorrect_loss_uncond": -29.454228083292644}, "model_output": [{"sum_logits": -17.95549964904785, "num_tokens": 9, "num_tokens_all": 407, "is_greedy": false, "sum_logits_uncond": -45.38581466674805, "logits_per_token": -1.9950555165608723, "logits_per_char": -0.5281029308543486, "num_chars": 34}, {"sum_logits": -15.85418701171875, "num_tokens": 9, "num_tokens_all": 407, "is_greedy": false, "sum_logits_uncond": -48.184288024902344, "logits_per_token": -1.7615763346354167, "logits_per_char": -0.41721544767680924, "num_chars": 38}, {"sum_logits": -50.6662712097168, "num_tokens": 12, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -79.26853942871094, "logits_per_token": -4.222189267476399, "logits_per_char": -0.9743513694176307, "num_chars": 52}, {"sum_logits": -26.636260986328125, "num_tokens": 7, "num_tokens_all": 405, "is_greedy": false, "sum_logits_uncond": -46.638450622558594, "logits_per_token": -3.805180140904018, "logits_per_char": -0.9184917581492457, "num_chars": 29}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 98, "native_id": 26419, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 112.76275634765625, "incorrect_loss_raw": 141.62959798177084, "correct_loss_per_char": 0.7776741817079741, "incorrect_loss_per_char": 0.8460465379803422, "correct_loss_per_token": 2.9674409565172697, "incorrect_loss_per_token": 3.641856984226768, "correct_loss_uncond": -19.711563110351562, "incorrect_loss_uncond": -28.447102864583332}, "model_output": [{"sum_logits": -133.28953552246094, "num_tokens": 40, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -185.9163818359375, "logits_per_token": -3.3322383880615236, "logits_per_char": -0.7660318133474766, "num_chars": 174}, {"sum_logits": -159.70176696777344, "num_tokens": 43, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -185.15663146972656, "logits_per_token": -3.713994580645894, "logits_per_char": -0.8232049843699661, "num_chars": 194}, {"sum_logits": -131.89749145507812, "num_tokens": 34, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -139.15708923339844, "logits_per_token": -3.879337983972886, "logits_per_char": -0.9489028162235836, "num_chars": 139}, {"sum_logits": -112.76275634765625, "num_tokens": 38, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -132.4743194580078, "logits_per_token": -2.9674409565172697, "logits_per_char": -0.7776741817079741, "num_chars": 145}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 99, "native_id": 28964, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 96.00475311279297, "incorrect_loss_raw": 146.70827992757162, "correct_loss_per_char": 0.4706115348666322, "incorrect_loss_per_char": 0.7462323106809731, "correct_loss_per_token": 2.2858274550664994, "incorrect_loss_per_token": 3.114353853861491, "correct_loss_uncond": -27.32849884033203, "incorrect_loss_uncond": -9.918619791666666}, "model_output": [{"sum_logits": -151.18453979492188, "num_tokens": 50, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -162.0660858154297, "logits_per_token": -3.0236907958984376, "logits_per_char": -0.709786571807145, "num_chars": 213}, {"sum_logits": -108.11295318603516, "num_tokens": 40, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -115.3852310180664, "logits_per_token": -2.702823829650879, "logits_per_char": -0.6799556804153154, "num_chars": 159}, {"sum_logits": -180.8273468017578, "num_tokens": 50, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -192.42938232421875, "logits_per_token": -3.616546936035156, "logits_per_char": -0.8489546798204592, "num_chars": 213}, {"sum_logits": -96.00475311279297, "num_tokens": 42, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -123.333251953125, "logits_per_token": -2.2858274550664994, "logits_per_char": -0.4706115348666322, "num_chars": 204}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 100, "native_id": 30141, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 115.75241088867188, "incorrect_loss_raw": 135.06458791097006, "correct_loss_per_char": 0.49047631732488084, "incorrect_loss_per_char": 0.78418064466005, "correct_loss_per_token": 2.362294099768814, "incorrect_loss_per_token": 3.5307115856398887, "correct_loss_uncond": -38.49330139160156, "incorrect_loss_uncond": -19.263832092285156}, "model_output": [{"sum_logits": -126.03895568847656, "num_tokens": 32, "num_tokens_all": 435, "is_greedy": false, "sum_logits_uncond": -140.9897918701172, "logits_per_token": -3.9387173652648926, "logits_per_char": -0.8516145654626794, "num_chars": 148}, {"sum_logits": -115.75241088867188, "num_tokens": 49, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -154.24571228027344, "logits_per_token": -2.362294099768814, "logits_per_char": -0.49047631732488084, "num_chars": 236}, {"sum_logits": -244.4414520263672, "num_tokens": 65, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -270.99267578125, "logits_per_token": -3.760637723482572, "logits_per_char": -0.8202733289475409, "num_chars": 298}, {"sum_logits": -34.713356018066406, "num_tokens": 12, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -51.00279235839844, "logits_per_token": -2.8927796681722007, "logits_per_char": -0.6806540395699295, "num_chars": 51}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 101, "native_id": 40243, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 106.57925415039062, "incorrect_loss_raw": 134.68435923258463, "correct_loss_per_char": 0.7453094695831513, "incorrect_loss_per_char": 0.7186430969573284, "correct_loss_per_token": 2.664481353759766, "incorrect_loss_per_token": 3.2114093600493034, "correct_loss_uncond": -36.837127685546875, "incorrect_loss_uncond": -22.88922373453776}, "model_output": [{"sum_logits": -106.57925415039062, "num_tokens": 40, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -143.4163818359375, "logits_per_token": -2.664481353759766, "logits_per_char": -0.7453094695831513, "num_chars": 143}, {"sum_logits": -181.52403259277344, "num_tokens": 54, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -201.93988037109375, "logits_per_token": -3.361556159125434, "logits_per_char": -0.7627060192973674, "num_chars": 238}, {"sum_logits": -97.74586486816406, "num_tokens": 31, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -117.99378967285156, "logits_per_token": -3.1530924151020665, "logits_per_char": -0.7187195946188534, "num_chars": 136}, {"sum_logits": -124.7831802368164, "num_tokens": 40, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -152.78707885742188, "logits_per_token": -3.1195795059204103, "logits_per_char": -0.6745036769557644, "num_chars": 185}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 102, "native_id": 26033, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 23.101301193237305, "incorrect_loss_raw": 31.020415623982746, "correct_loss_per_char": 0.6417028109232584, "incorrect_loss_per_char": 0.6089408121199332, "correct_loss_per_token": 2.887662649154663, "incorrect_loss_per_token": 2.881240036924508, "correct_loss_uncond": -23.5376033782959, "incorrect_loss_uncond": -15.43411127726237}, "model_output": [{"sum_logits": -16.69569206237793, "num_tokens": 6, "num_tokens_all": 404, "is_greedy": false, "sum_logits_uncond": -30.124910354614258, "logits_per_token": -2.782615343729655, "logits_per_char": -0.6421420023991511, "num_chars": 26}, {"sum_logits": -55.50254440307617, "num_tokens": 14, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -65.19303894042969, "logits_per_token": -3.9644674573625838, "logits_per_char": -0.7500343838253537, "num_chars": 74}, {"sum_logits": -23.101301193237305, "num_tokens": 8, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -46.6389045715332, "logits_per_token": -2.887662649154663, "logits_per_char": -0.6417028109232584, "num_chars": 36}, {"sum_logits": -20.86301040649414, "num_tokens": 11, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -44.045631408691406, "logits_per_token": -1.8966373096812854, "logits_per_char": -0.4346460501352946, "num_chars": 48}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 103, "native_id": 34815, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 50.7861442565918, "incorrect_loss_raw": 86.26023356119792, "correct_loss_per_char": 0.5402781303892744, "incorrect_loss_per_char": 0.6532457744527265, "correct_loss_per_token": 2.987420250387753, "incorrect_loss_per_token": 3.0030293027260733, "correct_loss_uncond": -13.15863037109375, "incorrect_loss_uncond": -8.508209228515625}, "model_output": [{"sum_logits": -90.96099853515625, "num_tokens": 27, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -96.79942321777344, "logits_per_token": -3.3689258716724537, "logits_per_char": -0.6788134219041512, "num_chars": 134}, {"sum_logits": -67.13748168945312, "num_tokens": 23, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -71.31523895263672, "logits_per_token": -2.919020943019701, "logits_per_char": -0.6921389864892075, "num_chars": 97}, {"sum_logits": -100.68222045898438, "num_tokens": 37, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -116.19066619873047, "logits_per_token": -2.721141093486064, "logits_per_char": -0.5887849149648209, "num_chars": 171}, {"sum_logits": -50.7861442565918, "num_tokens": 17, "num_tokens_all": 445, "is_greedy": false, "sum_logits_uncond": -63.94477462768555, "logits_per_token": -2.987420250387753, "logits_per_char": -0.5402781303892744, "num_chars": 94}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 104, "native_id": 21774, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 102.68042755126953, "incorrect_loss_raw": 82.70816294352214, "correct_loss_per_char": 0.41072171020507814, "incorrect_loss_per_char": 0.5141617481452995, "correct_loss_per_token": 1.6832856975617956, "incorrect_loss_per_token": 2.08474570514822, "correct_loss_uncond": -25.77893829345703, "incorrect_loss_uncond": -27.36157989501953}, "model_output": [{"sum_logits": -99.06494140625, "num_tokens": 57, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -125.8511734008789, "logits_per_token": -1.7379814281798245, "logits_per_char": -0.43834044870022126, "num_chars": 226}, {"sum_logits": -77.1692123413086, "num_tokens": 34, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -100.63850402832031, "logits_per_token": -2.269682715920841, "logits_per_char": -0.5716237951208044, "num_chars": 135}, {"sum_logits": -71.89033508300781, "num_tokens": 32, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -103.71955108642578, "logits_per_token": -2.246572971343994, "logits_per_char": -0.5325210006148727, "num_chars": 135}, {"sum_logits": -102.68042755126953, "num_tokens": 61, "num_tokens_all": 490, "is_greedy": false, "sum_logits_uncond": -128.45936584472656, "logits_per_token": -1.6832856975617956, "logits_per_char": -0.41072171020507814, "num_chars": 250}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 105, "native_id": 7122, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 69.38717651367188, "incorrect_loss_raw": 120.26074727376302, "correct_loss_per_char": 0.40341381693995276, "incorrect_loss_per_char": 0.5617500932347136, "correct_loss_per_token": 1.7791583721454327, "incorrect_loss_per_token": 2.6245148889919583, "correct_loss_uncond": -36.85469055175781, "incorrect_loss_uncond": -28.311670939127605}, "model_output": [{"sum_logits": -81.36001586914062, "num_tokens": 31, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -113.28791809082031, "logits_per_token": -2.62451664094002, "logits_per_char": -0.5811429704938617, "num_chars": 140}, {"sum_logits": -121.02786254882812, "num_tokens": 50, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -158.76002502441406, "logits_per_token": -2.4205572509765627, "logits_per_char": -0.5063927303298248, "num_chars": 239}, {"sum_logits": -158.3943634033203, "num_tokens": 56, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -173.6693115234375, "logits_per_token": -2.8284707750592912, "logits_per_char": -0.597714578880454, "num_chars": 265}, {"sum_logits": -69.38717651367188, "num_tokens": 39, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -106.24186706542969, "logits_per_token": -1.7791583721454327, "logits_per_char": -0.40341381693995276, "num_chars": 172}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 106, "native_id": 48193, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 63.0472412109375, "incorrect_loss_raw": 104.55298360188802, "correct_loss_per_char": 0.7596053157944277, "incorrect_loss_per_char": 0.6722035746972589, "correct_loss_per_token": 3.708661247702206, "incorrect_loss_per_token": 3.483315173490548, "correct_loss_uncond": -25.382339477539062, "incorrect_loss_uncond": -25.39709218343099}, "model_output": [{"sum_logits": -113.37704467773438, "num_tokens": 36, "num_tokens_all": 438, "is_greedy": false, "sum_logits_uncond": -152.11270141601562, "logits_per_token": -3.1493623521592884, "logits_per_char": -0.6161795906398607, "num_chars": 184}, {"sum_logits": -88.65249633789062, "num_tokens": 28, "num_tokens_all": 430, "is_greedy": false, "sum_logits_uncond": -106.39752960205078, "logits_per_token": -3.1661605834960938, "logits_per_char": -0.6030782063802084, "num_chars": 147}, {"sum_logits": -63.0472412109375, "num_tokens": 17, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -88.42958068847656, "logits_per_token": -3.708661247702206, "logits_per_char": -0.7596053157944277, "num_chars": 83}, {"sum_logits": -111.62940979003906, "num_tokens": 27, "num_tokens_all": 429, "is_greedy": false, "sum_logits_uncond": -131.33999633789062, "logits_per_token": -4.134422584816262, "logits_per_char": -0.7973529270717076, "num_chars": 140}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 107, "native_id": 18005, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 90.35172271728516, "incorrect_loss_raw": 118.26412455240886, "correct_loss_per_char": 0.6023448181152343, "incorrect_loss_per_char": 0.7424917942329644, "correct_loss_per_token": 2.4419384518185177, "incorrect_loss_per_token": 3.3412981768389485, "correct_loss_uncond": -28.169517517089844, "incorrect_loss_uncond": -22.374338785807293}, "model_output": [{"sum_logits": -148.36953735351562, "num_tokens": 42, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -172.85922241210938, "logits_per_token": -3.5326080322265625, "logits_per_char": -0.7647914296572971, "num_chars": 194}, {"sum_logits": -91.13786315917969, "num_tokens": 27, "num_tokens_all": 438, "is_greedy": false, "sum_logits_uncond": -115.83488464355469, "logits_per_token": -3.3754764133029513, "logits_per_char": -0.7723547725354211, "num_chars": 118}, {"sum_logits": -90.35172271728516, "num_tokens": 37, "num_tokens_all": 448, "is_greedy": false, "sum_logits_uncond": -118.521240234375, "logits_per_token": -2.4419384518185177, "logits_per_char": -0.6023448181152343, "num_chars": 150}, {"sum_logits": -115.28497314453125, "num_tokens": 37, "num_tokens_all": 448, "is_greedy": false, "sum_logits_uncond": -133.22128295898438, "logits_per_token": -3.115810084987331, "logits_per_char": -0.6903291805061752, "num_chars": 167}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 108, "native_id": 27354, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 50.26043701171875, "incorrect_loss_raw": 61.970986684163414, "correct_loss_per_char": 0.4408810264185855, "incorrect_loss_per_char": 0.736393029715151, "correct_loss_per_token": 1.795015607561384, "incorrect_loss_per_token": 3.1887641341597948, "correct_loss_uncond": -35.031463623046875, "incorrect_loss_uncond": -25.601016998291016}, "model_output": [{"sum_logits": -48.71599197387695, "num_tokens": 18, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -82.3237075805664, "logits_per_token": -2.7064439985487194, "logits_per_char": -0.686140732026436, "num_chars": 71}, {"sum_logits": -87.04946899414062, "num_tokens": 20, "num_tokens_all": 417, "is_greedy": false, "sum_logits_uncond": -105.55810546875, "logits_per_token": -4.352473449707031, "logits_per_char": -0.8882598876953125, "num_chars": 98}, {"sum_logits": -50.26043701171875, "num_tokens": 28, "num_tokens_all": 425, "is_greedy": false, "sum_logits_uncond": -85.29190063476562, "logits_per_token": -1.795015607561384, "logits_per_char": -0.4408810264185855, "num_chars": 114}, {"sum_logits": -50.147499084472656, "num_tokens": 20, "num_tokens_all": 417, "is_greedy": false, "sum_logits_uncond": -74.83419799804688, "logits_per_token": -2.507374954223633, "logits_per_char": -0.6347784694237045, "num_chars": 79}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 109, "native_id": 39408, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 58.7081413269043, "incorrect_loss_raw": 40.06548817952474, "correct_loss_per_char": 0.7724755437750566, "incorrect_loss_per_char": 0.4950439990898097, "correct_loss_per_token": 3.6692588329315186, "incorrect_loss_per_token": 2.4078451396490337, "correct_loss_uncond": -33.141422271728516, "incorrect_loss_uncond": -33.57360585530599}, "model_output": [{"sum_logits": -58.7081413269043, "num_tokens": 16, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -91.84956359863281, "logits_per_token": -3.6692588329315186, "logits_per_char": -0.7724755437750566, "num_chars": 76}, {"sum_logits": -31.197086334228516, "num_tokens": 13, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -63.32062530517578, "logits_per_token": -2.399775871863732, "logits_per_char": -0.48745447397232056, "num_chars": 64}, {"sum_logits": -19.986804962158203, "num_tokens": 13, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -55.88255310058594, "logits_per_token": -1.537446535550631, "logits_per_char": -0.32765254036324926, "num_chars": 61}, {"sum_logits": -69.0125732421875, "num_tokens": 21, "num_tokens_all": 423, "is_greedy": false, "sum_logits_uncond": -101.71410369873047, "logits_per_token": -3.286313011532738, "logits_per_char": -0.6700249829338593, "num_chars": 103}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 110, "native_id": 46313, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 101.2332992553711, "incorrect_loss_raw": 77.84489822387695, "correct_loss_per_char": 0.4730528002587434, "incorrect_loss_per_char": 0.6138296116055457, "correct_loss_per_token": 2.024665985107422, "incorrect_loss_per_token": 2.7046880157822994, "correct_loss_uncond": -22.38776397705078, "incorrect_loss_uncond": -16.051171620686848}, "model_output": [{"sum_logits": -97.08407592773438, "num_tokens": 36, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -112.28609466552734, "logits_per_token": -2.6967798868815103, "logits_per_char": -0.7086428899834626, "num_chars": 137}, {"sum_logits": -55.85557174682617, "num_tokens": 22, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -76.065673828125, "logits_per_token": -2.538889624855735, "logits_per_char": -0.5530254628398631, "num_chars": 101}, {"sum_logits": -80.59504699707031, "num_tokens": 28, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -93.33644104003906, "logits_per_token": -2.878394535609654, "logits_per_char": -0.5798204819933116, "num_chars": 139}, {"sum_logits": -101.2332992553711, "num_tokens": 50, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -123.62106323242188, "logits_per_token": -2.024665985107422, "logits_per_char": -0.4730528002587434, "num_chars": 214}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 111, "native_id": 8879, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 45.043174743652344, "incorrect_loss_raw": 88.65974934895833, "correct_loss_per_char": 0.4331074494581956, "incorrect_loss_per_char": 0.5441781411407697, "correct_loss_per_token": 1.876798947652181, "incorrect_loss_per_token": 2.6102632965457406, "correct_loss_uncond": -28.450950622558594, "incorrect_loss_uncond": -17.544474283854168}, "model_output": [{"sum_logits": -45.043174743652344, "num_tokens": 24, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -73.49412536621094, "logits_per_token": -1.876798947652181, "logits_per_char": -0.4331074494581956, "num_chars": 104}, {"sum_logits": -75.88539123535156, "num_tokens": 32, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -90.25657653808594, "logits_per_token": -2.3714184761047363, "logits_per_char": -0.516227151260895, "num_chars": 147}, {"sum_logits": -76.05390930175781, "num_tokens": 26, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -96.01899719238281, "logits_per_token": -2.925150357759916, "logits_per_char": -0.54324220929827, "num_chars": 140}, {"sum_logits": -114.03994750976562, "num_tokens": 45, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -132.33709716796875, "logits_per_token": -2.5342210557725693, "logits_per_char": -0.5730650628631438, "num_chars": 199}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 112, "native_id": 38909, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 30.62244415283203, "incorrect_loss_raw": 60.07600402832031, "correct_loss_per_char": 0.6657053076702616, "incorrect_loss_per_char": 0.6632668215912214, "correct_loss_per_token": 3.062244415283203, "incorrect_loss_per_token": 2.9014951125435204, "correct_loss_uncond": -28.487762451171875, "incorrect_loss_uncond": -23.021268208821613}, "model_output": [{"sum_logits": -69.45231628417969, "num_tokens": 23, "num_tokens_all": 435, "is_greedy": false, "sum_logits_uncond": -88.58984375, "logits_per_token": -3.0196659253991167, "logits_per_char": -0.7234616279602051, "num_chars": 96}, {"sum_logits": -55.28253936767578, "num_tokens": 20, "num_tokens_all": 432, "is_greedy": false, "sum_logits_uncond": -77.55390167236328, "logits_per_token": -2.7641269683837892, "logits_per_char": -0.6428202252055324, "num_chars": 86}, {"sum_logits": -55.49315643310547, "num_tokens": 19, "num_tokens_all": 431, "is_greedy": false, "sum_logits_uncond": -83.1480712890625, "logits_per_token": -2.9206924438476562, "logits_per_char": -0.6235186116079267, "num_chars": 89}, {"sum_logits": -30.62244415283203, "num_tokens": 10, "num_tokens_all": 422, "is_greedy": false, "sum_logits_uncond": -59.110206604003906, "logits_per_token": -3.062244415283203, "logits_per_char": -0.6657053076702616, "num_chars": 46}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 113, "native_id": 15379, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 83.59248352050781, "incorrect_loss_raw": 70.4793217976888, "correct_loss_per_char": 0.5257388900660869, "incorrect_loss_per_char": 0.536637474335914, "correct_loss_per_token": 2.5331055612275093, "incorrect_loss_per_token": 2.677269225402288, "correct_loss_uncond": -19.174957275390625, "incorrect_loss_uncond": -10.836212158203125}, "model_output": [{"sum_logits": -83.59248352050781, "num_tokens": 33, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -102.76744079589844, "logits_per_token": -2.5331055612275093, "logits_per_char": -0.5257388900660869, "num_chars": 159}, {"sum_logits": -62.440277099609375, "num_tokens": 32, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -80.277587890625, "logits_per_token": -1.951258659362793, "logits_per_char": -0.3830691846601802, "num_chars": 163}, {"sum_logits": -69.74049377441406, "num_tokens": 23, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -80.71856689453125, "logits_per_token": -3.0321953814962637, "logits_per_char": -0.6171725112780005, "num_chars": 113}, {"sum_logits": -79.25719451904297, "num_tokens": 26, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -82.95044708251953, "logits_per_token": -3.0483536353478065, "logits_per_char": -0.6096707270695613, "num_chars": 130}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 114, "native_id": 30263, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 80.90617370605469, "incorrect_loss_raw": 108.40747578938802, "correct_loss_per_char": 0.4065636869650989, "incorrect_loss_per_char": 0.5836284162880876, "correct_loss_per_token": 1.8815389233966207, "incorrect_loss_per_token": 2.798426583480218, "correct_loss_uncond": -26.95520782470703, "incorrect_loss_uncond": -14.835070292154947}, "model_output": [{"sum_logits": -80.90617370605469, "num_tokens": 43, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -107.86138153076172, "logits_per_token": -1.8815389233966207, "logits_per_char": -0.4065636869650989, "num_chars": 199}, {"sum_logits": -114.93785095214844, "num_tokens": 40, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -137.38270568847656, "logits_per_token": -2.8734462738037108, "logits_per_char": -0.5447291514319831, "num_chars": 211}, {"sum_logits": -116.54638671875, "num_tokens": 39, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -124.49983978271484, "logits_per_token": -2.988368890224359, "logits_per_char": -0.6547549815660112, "num_chars": 178}, {"sum_logits": -93.73818969726562, "num_tokens": 37, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -107.8450927734375, "logits_per_token": -2.5334645864125847, "logits_per_char": -0.5514011158662684, "num_chars": 170}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 115, "native_id": 16485, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 22.667129516601562, "incorrect_loss_raw": 35.118578592936196, "correct_loss_per_char": 0.6476322719029017, "incorrect_loss_per_char": 0.7524958857907936, "correct_loss_per_token": 2.5185699462890625, "incorrect_loss_per_token": 3.2479313487098334, "correct_loss_uncond": -18.575881958007812, "incorrect_loss_uncond": -17.36158816019694}, "model_output": [{"sum_logits": -35.66115188598633, "num_tokens": 10, "num_tokens_all": 396, "is_greedy": false, "sum_logits_uncond": -55.65242004394531, "logits_per_token": -3.5661151885986326, "logits_per_char": -0.8915287971496582, "num_chars": 40}, {"sum_logits": -22.667129516601562, "num_tokens": 9, "num_tokens_all": 395, "is_greedy": false, "sum_logits_uncond": -41.243011474609375, "logits_per_token": -2.5185699462890625, "logits_per_char": -0.6476322719029017, "num_chars": 35}, {"sum_logits": -22.390560150146484, "num_tokens": 8, "num_tokens_all": 394, "is_greedy": false, "sum_logits_uncond": -30.45750617980957, "logits_per_token": -2.7988200187683105, "logits_per_char": -0.6997050046920776, "num_chars": 32}, {"sum_logits": -47.30402374267578, "num_tokens": 14, "num_tokens_all": 400, "is_greedy": false, "sum_logits_uncond": -71.33057403564453, "logits_per_token": -3.378858838762556, "logits_per_char": -0.6662538555306448, "num_chars": 71}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 116, "native_id": 18751, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.585676193237305, "incorrect_loss_raw": 34.8791249593099, "correct_loss_per_char": 0.3995787115658031, "incorrect_loss_per_char": 0.7524460610207374, "correct_loss_per_token": 1.698209524154663, "incorrect_loss_per_token": 3.247467883680233, "correct_loss_uncond": -19.195993423461914, "incorrect_loss_uncond": -21.40821075439453}, "model_output": [{"sum_logits": -35.85712432861328, "num_tokens": 21, "num_tokens_all": 429, "is_greedy": false, "sum_logits_uncond": -78.48726654052734, "logits_per_token": -1.7074821108863467, "logits_per_char": -0.39841249254014754, "num_chars": 90}, {"sum_logits": -13.585676193237305, "num_tokens": 8, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -32.78166961669922, "logits_per_token": -1.698209524154663, "logits_per_char": -0.3995787115658031, "num_chars": 34}, {"sum_logits": -40.507904052734375, "num_tokens": 9, "num_tokens_all": 417, "is_greedy": false, "sum_logits_uncond": -49.477149963378906, "logits_per_token": -4.500878228081597, "logits_per_char": -1.0948082176414695, "num_chars": 37}, {"sum_logits": -28.27234649658203, "num_tokens": 8, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -40.89759063720703, "logits_per_token": -3.534043312072754, "logits_per_char": -0.7641174728805954, "num_chars": 37}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 117, "native_id": 23928, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 101.72250366210938, "incorrect_loss_raw": 134.6649373372396, "correct_loss_per_char": 0.6919898208306761, "incorrect_loss_per_char": 0.9038801576232585, "correct_loss_per_token": 3.0825001109730112, "incorrect_loss_per_token": 3.957411347679292, "correct_loss_uncond": -20.785110473632812, "incorrect_loss_uncond": -9.699824015299479}, "model_output": [{"sum_logits": -101.72250366210938, "num_tokens": 33, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -122.50761413574219, "logits_per_token": -3.0825001109730112, "logits_per_char": -0.6919898208306761, "num_chars": 147}, {"sum_logits": -116.08078002929688, "num_tokens": 30, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -118.34664916992188, "logits_per_token": -3.869359334309896, "logits_per_char": -0.8598576298466435, "num_chars": 135}, {"sum_logits": -143.36581420898438, "num_tokens": 35, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -157.6034698486328, "logits_per_token": -4.096166120256696, "logits_per_char": -0.9131580522865247, "num_chars": 157}, {"sum_logits": -144.5482177734375, "num_tokens": 37, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -157.1441650390625, "logits_per_token": -3.9067085884712838, "logits_per_char": -0.9386247907366071, "num_chars": 154}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 118, "native_id": 2041, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 92.02859497070312, "incorrect_loss_raw": 96.27029673258464, "correct_loss_per_char": 0.634679965315194, "incorrect_loss_per_char": 0.5530367450689263, "correct_loss_per_token": 3.2867355346679688, "incorrect_loss_per_token": 2.6736576444928937, "correct_loss_uncond": -18.352996826171875, "incorrect_loss_uncond": -21.400052388509113}, "model_output": [{"sum_logits": -110.42684936523438, "num_tokens": 40, "num_tokens_all": 495, "is_greedy": false, "sum_logits_uncond": -137.98382568359375, "logits_per_token": -2.7606712341308595, "logits_per_char": -0.5692105637383216, "num_chars": 194}, {"sum_logits": -92.02859497070312, "num_tokens": 28, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -110.381591796875, "logits_per_token": -3.2867355346679688, "logits_per_char": -0.634679965315194, "num_chars": 145}, {"sum_logits": -68.1182861328125, "num_tokens": 31, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -90.63238525390625, "logits_per_token": -2.197364068800403, "logits_per_char": -0.46338970158375853, "num_chars": 147}, {"sum_logits": -110.26575469970703, "num_tokens": 36, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -124.39483642578125, "logits_per_token": -3.0629376305474176, "logits_per_char": -0.6265099698846991, "num_chars": 176}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 119, "native_id": 10763, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 41.54439163208008, "incorrect_loss_raw": 61.193294525146484, "correct_loss_per_char": 0.5466367320010537, "incorrect_loss_per_char": 0.8155929191601697, "correct_loss_per_token": 2.7696261088053387, "incorrect_loss_per_token": 3.5784795072343614, "correct_loss_uncond": -47.92769241333008, "incorrect_loss_uncond": -21.22550328572591}, "model_output": [{"sum_logits": -47.77137756347656, "num_tokens": 15, "num_tokens_all": 424, "is_greedy": false, "sum_logits_uncond": -67.60603332519531, "logits_per_token": -3.184758504231771, "logits_per_char": -0.8380943432188871, "num_chars": 57}, {"sum_logits": -74.98812866210938, "num_tokens": 20, "num_tokens_all": 429, "is_greedy": false, "sum_logits_uncond": -101.41896057128906, "logits_per_token": -3.749406433105469, "logits_per_char": -0.7977460495969082, "num_chars": 94}, {"sum_logits": -60.820377349853516, "num_tokens": 16, "num_tokens_all": 425, "is_greedy": false, "sum_logits_uncond": -78.23139953613281, "logits_per_token": -3.8012735843658447, "logits_per_char": -0.8109383646647136, "num_chars": 75}, {"sum_logits": -41.54439163208008, "num_tokens": 15, "num_tokens_all": 424, "is_greedy": false, "sum_logits_uncond": -89.47208404541016, "logits_per_token": -2.7696261088053387, "logits_per_char": -0.5466367320010537, "num_chars": 76}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 120, "native_id": 11116, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 68.36927795410156, "incorrect_loss_raw": 85.25913747151692, "correct_loss_per_char": 0.4650971289394664, "incorrect_loss_per_char": 0.4445710853114992, "correct_loss_per_token": 2.2054605791645665, "incorrect_loss_per_token": 2.060268614232827, "correct_loss_uncond": -30.694007873535156, "incorrect_loss_uncond": -22.155410766601562}, "model_output": [{"sum_logits": -66.93132019042969, "num_tokens": 29, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -89.76164245605469, "logits_per_token": -2.307976558290679, "logits_per_char": -0.45223864993533575, "num_chars": 148}, {"sum_logits": -109.09864044189453, "num_tokens": 51, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -125.77101135253906, "logits_per_token": -2.1391890282724417, "logits_per_char": -0.4827373470880289, "num_chars": 226}, {"sum_logits": -68.36927795410156, "num_tokens": 31, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -99.06328582763672, "logits_per_token": -2.2054605791645665, "logits_per_char": -0.4650971289394664, "num_chars": 147}, {"sum_logits": -79.74745178222656, "num_tokens": 46, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -106.71099090576172, "logits_per_token": -1.73364025613536, "logits_per_char": -0.3987372589111328, "num_chars": 200}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 121, "native_id": 8758, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 16.223026275634766, "incorrect_loss_raw": 37.10206667582194, "correct_loss_per_char": 0.3605116950141059, "incorrect_loss_per_char": 0.8556271711985269, "correct_loss_per_token": 1.3519188563028972, "incorrect_loss_per_token": 3.609975401973312, "correct_loss_uncond": -36.49827194213867, "incorrect_loss_uncond": -14.864330927530924}, "model_output": [{"sum_logits": -16.223026275634766, "num_tokens": 12, "num_tokens_all": 400, "is_greedy": false, "sum_logits_uncond": -52.72129821777344, "logits_per_token": -1.3519188563028972, "logits_per_char": -0.3605116950141059, "num_chars": 45}, {"sum_logits": -43.608795166015625, "num_tokens": 13, "num_tokens_all": 401, "is_greedy": false, "sum_logits_uncond": -52.02360534667969, "logits_per_token": -3.354522705078125, "logits_per_char": -0.8386306762695312, "num_chars": 52}, {"sum_logits": -42.2663459777832, "num_tokens": 11, "num_tokens_all": 399, "is_greedy": false, "sum_logits_uncond": -60.9351806640625, "logits_per_token": -3.8423950888893823, "logits_per_char": -0.88054887453715, "num_chars": 48}, {"sum_logits": -25.431058883666992, "num_tokens": 7, "num_tokens_all": 395, "is_greedy": false, "sum_logits_uncond": -42.940406799316406, "logits_per_token": -3.6330084119524275, "logits_per_char": -0.8477019627888998, "num_chars": 30}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 122, "native_id": 2736, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 62.33679962158203, "incorrect_loss_raw": 79.59107462565105, "correct_loss_per_char": 0.4299089629074623, "incorrect_loss_per_char": 0.5756985960623971, "correct_loss_per_token": 1.8334352829877067, "incorrect_loss_per_token": 2.773607127728018, "correct_loss_uncond": -20.035804748535156, "incorrect_loss_uncond": -17.568094889322918}, "model_output": [{"sum_logits": -77.61075592041016, "num_tokens": 33, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -105.48442840576172, "logits_per_token": -2.3518410884972774, "logits_per_char": -0.5389635827806261, "num_chars": 144}, {"sum_logits": -83.64201354980469, "num_tokens": 27, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -91.91142272949219, "logits_per_token": -3.09785235369647, "logits_per_char": -0.5728905037657855, "num_chars": 146}, {"sum_logits": -62.33679962158203, "num_tokens": 34, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -82.37260437011719, "logits_per_token": -1.8334352829877067, "logits_per_char": -0.4299089629074623, "num_chars": 145}, {"sum_logits": -77.52045440673828, "num_tokens": 27, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -94.08165740966797, "logits_per_token": -2.8711279409903065, "logits_per_char": -0.61524170164078, "num_chars": 126}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 123, "native_id": 37283, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 109.59754180908203, "incorrect_loss_raw": 124.72647094726562, "correct_loss_per_char": 0.45101869057235405, "incorrect_loss_per_char": 0.5993091579858526, "correct_loss_per_token": 2.0678781473411703, "incorrect_loss_per_token": 2.796443911685916, "correct_loss_uncond": -40.47699737548828, "incorrect_loss_uncond": -32.27645365397135}, "model_output": [{"sum_logits": -109.59754180908203, "num_tokens": 53, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -150.0745391845703, "logits_per_token": -2.0678781473411703, "logits_per_char": -0.45101869057235405, "num_chars": 243}, {"sum_logits": -138.21363830566406, "num_tokens": 54, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -176.95303344726562, "logits_per_token": -2.5595118204752603, "logits_per_char": -0.5664493373182954, "num_chars": 244}, {"sum_logits": -102.49676513671875, "num_tokens": 33, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -132.33790588378906, "logits_per_token": -3.1059625799005683, "logits_per_char": -0.5959114252134811, "num_chars": 172}, {"sum_logits": -133.46900939941406, "num_tokens": 49, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -161.71783447265625, "logits_per_token": -2.7238573346819197, "logits_per_char": -0.6355667114257812, "num_chars": 210}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 124, "native_id": 21284, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 37.887168884277344, "incorrect_loss_raw": 43.76170984903971, "correct_loss_per_char": 0.4405484753985738, "incorrect_loss_per_char": 0.877045420504421, "correct_loss_per_token": 2.228656993192785, "incorrect_loss_per_token": 4.018124919467502, "correct_loss_uncond": -33.212486267089844, "incorrect_loss_uncond": -13.387446085611979}, "model_output": [{"sum_logits": -56.99421310424805, "num_tokens": 15, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -72.24139404296875, "logits_per_token": -3.7996142069498697, "logits_per_char": -0.850659897078329, "num_chars": 67}, {"sum_logits": -37.887168884277344, "num_tokens": 17, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -71.09965515136719, "logits_per_token": -2.228656993192785, "logits_per_char": -0.4405484753985738, "num_chars": 86}, {"sum_logits": -33.026756286621094, "num_tokens": 8, "num_tokens_all": 401, "is_greedy": false, "sum_logits_uncond": -49.12545394897461, "logits_per_token": -4.128344535827637, "logits_per_char": -0.9713751849006204, "num_chars": 34}, {"sum_logits": -41.26416015625, "num_tokens": 10, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -50.08061981201172, "logits_per_token": -4.126416015625, "logits_per_char": -0.8091011795343137, "num_chars": 51}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 125, "native_id": 14530, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 85.79803466796875, "incorrect_loss_raw": 104.9074592590332, "correct_loss_per_char": 0.539610280930621, "incorrect_loss_per_char": 0.6534565467575847, "correct_loss_per_token": 2.5999404444839014, "incorrect_loss_per_token": 2.8231842666277274, "correct_loss_uncond": -14.81817626953125, "incorrect_loss_uncond": -20.268619537353516}, "model_output": [{"sum_logits": -85.79803466796875, "num_tokens": 33, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -100.6162109375, "logits_per_token": -2.5999404444839014, "logits_per_char": -0.539610280930621, "num_chars": 159}, {"sum_logits": -156.54269409179688, "num_tokens": 48, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -178.00289916992188, "logits_per_token": -3.261306126912435, "logits_per_char": -0.708337982315823, "num_chars": 221}, {"sum_logits": -104.42304992675781, "num_tokens": 47, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -124.81150817871094, "logits_per_token": -2.2217670197182513, "logits_per_char": -0.4948959712168617, "num_chars": 211}, {"sum_logits": -53.75663375854492, "num_tokens": 18, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -72.71382904052734, "logits_per_token": -2.9864796532524958, "logits_per_char": -0.7571356867400694, "num_chars": 71}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 126, "native_id": 13309, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 86.54932403564453, "incorrect_loss_raw": 139.34374491373697, "correct_loss_per_char": 0.5512695798448696, "incorrect_loss_per_char": 0.6847141010764409, "correct_loss_per_token": 2.7919136785691783, "incorrect_loss_per_token": 3.4044771731987535, "correct_loss_uncond": -49.781700134277344, "incorrect_loss_uncond": -23.81756591796875}, "model_output": [{"sum_logits": -86.54932403564453, "num_tokens": 31, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -136.33102416992188, "logits_per_token": -2.7919136785691783, "logits_per_char": -0.5512695798448696, "num_chars": 157}, {"sum_logits": -117.54739379882812, "num_tokens": 39, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -146.75811767578125, "logits_per_token": -3.0140357384314904, "logits_per_char": -0.6186704936780427, "num_chars": 190}, {"sum_logits": -148.82704162597656, "num_tokens": 48, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -189.98031616210938, "logits_per_token": -3.100563367207845, "logits_per_char": -0.6201126734415691, "num_chars": 240}, {"sum_logits": -151.65679931640625, "num_tokens": 37, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -152.74549865722656, "logits_per_token": -4.098832413956925, "logits_per_char": -0.815359136109711, "num_chars": 186}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 127, "native_id": 31986, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 85.18997192382812, "incorrect_loss_raw": 126.78436533610027, "correct_loss_per_char": 0.5496127220892137, "incorrect_loss_per_char": 0.5760407700033814, "correct_loss_per_token": 2.662186622619629, "incorrect_loss_per_token": 2.884233314394977, "correct_loss_uncond": -20.4827880859375, "incorrect_loss_uncond": -20.316627502441406}, "model_output": [{"sum_logits": -153.93069458007812, "num_tokens": 49, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -160.84225463867188, "logits_per_token": -3.141442746532207, "logits_per_char": -0.5808705455852005, "num_chars": 265}, {"sum_logits": -102.97616577148438, "num_tokens": 39, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -135.47201538085938, "logits_per_token": -2.6404145069611378, "logits_per_char": -0.5391422291700753, "num_chars": 191}, {"sum_logits": -123.44623565673828, "num_tokens": 43, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -144.98870849609375, "logits_per_token": -2.870842689691588, "logits_per_char": -0.6081095352548683, "num_chars": 203}, {"sum_logits": -85.18997192382812, "num_tokens": 32, "num_tokens_all": 446, "is_greedy": false, "sum_logits_uncond": -105.67276000976562, "logits_per_token": -2.662186622619629, "logits_per_char": -0.5496127220892137, "num_chars": 155}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 128, "native_id": 15714, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 27.00457000732422, "incorrect_loss_raw": 38.82962544759115, "correct_loss_per_char": 1.0801828002929688, "incorrect_loss_per_char": 0.7553150203849404, "correct_loss_per_token": 3.8577957153320312, "incorrect_loss_per_token": 3.1878647825608275, "correct_loss_uncond": -15.1173095703125, "incorrect_loss_uncond": -21.99796422322591}, "model_output": [{"sum_logits": -27.00457000732422, "num_tokens": 7, "num_tokens_all": 397, "is_greedy": false, "sum_logits_uncond": -42.12187957763672, "logits_per_token": -3.8577957153320312, "logits_per_char": -1.0801828002929688, "num_chars": 25}, {"sum_logits": -36.38096618652344, "num_tokens": 13, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -59.49055862426758, "logits_per_token": -2.798535860501803, "logits_per_char": -0.6382625646758497, "num_chars": 57}, {"sum_logits": -26.512908935546875, "num_tokens": 7, "num_tokens_all": 397, "is_greedy": false, "sum_logits_uncond": -45.291847229003906, "logits_per_token": -3.7875584193638394, "logits_per_char": -0.9819595902054398, "num_chars": 27}, {"sum_logits": -53.595001220703125, "num_tokens": 18, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -77.70036315917969, "logits_per_token": -2.97750006781684, "logits_per_char": -0.6457229062735316, "num_chars": 83}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 129, "native_id": 46865, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 62.660369873046875, "incorrect_loss_raw": 107.51064046223958, "correct_loss_per_char": 0.4933887391578494, "incorrect_loss_per_char": 0.6591485527426751, "correct_loss_per_token": 2.506414794921875, "incorrect_loss_per_token": 3.275562563180383, "correct_loss_uncond": -28.123703002929688, "incorrect_loss_uncond": -12.272163391113281}, "model_output": [{"sum_logits": -62.660369873046875, "num_tokens": 25, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -90.78407287597656, "logits_per_token": -2.506414794921875, "logits_per_char": -0.4933887391578494, "num_chars": 127}, {"sum_logits": -148.79547119140625, "num_tokens": 49, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -156.1096649169922, "logits_per_token": -3.0366422692123725, "logits_per_char": -0.5722902738131009, "num_chars": 260}, {"sum_logits": -76.75820922851562, "num_tokens": 24, "num_tokens_all": 448, "is_greedy": false, "sum_logits_uncond": -91.42333221435547, "logits_per_token": -3.198258717854818, "logits_per_char": -0.7173664413879965, "num_chars": 107}, {"sum_logits": -96.97824096679688, "num_tokens": 27, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -111.81541442871094, "logits_per_token": -3.5917867024739585, "logits_per_char": -0.6877889430269282, "num_chars": 141}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 130, "native_id": 7360, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 55.853599548339844, "incorrect_loss_raw": 121.78923034667969, "correct_loss_per_char": 0.3385066639293324, "incorrect_loss_per_char": 0.7518581114471775, "correct_loss_per_token": 1.801729017688382, "incorrect_loss_per_token": 3.386003981398831, "correct_loss_uncond": -39.045265197753906, "incorrect_loss_uncond": -16.679128011067707}, "model_output": [{"sum_logits": -55.853599548339844, "num_tokens": 31, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -94.89886474609375, "logits_per_token": -1.801729017688382, "logits_per_char": -0.3385066639293324, "num_chars": 165}, {"sum_logits": -102.73491668701172, "num_tokens": 32, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -118.84088134765625, "logits_per_token": -3.210466146469116, "logits_per_char": -0.6585571582500751, "num_chars": 156}, {"sum_logits": -153.94796752929688, "num_tokens": 37, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -180.36288452148438, "logits_per_token": -4.1607558791701855, "logits_per_char": -0.9932126937373992, "num_chars": 155}, {"sum_logits": -108.68480682373047, "num_tokens": 39, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -116.20130920410156, "logits_per_token": -2.7867899185571914, "logits_per_char": -0.6038044823540581, "num_chars": 180}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 131, "native_id": 17042, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 8.480987548828125, "incorrect_loss_raw": 19.73759396870931, "correct_loss_per_char": 0.3261918288010817, "incorrect_loss_per_char": 0.5865488541707475, "correct_loss_per_token": 1.696197509765625, "incorrect_loss_per_token": 2.699808191370081, "correct_loss_uncond": -20.054332733154297, "incorrect_loss_uncond": -19.16450309753418}, "model_output": [{"sum_logits": -8.480987548828125, "num_tokens": 5, "num_tokens_all": 394, "is_greedy": false, "sum_logits_uncond": -28.535320281982422, "logits_per_token": -1.696197509765625, "logits_per_char": -0.3261918288010817, "num_chars": 26}, {"sum_logits": -11.138679504394531, "num_tokens": 8, "num_tokens_all": 397, "is_greedy": false, "sum_logits_uncond": -35.046234130859375, "logits_per_token": -1.3923349380493164, "logits_per_char": -0.3840923967032597, "num_chars": 29}, {"sum_logits": -23.494693756103516, "num_tokens": 9, "num_tokens_all": 398, "is_greedy": false, "sum_logits_uncond": -43.15707015991211, "logits_per_token": -2.610521528455946, "logits_per_char": -0.6526303821139865, "num_chars": 36}, {"sum_logits": -24.579408645629883, "num_tokens": 6, "num_tokens_all": 395, "is_greedy": false, "sum_logits_uncond": -38.502986907958984, "logits_per_token": -4.0965681076049805, "logits_per_char": -0.7229237836949965, "num_chars": 34}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 132, "native_id": 27749, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 94.36502838134766, "incorrect_loss_raw": 83.16700236002605, "correct_loss_per_char": 0.6333223381298501, "incorrect_loss_per_char": 0.6045294089589947, "correct_loss_per_token": 2.9489071369171143, "incorrect_loss_per_token": 2.578469856225383, "correct_loss_uncond": -40.37018585205078, "incorrect_loss_uncond": -30.494155883789062}, "model_output": [{"sum_logits": -94.36502838134766, "num_tokens": 32, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -134.73521423339844, "logits_per_token": -2.9489071369171143, "logits_per_char": -0.6333223381298501, "num_chars": 149}, {"sum_logits": -76.63240051269531, "num_tokens": 29, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -103.7431869506836, "logits_per_token": -2.6424965694032867, "logits_per_char": -0.7030495459880304, "num_chars": 109}, {"sum_logits": -84.0433578491211, "num_tokens": 35, "num_tokens_all": 494, "is_greedy": false, "sum_logits_uncond": -114.2845458984375, "logits_per_token": -2.4012387956891743, "logits_per_char": -0.5062852882477175, "num_chars": 166}, {"sum_logits": -88.82524871826172, "num_tokens": 33, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -122.95574188232422, "logits_per_token": -2.6916742035836885, "logits_per_char": -0.6042533926412362, "num_chars": 147}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 133, "native_id": 487, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 57.69861602783203, "incorrect_loss_raw": 133.16080729166666, "correct_loss_per_char": 0.5106072214852392, "incorrect_loss_per_char": 0.6708714522898465, "correct_loss_per_token": 2.307944641113281, "incorrect_loss_per_token": 2.967350123930942, "correct_loss_uncond": -20.59563446044922, "incorrect_loss_uncond": -13.573158264160156}, "model_output": [{"sum_logits": -57.69861602783203, "num_tokens": 25, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -78.29425048828125, "logits_per_token": -2.307944641113281, "logits_per_char": -0.5106072214852392, "num_chars": 113}, {"sum_logits": -98.62635803222656, "num_tokens": 38, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -115.0932846069336, "logits_per_token": -2.595430474532278, "logits_per_char": -0.5767623276738395, "num_chars": 171}, {"sum_logits": -189.1552276611328, "num_tokens": 51, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -191.37432861328125, "logits_per_token": -3.7089260325712314, "logits_per_char": -0.8118250114211709, "num_chars": 233}, {"sum_logits": -111.70083618164062, "num_tokens": 43, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -133.73428344726562, "logits_per_token": -2.597693864689317, "logits_per_char": -0.6240270177745286, "num_chars": 179}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 134, "native_id": 25362, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 65.37356567382812, "incorrect_loss_raw": 68.86283111572266, "correct_loss_per_char": 0.6739542852971971, "incorrect_loss_per_char": 0.691724559119598, "correct_loss_per_token": 3.8455038631663605, "incorrect_loss_per_token": 3.1907816534308817, "correct_loss_uncond": -23.24755096435547, "incorrect_loss_uncond": -33.49791971842448}, "model_output": [{"sum_logits": -47.71411895751953, "num_tokens": 18, "num_tokens_all": 427, "is_greedy": false, "sum_logits_uncond": -84.34789276123047, "logits_per_token": -2.650784386528863, "logits_per_char": -0.589063197006414, "num_chars": 81}, {"sum_logits": -65.37356567382812, "num_tokens": 17, "num_tokens_all": 426, "is_greedy": false, "sum_logits_uncond": -88.6211166381836, "logits_per_token": -3.8455038631663605, "logits_per_char": -0.6739542852971971, "num_chars": 97}, {"sum_logits": -66.5184326171875, "num_tokens": 19, "num_tokens_all": 428, "is_greedy": false, "sum_logits_uncond": -96.07403564453125, "logits_per_token": -3.5009701377467106, "logits_per_char": -0.7645796852550287, "num_chars": 87}, {"sum_logits": -92.35594177246094, "num_tokens": 27, "num_tokens_all": 436, "is_greedy": false, "sum_logits_uncond": -126.66032409667969, "logits_per_token": -3.4205904360170716, "logits_per_char": -0.7215307950973511, "num_chars": 128}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 135, "native_id": 24073, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 107.29717254638672, "incorrect_loss_raw": 86.07234446207683, "correct_loss_per_char": 0.5928020582673299, "incorrect_loss_per_char": 0.6598002819530125, "correct_loss_per_token": 2.3843816121419272, "incorrect_loss_per_token": 3.018731139114084, "correct_loss_uncond": -37.62177276611328, "incorrect_loss_uncond": -16.50432586669922}, "model_output": [{"sum_logits": -105.49545288085938, "num_tokens": 30, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -116.86678314208984, "logits_per_token": -3.516515096028646, "logits_per_char": -0.7931988938410479, "num_chars": 133}, {"sum_logits": -88.20744323730469, "num_tokens": 28, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -117.0983657836914, "logits_per_token": -3.1502658299037387, "logits_per_char": -0.6485841414507698, "num_chars": 136}, {"sum_logits": -107.29717254638672, "num_tokens": 45, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -144.9189453125, "logits_per_token": -2.3843816121419272, "logits_per_char": -0.5928020582673299, "num_chars": 181}, {"sum_logits": -64.5141372680664, "num_tokens": 27, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -73.76486206054688, "logits_per_token": -2.389412491409867, "logits_per_char": -0.53761781056722, "num_chars": 120}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 136, "native_id": 49263, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.39095687866211, "incorrect_loss_raw": 19.36374282836914, "correct_loss_per_char": 0.6376646132696242, "incorrect_loss_per_char": 0.5197535947589107, "correct_loss_per_token": 2.678191375732422, "incorrect_loss_per_token": 2.704120116763645, "correct_loss_uncond": -15.306381225585938, "incorrect_loss_uncond": -23.486948649088543}, "model_output": [{"sum_logits": -17.263465881347656, "num_tokens": 5, "num_tokens_all": 407, "is_greedy": false, "sum_logits_uncond": -31.500049591064453, "logits_per_token": -3.4526931762695314, "logits_per_char": -0.6639794569749099, "num_chars": 26}, {"sum_logits": -25.739519119262695, "num_tokens": 12, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -59.87697219848633, "logits_per_token": -2.1449599266052246, "logits_per_char": -0.4085637955438523, "num_chars": 63}, {"sum_logits": -13.39095687866211, "num_tokens": 5, "num_tokens_all": 407, "is_greedy": false, "sum_logits_uncond": -28.697338104248047, "logits_per_token": -2.678191375732422, "logits_per_char": -0.6376646132696242, "num_chars": 21}, {"sum_logits": -15.08824348449707, "num_tokens": 6, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -37.175052642822266, "logits_per_token": -2.5147072474161782, "logits_per_char": -0.48671753175797, "num_chars": 31}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 137, "native_id": 31828, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 130.83810424804688, "incorrect_loss_raw": 94.7534065246582, "correct_loss_per_char": 0.7350455294834094, "incorrect_loss_per_char": 0.6303580483448864, "correct_loss_per_token": 3.1151929582868303, "incorrect_loss_per_token": 2.920996316274007, "correct_loss_uncond": -6.5081787109375, "incorrect_loss_uncond": -10.91054662068685}, "model_output": [{"sum_logits": -55.87819290161133, "num_tokens": 24, "num_tokens_all": 436, "is_greedy": false, "sum_logits_uncond": -69.07483673095703, "logits_per_token": -2.3282580375671387, "logits_per_char": -0.5587819290161132, "num_chars": 100}, {"sum_logits": -130.83810424804688, "num_tokens": 42, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -137.34628295898438, "logits_per_token": -3.1151929582868303, "logits_per_char": -0.7350455294834094, "num_chars": 178}, {"sum_logits": -170.8927001953125, "num_tokens": 48, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -180.66851806640625, "logits_per_token": -3.5602645874023438, "logits_per_char": -0.7334450652159334, "num_chars": 233}, {"sum_logits": -57.48932647705078, "num_tokens": 20, "num_tokens_all": 432, "is_greedy": false, "sum_logits_uncond": -67.24850463867188, "logits_per_token": -2.874466323852539, "logits_per_char": -0.5988471508026123, "num_chars": 96}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 138, "native_id": 36523, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 55.256500244140625, "incorrect_loss_raw": 76.8587137858073, "correct_loss_per_char": 0.313957387750799, "incorrect_loss_per_char": 0.4863014897355605, "correct_loss_per_token": 1.5787571498325892, "incorrect_loss_per_token": 2.6322819318639517, "correct_loss_uncond": -14.621810913085938, "incorrect_loss_uncond": -12.601064046223959}, "model_output": [{"sum_logits": -119.29962158203125, "num_tokens": 31, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -131.92117309570312, "logits_per_token": -3.8483748897429435, "logits_per_char": -0.7409914383977096, "num_chars": 161}, {"sum_logits": -55.098548889160156, "num_tokens": 28, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -64.09077453613281, "logits_per_token": -1.9678053174700056, "logits_per_char": -0.3554745089623236, "num_chars": 155}, {"sum_logits": -56.17797088623047, "num_tokens": 27, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -72.36738586425781, "logits_per_token": -2.0806655883789062, "logits_per_char": -0.3624385218466482, "num_chars": 155}, {"sum_logits": -55.256500244140625, "num_tokens": 35, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -69.87831115722656, "logits_per_token": -1.5787571498325892, "logits_per_char": -0.313957387750799, "num_chars": 176}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 139, "native_id": 4867, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 84.46128845214844, "incorrect_loss_raw": 111.70054117838542, "correct_loss_per_char": 0.5414185157189002, "incorrect_loss_per_char": 0.6331178892330128, "correct_loss_per_token": 2.2827375257337414, "incorrect_loss_per_token": 2.6073722082471087, "correct_loss_uncond": -31.21318817138672, "incorrect_loss_uncond": -18.866434733072918}, "model_output": [{"sum_logits": -84.46128845214844, "num_tokens": 37, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -115.67447662353516, "logits_per_token": -2.2827375257337414, "logits_per_char": -0.5414185157189002, "num_chars": 156}, {"sum_logits": -144.48985290527344, "num_tokens": 54, "num_tokens_all": 500, "is_greedy": false, "sum_logits_uncond": -157.32101440429688, "logits_per_token": -2.6757380167643228, "logits_per_char": -0.6597710178322989, "num_chars": 219}, {"sum_logits": -111.63160705566406, "num_tokens": 48, "num_tokens_all": 494, "is_greedy": false, "sum_logits_uncond": -136.1306610107422, "logits_per_token": -2.3256584803263345, "logits_per_char": -0.5814146200815836, "num_chars": 192}, {"sum_logits": -78.98016357421875, "num_tokens": 28, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -98.24925231933594, "logits_per_token": -2.8207201276506697, "logits_per_char": -0.6581680297851562, "num_chars": 120}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 140, "native_id": 19921, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 164.3249969482422, "incorrect_loss_raw": 182.4765828450521, "correct_loss_per_char": 0.5889784836854559, "incorrect_loss_per_char": 0.8912666198108289, "correct_loss_per_token": 2.8331896025559, "incorrect_loss_per_token": 3.999297389502324, "correct_loss_uncond": -36.95542907714844, "incorrect_loss_uncond": -10.733881632486979}, "model_output": [{"sum_logits": -179.3250732421875, "num_tokens": 51, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -178.6429901123047, "logits_per_token": -3.516177906709559, "logits_per_char": -0.804148310503083, "num_chars": 223}, {"sum_logits": -164.3249969482422, "num_tokens": 58, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -201.28042602539062, "logits_per_token": -2.8331896025559, "logits_per_char": -0.5889784836854559, "num_chars": 279}, {"sum_logits": -197.73883056640625, "num_tokens": 51, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -215.65512084960938, "logits_per_token": -3.8772319718903185, "logits_per_char": -0.8308354225479254, "num_chars": 238}, {"sum_logits": -170.3658447265625, "num_tokens": 37, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -185.33328247070312, "logits_per_token": -4.604482289907095, "logits_per_char": -1.0388161263814786, "num_chars": 164}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 141, "native_id": 954, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 53.15077209472656, "incorrect_loss_raw": 88.47593180338542, "correct_loss_per_char": 0.43926257929526086, "incorrect_loss_per_char": 0.5560683509190267, "correct_loss_per_token": 1.9685471146195024, "incorrect_loss_per_token": 2.4550348563654225, "correct_loss_uncond": -8.393020629882812, "incorrect_loss_uncond": -18.46898905436198}, "model_output": [{"sum_logits": -103.9070816040039, "num_tokens": 35, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -133.132568359375, "logits_per_token": -2.9687737601143973, "logits_per_char": -0.6973629637852611, "num_chars": 149}, {"sum_logits": -53.15077209472656, "num_tokens": 27, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -61.543792724609375, "logits_per_token": -1.9685471146195024, "logits_per_char": -0.43926257929526086, "num_chars": 121}, {"sum_logits": -86.94164276123047, "num_tokens": 35, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -97.825439453125, "logits_per_token": -2.4840469360351562, "logits_per_char": -0.5269190470377604, "num_chars": 165}, {"sum_logits": -74.57907104492188, "num_tokens": 39, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -89.87675476074219, "logits_per_token": -1.9122838729467146, "logits_per_char": -0.4439230419340588, "num_chars": 168}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 142, "native_id": 27094, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 96.21644592285156, "incorrect_loss_raw": 78.11516189575195, "correct_loss_per_char": 0.7127144142433449, "incorrect_loss_per_char": 0.5466434402320678, "correct_loss_per_token": 3.103756320091986, "incorrect_loss_per_token": 2.299594122102362, "correct_loss_uncond": -18.86163330078125, "incorrect_loss_uncond": -21.219974517822266}, "model_output": [{"sum_logits": -96.21644592285156, "num_tokens": 31, "num_tokens_all": 446, "is_greedy": false, "sum_logits_uncond": -115.07807922363281, "logits_per_token": -3.103756320091986, "logits_per_char": -0.7127144142433449, "num_chars": 135}, {"sum_logits": -98.0067138671875, "num_tokens": 37, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -124.63253021240234, "logits_per_token": -2.648830104518581, "logits_per_char": -0.5868665501029192, "num_chars": 167}, {"sum_logits": -74.45734405517578, "num_tokens": 30, "num_tokens_all": 445, "is_greedy": false, "sum_logits_uncond": -93.12614440917969, "logits_per_token": -2.481911468505859, "logits_per_char": -0.6589145491608476, "num_chars": 113}, {"sum_logits": -61.88142776489258, "num_tokens": 35, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -80.24673461914062, "logits_per_token": -1.7680407932826452, "logits_per_char": -0.3941492214324368, "num_chars": 157}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 143, "native_id": 43910, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 46.14438247680664, "incorrect_loss_raw": 24.83229637145996, "correct_loss_per_char": 0.8545256014223452, "incorrect_loss_per_char": 0.9075389677039093, "correct_loss_per_token": 3.8453652064005532, "incorrect_loss_per_token": 3.4260821942929867, "correct_loss_uncond": -27.56435775756836, "incorrect_loss_uncond": -12.944554646809896}, "model_output": [{"sum_logits": -17.883352279663086, "num_tokens": 5, "num_tokens_all": 391, "is_greedy": false, "sum_logits_uncond": -31.944292068481445, "logits_per_token": -3.576670455932617, "logits_per_char": -0.8941676139831543, "num_chars": 20}, {"sum_logits": -46.14438247680664, "num_tokens": 12, "num_tokens_all": 398, "is_greedy": false, "sum_logits_uncond": -73.708740234375, "logits_per_token": -3.8453652064005532, "logits_per_char": -0.8545256014223452, "num_chars": 54}, {"sum_logits": -29.605186462402344, "num_tokens": 8, "num_tokens_all": 394, "is_greedy": false, "sum_logits_uncond": -39.68640899658203, "logits_per_token": -3.700648307800293, "logits_per_char": -0.8971268624970408, "num_chars": 33}, {"sum_logits": -27.008350372314453, "num_tokens": 9, "num_tokens_all": 395, "is_greedy": false, "sum_logits_uncond": -41.699851989746094, "logits_per_token": -3.0009278191460504, "logits_per_char": -0.9313224266315329, "num_chars": 29}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 144, "native_id": 46822, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 66.77356719970703, "incorrect_loss_raw": 63.61465199788412, "correct_loss_per_char": 0.6813629306092555, "incorrect_loss_per_char": 0.5203717265725017, "correct_loss_per_token": 2.473095081470631, "incorrect_loss_per_token": 2.1067963648935684, "correct_loss_uncond": -26.838623046875, "incorrect_loss_uncond": -25.11974843343099}, "model_output": [{"sum_logits": -56.959293365478516, "num_tokens": 30, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -80.48371124267578, "logits_per_token": -1.8986431121826173, "logits_per_char": -0.4484983729565237, "num_chars": 127}, {"sum_logits": -66.77356719970703, "num_tokens": 27, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -93.61219024658203, "logits_per_token": -2.473095081470631, "logits_per_char": -0.6813629306092555, "num_chars": 98}, {"sum_logits": -63.42665481567383, "num_tokens": 35, "num_tokens_all": 494, "is_greedy": false, "sum_logits_uncond": -83.71460723876953, "logits_per_token": -1.8121901375906808, "logits_per_char": -0.42855847848428263, "num_chars": 148}, {"sum_logits": -70.4580078125, "num_tokens": 27, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -102.0048828125, "logits_per_token": -2.6095558449074074, "logits_per_char": -0.684058328276699, "num_chars": 103}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 145, "native_id": 685, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 67.70626068115234, "incorrect_loss_raw": 96.6731465657552, "correct_loss_per_char": 0.40301345643543063, "incorrect_loss_per_char": 0.5605821671930314, "correct_loss_per_token": 1.934464590890067, "incorrect_loss_per_token": 2.791381399574501, "correct_loss_uncond": -27.76081085205078, "incorrect_loss_uncond": -14.848602294921875}, "model_output": [{"sum_logits": -113.88945007324219, "num_tokens": 34, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -120.20347595214844, "logits_per_token": -3.349689708036535, "logits_per_char": -0.6434432207527807, "num_chars": 177}, {"sum_logits": -67.70626068115234, "num_tokens": 35, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -95.46707153320312, "logits_per_token": -1.934464590890067, "logits_per_char": -0.40301345643543063, "num_chars": 168}, {"sum_logits": -74.52023315429688, "num_tokens": 28, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -95.28275299072266, "logits_per_token": -2.6614368983677457, "logits_per_char": -0.5035150888803843, "num_chars": 148}, {"sum_logits": -101.60975646972656, "num_tokens": 43, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -119.07901763916016, "logits_per_token": -2.3630175923192223, "logits_per_char": -0.5347881919459293, "num_chars": 190}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 146, "native_id": 12843, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 83.58379364013672, "incorrect_loss_raw": 106.29048665364583, "correct_loss_per_char": 0.5127840100621884, "incorrect_loss_per_char": 0.4877564049587048, "correct_loss_per_token": 1.99009032476516, "incorrect_loss_per_token": 2.2129057363224285, "correct_loss_uncond": -18.88836669921875, "incorrect_loss_uncond": -30.41766357421875}, "model_output": [{"sum_logits": -83.58379364013672, "num_tokens": 42, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -102.47216033935547, "logits_per_token": -1.99009032476516, "logits_per_char": -0.5127840100621884, "num_chars": 163}, {"sum_logits": -64.71878814697266, "num_tokens": 31, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -86.62445068359375, "logits_per_token": -2.0877028434507308, "logits_per_char": -0.46227705819266185, "num_chars": 140}, {"sum_logits": -132.3831787109375, "num_tokens": 54, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -169.61862182617188, "logits_per_token": -2.4515403464988426, "logits_per_char": -0.525330074249752, "num_chars": 252}, {"sum_logits": -121.76949310302734, "num_tokens": 58, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -153.88137817382812, "logits_per_token": -2.099474019017713, "logits_per_char": -0.47566208243370056, "num_chars": 256}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 147, "native_id": 14253, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 17.537250518798828, "incorrect_loss_raw": 51.152139027913414, "correct_loss_per_char": 0.4739797437513197, "incorrect_loss_per_char": 0.6676781601506728, "correct_loss_per_token": 2.922875086466471, "incorrect_loss_per_token": 3.0088442060682508, "correct_loss_uncond": -24.063007354736328, "incorrect_loss_uncond": -31.37789789835612}, "model_output": [{"sum_logits": -37.840492248535156, "num_tokens": 12, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -75.56105041503906, "logits_per_token": -3.153374354044596, "logits_per_char": -0.7007498564543547, "num_chars": 54}, {"sum_logits": -25.962352752685547, "num_tokens": 9, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -60.07780456542969, "logits_per_token": -2.8847058614095054, "logits_per_char": -0.6332281159191597, "num_chars": 41}, {"sum_logits": -17.537250518798828, "num_tokens": 6, "num_tokens_all": 405, "is_greedy": false, "sum_logits_uncond": -41.600257873535156, "logits_per_token": -2.922875086466471, "logits_per_char": -0.4739797437513197, "num_chars": 37}, {"sum_logits": -89.65357208251953, "num_tokens": 30, "num_tokens_all": 429, "is_greedy": false, "sum_logits_uncond": -111.95125579833984, "logits_per_token": -2.988452402750651, "logits_per_char": -0.6690565080785039, "num_chars": 134}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 148, "native_id": 18876, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 83.90454864501953, "incorrect_loss_raw": 155.50259399414062, "correct_loss_per_char": 0.5993182046072824, "incorrect_loss_per_char": 0.6577557988312847, "correct_loss_per_token": 2.8932602981041216, "incorrect_loss_per_token": 3.1333077920186656, "correct_loss_uncond": -24.425071716308594, "incorrect_loss_uncond": -24.738739013671875}, "model_output": [{"sum_logits": -143.09243774414062, "num_tokens": 50, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -172.65077209472656, "logits_per_token": -2.8618487548828124, "logits_per_char": -0.6624649895562066, "num_chars": 216}, {"sum_logits": -156.7064208984375, "num_tokens": 57, "num_tokens_all": 493, "is_greedy": false, "sum_logits_uncond": -171.60865783691406, "logits_per_token": -2.7492354543585527, "logits_per_char": -0.6073892282885175, "num_chars": 258}, {"sum_logits": -83.90454864501953, "num_tokens": 29, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -108.32962036132812, "logits_per_token": -2.8932602981041216, "logits_per_char": -0.5993182046072824, "num_chars": 140}, {"sum_logits": -166.70892333984375, "num_tokens": 44, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -196.46456909179688, "logits_per_token": -3.788839166814631, "logits_per_char": -0.7034131786491298, "num_chars": 237}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 149, "native_id": 40853, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 70.57005310058594, "incorrect_loss_raw": 105.98818969726562, "correct_loss_per_char": 0.3582236197999286, "incorrect_loss_per_char": 0.5094904815725251, "correct_loss_per_token": 1.8094885410406651, "incorrect_loss_per_token": 2.670046874320961, "correct_loss_uncond": -29.06634521484375, "incorrect_loss_uncond": -18.98511250813802}, "model_output": [{"sum_logits": -102.84123229980469, "num_tokens": 41, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -118.6944808959961, "logits_per_token": -2.5083227390196265, "logits_per_char": -0.5142061614990234, "num_chars": 200}, {"sum_logits": -70.57005310058594, "num_tokens": 39, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -99.63639831542969, "logits_per_token": -1.8094885410406651, "logits_per_char": -0.3582236197999286, "num_chars": 197}, {"sum_logits": -94.03819274902344, "num_tokens": 38, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -112.31584930419922, "logits_per_token": -2.4746892828690377, "logits_per_char": -0.4923465588954107, "num_chars": 191}, {"sum_logits": -121.08514404296875, "num_tokens": 40, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -143.90957641601562, "logits_per_token": -3.0271286010742187, "logits_per_char": -0.5219187243231411, "num_chars": 232}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 150, "native_id": 38450, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 38.613975524902344, "incorrect_loss_raw": 59.779126485188804, "correct_loss_per_char": 0.4826746940612793, "incorrect_loss_per_char": 0.6874097553328632, "correct_loss_per_token": 2.1452208624945746, "incorrect_loss_per_token": 3.2123735074643736, "correct_loss_uncond": -22.92279815673828, "incorrect_loss_uncond": -23.86852773030599}, "model_output": [{"sum_logits": -38.613975524902344, "num_tokens": 18, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -61.536773681640625, "logits_per_token": -2.1452208624945746, "logits_per_char": -0.4826746940612793, "num_chars": 80}, {"sum_logits": -54.672454833984375, "num_tokens": 18, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -80.17559814453125, "logits_per_token": -3.037358601888021, "logits_per_char": -0.7009289081280048, "num_chars": 78}, {"sum_logits": -58.692100524902344, "num_tokens": 20, "num_tokens_all": 420, "is_greedy": false, "sum_logits_uncond": -83.16654968261719, "logits_per_token": -2.934605026245117, "logits_per_char": -0.6594618036505882, "num_chars": 89}, {"sum_logits": -65.97282409667969, "num_tokens": 18, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -87.60081481933594, "logits_per_token": -3.6651568942599826, "logits_per_char": -0.7018385542199966, "num_chars": 94}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 151, "native_id": 36389, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 90.53761291503906, "incorrect_loss_raw": 106.41714223225911, "correct_loss_per_char": 0.43950297531572363, "incorrect_loss_per_char": 0.7672463407883511, "correct_loss_per_token": 2.0576730207963423, "incorrect_loss_per_token": 3.5403645997040507, "correct_loss_uncond": -25.26165008544922, "incorrect_loss_uncond": -22.160799662272137}, "model_output": [{"sum_logits": -90.53761291503906, "num_tokens": 44, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -115.79926300048828, "logits_per_token": -2.0576730207963423, "logits_per_char": -0.43950297531572363, "num_chars": 206}, {"sum_logits": -104.91156005859375, "num_tokens": 31, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -125.34596252441406, "logits_per_token": -3.384243872857863, "logits_per_char": -0.6902076319644326, "num_chars": 152}, {"sum_logits": -97.60234069824219, "num_tokens": 23, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -112.53050231933594, "logits_per_token": -4.243580030358356, "logits_per_char": -0.9207767990400206, "num_chars": 106}, {"sum_logits": -116.7375259399414, "num_tokens": 39, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -147.85736083984375, "logits_per_token": -2.9932698958959336, "logits_per_char": -0.6907545913606, "num_chars": 169}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 152, "native_id": 42532, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 54.39739990234375, "incorrect_loss_raw": 56.20063018798828, "correct_loss_per_char": 0.3725849308379709, "incorrect_loss_per_char": 0.46712682187497556, "correct_loss_per_token": 1.6484060576467803, "incorrect_loss_per_token": 2.0099663601560778, "correct_loss_uncond": -38.34785461425781, "incorrect_loss_uncond": -26.67608133951823}, "model_output": [{"sum_logits": -54.39739990234375, "num_tokens": 33, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -92.74525451660156, "logits_per_token": -1.6484060576467803, "logits_per_char": -0.3725849308379709, "num_chars": 146}, {"sum_logits": -41.95686340332031, "num_tokens": 27, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -70.98466491699219, "logits_per_token": -1.5539579038266782, "logits_per_char": -0.3496405283610026, "num_chars": 120}, {"sum_logits": -45.019371032714844, "num_tokens": 21, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -67.15135192871094, "logits_per_token": -2.1437795729864213, "logits_per_char": -0.5002152336968316, "num_chars": 90}, {"sum_logits": -81.62565612792969, "num_tokens": 35, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -110.4941177368164, "logits_per_token": -2.332161603655134, "logits_per_char": -0.5515247035670925, "num_chars": 148}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 153, "native_id": 33298, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 98.7406005859375, "incorrect_loss_raw": 92.68458811442058, "correct_loss_per_char": 0.5707549166817196, "incorrect_loss_per_char": 0.6513109408539335, "correct_loss_per_token": 2.3509666806175593, "incorrect_loss_per_token": 3.0624333257458685, "correct_loss_uncond": -21.484909057617188, "incorrect_loss_uncond": -16.146408081054688}, "model_output": [{"sum_logits": -94.59394073486328, "num_tokens": 36, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -114.56278228759766, "logits_per_token": -2.627609464857313, "logits_per_char": -0.5875400045643682, "num_chars": 161}, {"sum_logits": -100.5823974609375, "num_tokens": 31, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -116.79080963134766, "logits_per_token": -3.244593466481855, "logits_per_char": -0.7033734088177448, "num_chars": 143}, {"sum_logits": -82.87742614746094, "num_tokens": 25, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -95.13939666748047, "logits_per_token": -3.3150970458984377, "logits_per_char": -0.6630194091796875, "num_chars": 125}, {"sum_logits": -98.7406005859375, "num_tokens": 42, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -120.22550964355469, "logits_per_token": -2.3509666806175593, "logits_per_char": -0.5707549166817196, "num_chars": 173}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 154, "native_id": 16709, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 57.819244384765625, "incorrect_loss_raw": 104.0902328491211, "correct_loss_per_char": 0.45171284675598145, "incorrect_loss_per_char": 0.6649663590978303, "correct_loss_per_token": 1.9937670477505387, "incorrect_loss_per_token": 3.117392073819076, "correct_loss_uncond": -18.00128173828125, "incorrect_loss_uncond": -19.195348103841145}, "model_output": [{"sum_logits": -57.819244384765625, "num_tokens": 29, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -75.82052612304688, "logits_per_token": -1.9937670477505387, "logits_per_char": -0.45171284675598145, "num_chars": 128}, {"sum_logits": -117.62242126464844, "num_tokens": 35, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -136.99710083007812, "logits_per_token": -3.360640607561384, "logits_per_char": -0.7687739951937806, "num_chars": 153}, {"sum_logits": -110.52837371826172, "num_tokens": 37, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -130.9226837158203, "logits_per_token": -2.9872533437368034, "logits_per_char": -0.6209459197655153, "num_chars": 178}, {"sum_logits": -84.11990356445312, "num_tokens": 28, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -101.93695831298828, "logits_per_token": -3.0042822701590404, "logits_per_char": -0.6051791623341951, "num_chars": 139}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 155, "native_id": 34962, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 17.72991371154785, "incorrect_loss_raw": 33.73481686909994, "correct_loss_per_char": 0.5214680503396427, "incorrect_loss_per_char": 0.6332975722865601, "correct_loss_per_token": 1.9699904123942058, "incorrect_loss_per_token": 2.7262307680570164, "correct_loss_uncond": -23.7117977142334, "incorrect_loss_uncond": -23.4011656443278}, "model_output": [{"sum_logits": -39.69735336303711, "num_tokens": 13, "num_tokens_all": 420, "is_greedy": false, "sum_logits_uncond": -64.34429931640625, "logits_per_token": -3.05364256638747, "logits_per_char": -0.6202711462974548, "num_chars": 64}, {"sum_logits": -17.72991371154785, "num_tokens": 9, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -41.44171142578125, "logits_per_token": -1.9699904123942058, "logits_per_char": -0.5214680503396427, "num_chars": 34}, {"sum_logits": -28.152021408081055, "num_tokens": 11, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -48.73488998413086, "logits_per_token": -2.559274673461914, "logits_per_char": -0.6256004757351346, "num_chars": 45}, {"sum_logits": -33.35507583618164, "num_tokens": 13, "num_tokens_all": 420, "is_greedy": false, "sum_logits_uncond": -58.328758239746094, "logits_per_token": -2.565775064321665, "logits_per_char": -0.654021094827091, "num_chars": 51}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 156, "native_id": 32821, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 17.210952758789062, "incorrect_loss_raw": 35.51965840657552, "correct_loss_per_char": 0.44130648099459135, "incorrect_loss_per_char": 0.9760525210873111, "correct_loss_per_token": 1.9123280843098958, "incorrect_loss_per_token": 4.637092022668748, "correct_loss_uncond": -34.061946868896484, "incorrect_loss_uncond": -10.544339497884115}, "model_output": [{"sum_logits": -41.29045867919922, "num_tokens": 8, "num_tokens_all": 399, "is_greedy": false, "sum_logits_uncond": -47.23888397216797, "logits_per_token": -5.161307334899902, "logits_per_char": -1.0587297097230568, "num_chars": 39}, {"sum_logits": -17.210952758789062, "num_tokens": 9, "num_tokens_all": 400, "is_greedy": false, "sum_logits_uncond": -51.27289962768555, "logits_per_token": -1.9123280843098958, "logits_per_char": -0.44130648099459135, "num_chars": 39}, {"sum_logits": -32.14988327026367, "num_tokens": 8, "num_tokens_all": 399, "is_greedy": false, "sum_logits_uncond": -49.45709991455078, "logits_per_token": -4.018735408782959, "logits_per_char": -0.765473411196754, "num_chars": 42}, {"sum_logits": -33.11863327026367, "num_tokens": 7, "num_tokens_all": 398, "is_greedy": false, "sum_logits_uncond": -41.496009826660156, "logits_per_token": -4.731233324323382, "logits_per_char": -1.1039544423421224, "num_chars": 30}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 157, "native_id": 10827, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 40.11520004272461, "incorrect_loss_raw": 52.53871409098307, "correct_loss_per_char": 0.39718019844281793, "incorrect_loss_per_char": 0.6564541706668879, "correct_loss_per_token": 1.6714666684468586, "incorrect_loss_per_token": 3.0962934347299425, "correct_loss_uncond": -41.67412185668945, "incorrect_loss_uncond": -17.013647715250652}, "model_output": [{"sum_logits": -41.84794235229492, "num_tokens": 13, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -56.8368034362793, "logits_per_token": -3.219072488638071, "logits_per_char": -0.6245961545118645, "num_chars": 67}, {"sum_logits": -50.651607513427734, "num_tokens": 18, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -68.73741149902344, "logits_per_token": -2.8139781951904297, "logits_per_char": -0.6664685199135228, "num_chars": 76}, {"sum_logits": -65.11659240722656, "num_tokens": 20, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -83.08287048339844, "logits_per_token": -3.255829620361328, "logits_per_char": -0.6782978375752767, "num_chars": 96}, {"sum_logits": -40.11520004272461, "num_tokens": 24, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -81.78932189941406, "logits_per_token": -1.6714666684468586, "logits_per_char": -0.39718019844281793, "num_chars": 101}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 158, "native_id": 41237, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 41.05365753173828, "incorrect_loss_raw": 36.75962702433268, "correct_loss_per_char": 0.5701896879408095, "incorrect_loss_per_char": 0.5969138910636512, "correct_loss_per_token": 2.4149210312787224, "incorrect_loss_per_token": 2.4720042988106057, "correct_loss_uncond": -25.056541442871094, "incorrect_loss_uncond": -22.38528060913086}, "model_output": [{"sum_logits": -42.424625396728516, "num_tokens": 16, "num_tokens_all": 425, "is_greedy": false, "sum_logits_uncond": -61.163230895996094, "logits_per_token": -2.6515390872955322, "logits_per_char": -0.614849643430848, "num_chars": 69}, {"sum_logits": -35.81254577636719, "num_tokens": 12, "num_tokens_all": 421, "is_greedy": false, "sum_logits_uncond": -52.17076873779297, "logits_per_token": -2.9843788146972656, "logits_per_char": -0.7308682811503507, "num_chars": 49}, {"sum_logits": -41.05365753173828, "num_tokens": 17, "num_tokens_all": 426, "is_greedy": false, "sum_logits_uncond": -66.11019897460938, "logits_per_token": -2.4149210312787224, "logits_per_char": -0.5701896879408095, "num_chars": 72}, {"sum_logits": -32.041709899902344, "num_tokens": 18, "num_tokens_all": 427, "is_greedy": false, "sum_logits_uncond": -64.10072326660156, "logits_per_token": -1.7800949944390192, "logits_per_char": -0.4450237486097548, "num_chars": 72}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 159, "native_id": 36590, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 39.806053161621094, "incorrect_loss_raw": 101.20645268758138, "correct_loss_per_char": 0.6124008178710938, "incorrect_loss_per_char": 0.8100666862332861, "correct_loss_per_token": 3.0620040893554688, "incorrect_loss_per_token": 3.704309986508082, "correct_loss_uncond": -28.97661590576172, "incorrect_loss_uncond": -24.418895721435547}, "model_output": [{"sum_logits": -168.9583740234375, "num_tokens": 45, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -192.02337646484375, "logits_per_token": -3.754630533854167, "logits_per_char": -0.9182520327360734, "num_chars": 184}, {"sum_logits": -63.56257247924805, "num_tokens": 16, "num_tokens_all": 425, "is_greedy": false, "sum_logits_uncond": -87.23284149169922, "logits_per_token": -3.972660779953003, "logits_per_char": -0.814904775374975, "num_chars": 78}, {"sum_logits": -39.806053161621094, "num_tokens": 13, "num_tokens_all": 422, "is_greedy": false, "sum_logits_uncond": -68.78266906738281, "logits_per_token": -3.0620040893554688, "logits_per_char": -0.6124008178710938, "num_chars": 65}, {"sum_logits": -71.0984115600586, "num_tokens": 21, "num_tokens_all": 430, "is_greedy": false, "sum_logits_uncond": -97.61982727050781, "logits_per_token": -3.385638645717076, "logits_per_char": -0.6970432505888098, "num_chars": 102}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 160, "native_id": 42247, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 52.95802307128906, "incorrect_loss_raw": 121.53940836588542, "correct_loss_per_char": 0.3652277453192349, "incorrect_loss_per_char": 0.7222070433788081, "correct_loss_per_token": 1.8261387265961746, "incorrect_loss_per_token": 2.916018227205413, "correct_loss_uncond": -25.21575164794922, "incorrect_loss_uncond": -15.250930786132812}, "model_output": [{"sum_logits": -170.3013916015625, "num_tokens": 42, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -180.35960388183594, "logits_per_token": -4.054795038132441, "logits_per_char": -1.0077005420210798, "num_chars": 169}, {"sum_logits": -52.95802307128906, "num_tokens": 29, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -78.17377471923828, "logits_per_token": -1.8261387265961746, "logits_per_char": -0.3652277453192349, "num_chars": 145}, {"sum_logits": -114.80293273925781, "num_tokens": 41, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -128.4149932861328, "logits_per_token": -2.8000715302258, "logits_per_char": -0.7220310235173447, "num_chars": 159}, {"sum_logits": -79.51390075683594, "num_tokens": 42, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -101.59642028808594, "logits_per_token": -1.8931881132579984, "logits_per_char": -0.43688956459799966, "num_chars": 182}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 161, "native_id": 16289, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 13.876638412475586, "incorrect_loss_raw": 23.24080467224121, "correct_loss_per_char": 0.47850477284398574, "incorrect_loss_per_char": 0.6638564314161028, "correct_loss_per_token": 1.9823769160679408, "incorrect_loss_per_token": 2.570256785511569, "correct_loss_uncond": -12.321929931640625, "incorrect_loss_uncond": -20.753820419311523}, "model_output": [{"sum_logits": -26.40375518798828, "num_tokens": 8, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -39.36414337158203, "logits_per_token": -3.300469398498535, "logits_per_char": -0.8251173496246338, "num_chars": 32}, {"sum_logits": -23.375932693481445, "num_tokens": 9, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -48.51266098022461, "logits_per_token": -2.5973258548312717, "logits_per_char": -0.667883791242327, "num_chars": 35}, {"sum_logits": -19.942726135253906, "num_tokens": 11, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -44.10707092285156, "logits_per_token": -1.8129751032049006, "logits_per_char": -0.49856815338134763, "num_chars": 40}, {"sum_logits": -13.876638412475586, "num_tokens": 7, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -26.19856834411621, "logits_per_token": -1.9823769160679408, "logits_per_char": -0.47850477284398574, "num_chars": 29}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 162, "native_id": 15546, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 49.09811782836914, "incorrect_loss_raw": 84.07593282063802, "correct_loss_per_char": 0.4959405841249408, "incorrect_loss_per_char": 0.6472709898081416, "correct_loss_per_token": 1.9639247131347657, "incorrect_loss_per_token": 2.7423398296649637, "correct_loss_uncond": -23.344463348388672, "incorrect_loss_uncond": -16.50299580891927}, "model_output": [{"sum_logits": -81.17408752441406, "num_tokens": 30, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -94.25782775878906, "logits_per_token": -2.7058029174804688, "logits_per_char": -0.6057767725702542, "num_chars": 134}, {"sum_logits": -91.99346923828125, "num_tokens": 39, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -110.39356231689453, "logits_per_token": -2.358806903545673, "logits_per_char": -0.5609357880383004, "num_chars": 164}, {"sum_logits": -49.09811782836914, "num_tokens": 25, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -72.44258117675781, "logits_per_token": -1.9639247131347657, "logits_per_char": -0.4959405841249408, "num_chars": 99}, {"sum_logits": -79.06024169921875, "num_tokens": 25, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -97.08539581298828, "logits_per_token": -3.16240966796875, "logits_per_char": -0.7751004088158701, "num_chars": 102}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 163, "native_id": 42849, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 23.614778518676758, "incorrect_loss_raw": 26.320897102355957, "correct_loss_per_char": 0.3524593808757725, "incorrect_loss_per_char": 0.7009896655771185, "correct_loss_per_token": 1.574318567911784, "incorrect_loss_per_token": 3.273980837019663, "correct_loss_uncond": -38.10089683532715, "incorrect_loss_uncond": -13.079736391703287}, "model_output": [{"sum_logits": -45.940921783447266, "num_tokens": 9, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -48.26312255859375, "logits_per_token": -5.104546864827474, "logits_per_char": -1.0441118587147107, "num_chars": 44}, {"sum_logits": -10.676470756530762, "num_tokens": 7, "num_tokens_all": 417, "is_greedy": false, "sum_logits_uncond": -32.98064422607422, "logits_per_token": -1.5252101080758231, "logits_per_char": -0.31401384578031655, "num_chars": 34}, {"sum_logits": -23.614778518676758, "num_tokens": 15, "num_tokens_all": 425, "is_greedy": false, "sum_logits_uncond": -61.715675354003906, "logits_per_token": -1.574318567911784, "logits_per_char": -0.3524593808757725, "num_chars": 67}, {"sum_logits": -22.345298767089844, "num_tokens": 7, "num_tokens_all": 417, "is_greedy": false, "sum_logits_uncond": -36.958133697509766, "logits_per_token": -3.192185538155692, "logits_per_char": -0.7448432922363282, "num_chars": 30}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 164, "native_id": 398, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 28.53170394897461, "incorrect_loss_raw": 53.02301279703776, "correct_loss_per_char": 0.4835882025249934, "incorrect_loss_per_char": 0.7711454527130911, "correct_loss_per_token": 2.3776419957478843, "incorrect_loss_per_token": 3.2050711737738715, "correct_loss_uncond": -22.41094970703125, "incorrect_loss_uncond": -22.131112416585285}, "model_output": [{"sum_logits": -64.92575073242188, "num_tokens": 24, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -93.92703247070312, "logits_per_token": -2.7052396138509116, "logits_per_char": -0.6365269679649204, "num_chars": 102}, {"sum_logits": -38.02528381347656, "num_tokens": 12, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -60.20003890991211, "logits_per_token": -3.168773651123047, "logits_per_char": -0.8266366046407948, "num_chars": 46}, {"sum_logits": -56.118003845214844, "num_tokens": 15, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -71.3353042602539, "logits_per_token": -3.741200256347656, "logits_per_char": -0.8502727855335582, "num_chars": 66}, {"sum_logits": -28.53170394897461, "num_tokens": 12, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -50.94265365600586, "logits_per_token": -2.3776419957478843, "logits_per_char": -0.4835882025249934, "num_chars": 59}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 165, "native_id": 14388, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 20.893381118774414, "incorrect_loss_raw": 27.44141133626302, "correct_loss_per_char": 0.4748495708812367, "incorrect_loss_per_char": 1.0558726265528633, "correct_loss_per_token": 1.8993982835249468, "incorrect_loss_per_token": 4.0253728548685705, "correct_loss_uncond": -23.67945671081543, "incorrect_loss_uncond": -17.85109583536784}, "model_output": [{"sum_logits": -23.807857513427734, "num_tokens": 5, "num_tokens_all": 390, "is_greedy": false, "sum_logits_uncond": -35.056053161621094, "logits_per_token": -4.7615715026855465, "logits_per_char": -1.0821753415194424, "num_chars": 22}, {"sum_logits": -20.893381118774414, "num_tokens": 11, "num_tokens_all": 396, "is_greedy": false, "sum_logits_uncond": -44.572837829589844, "logits_per_token": -1.8993982835249468, "logits_per_char": -0.4748495708812367, "num_chars": 44}, {"sum_logits": -32.21152877807617, "num_tokens": 8, "num_tokens_all": 393, "is_greedy": false, "sum_logits_uncond": -51.40547561645508, "logits_per_token": -4.0264410972595215, "logits_per_char": -1.0737176259358725, "num_chars": 30}, {"sum_logits": -26.304847717285156, "num_tokens": 8, "num_tokens_all": 393, "is_greedy": false, "sum_logits_uncond": -49.415992736816406, "logits_per_token": -3.2881059646606445, "logits_per_char": -1.0117249122032752, "num_chars": 26}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 166, "native_id": 21241, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 29.90617561340332, "incorrect_loss_raw": 71.65699259440105, "correct_loss_per_char": 0.3603153688361846, "incorrect_loss_per_char": 0.738291543787176, "correct_loss_per_token": 1.9937450408935546, "incorrect_loss_per_token": 3.8637361444978633, "correct_loss_uncond": -32.74054145812988, "incorrect_loss_uncond": -19.841397603352863}, "model_output": [{"sum_logits": -71.2349853515625, "num_tokens": 18, "num_tokens_all": 431, "is_greedy": false, "sum_logits_uncond": -100.61734008789062, "logits_per_token": -3.9574991861979165, "logits_per_char": -0.6475907759232955, "num_chars": 110}, {"sum_logits": -29.90617561340332, "num_tokens": 15, "num_tokens_all": 428, "is_greedy": false, "sum_logits_uncond": -62.6467170715332, "logits_per_token": -1.9937450408935546, "logits_per_char": -0.3603153688361846, "num_chars": 83}, {"sum_logits": -54.740447998046875, "num_tokens": 13, "num_tokens_all": 426, "is_greedy": false, "sum_logits_uncond": -77.87963104248047, "logits_per_token": -4.210803692157452, "logits_per_char": -0.8553194999694824, "num_chars": 64}, {"sum_logits": -88.99554443359375, "num_tokens": 26, "num_tokens_all": 439, "is_greedy": false, "sum_logits_uncond": -95.99819946289062, "logits_per_token": -3.4229055551382213, "logits_per_char": -0.71196435546875, "num_chars": 125}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 167, "native_id": 37396, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 31.25248146057129, "incorrect_loss_raw": 30.134318669637043, "correct_loss_per_char": 0.5787496566772461, "incorrect_loss_per_char": 0.772366453286759, "correct_loss_per_token": 2.4040370354285607, "incorrect_loss_per_token": 3.059966689064389, "correct_loss_uncond": -23.29506492614746, "incorrect_loss_uncond": -18.75769869486491}, "model_output": [{"sum_logits": -31.25248146057129, "num_tokens": 13, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -54.54754638671875, "logits_per_token": -2.4040370354285607, "logits_per_char": -0.5787496566772461, "num_chars": 54}, {"sum_logits": -39.582096099853516, "num_tokens": 14, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -61.912784576416016, "logits_per_token": -2.8272925785609653, "logits_per_char": -0.6384209048363471, "num_chars": 62}, {"sum_logits": -28.00288963317871, "num_tokens": 8, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -44.530357360839844, "logits_per_token": -3.500361204147339, "logits_per_char": -0.9656168839027142, "num_chars": 29}, {"sum_logits": -22.817970275878906, "num_tokens": 8, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -40.23291015625, "logits_per_token": -2.8522462844848633, "logits_per_char": -0.7130615711212158, "num_chars": 32}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 168, "native_id": 2433, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 106.75333404541016, "incorrect_loss_raw": 73.2078374226888, "correct_loss_per_char": 0.6206589188686636, "incorrect_loss_per_char": 0.6259450648929225, "correct_loss_per_token": 3.2349495165275806, "incorrect_loss_per_token": 2.73248713357108, "correct_loss_uncond": -11.966697692871094, "incorrect_loss_uncond": -24.989728291829426}, "model_output": [{"sum_logits": -92.65924072265625, "num_tokens": 36, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -127.46196746826172, "logits_per_token": -2.5738677978515625, "logits_per_char": -0.5719706217447916, "num_chars": 162}, {"sum_logits": -106.75333404541016, "num_tokens": 33, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -118.72003173828125, "logits_per_token": -3.2349495165275806, "logits_per_char": -0.6206589188686636, "num_chars": 172}, {"sum_logits": -56.013824462890625, "num_tokens": 21, "num_tokens_all": 445, "is_greedy": false, "sum_logits_uncond": -79.93841552734375, "logits_per_token": -2.667324974423363, "logits_per_char": -0.636520732532848, "num_chars": 88}, {"sum_logits": -70.95044708251953, "num_tokens": 24, "num_tokens_all": 448, "is_greedy": false, "sum_logits_uncond": -87.19231414794922, "logits_per_token": -2.956268628438314, "logits_per_char": -0.6693438404011276, "num_chars": 106}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 169, "native_id": 8998, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 57.626075744628906, "incorrect_loss_raw": 53.76324335734049, "correct_loss_per_char": 0.6002716223398844, "incorrect_loss_per_char": 0.7043292427617219, "correct_loss_per_token": 2.7440988449823287, "incorrect_loss_per_token": 3.3618223472877786, "correct_loss_uncond": -42.837799072265625, "incorrect_loss_uncond": -20.972885131835938}, "model_output": [{"sum_logits": -57.626075744628906, "num_tokens": 21, "num_tokens_all": 443, "is_greedy": false, "sum_logits_uncond": -100.46387481689453, "logits_per_token": -2.7440988449823287, "logits_per_char": -0.6002716223398844, "num_chars": 96}, {"sum_logits": -41.692989349365234, "num_tokens": 12, "num_tokens_all": 434, "is_greedy": false, "sum_logits_uncond": -56.84990310668945, "logits_per_token": -3.4744157791137695, "logits_per_char": -0.7580543518066406, "num_chars": 55}, {"sum_logits": -85.85325622558594, "num_tokens": 30, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -118.01945495605469, "logits_per_token": -2.861775207519531, "logits_per_char": -0.5880360015451092, "num_chars": 146}, {"sum_logits": -33.74348449707031, "num_tokens": 9, "num_tokens_all": 431, "is_greedy": false, "sum_logits_uncond": -49.339027404785156, "logits_per_token": -3.749276055230035, "logits_per_char": -0.7668973749334161, "num_chars": 44}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 170, "native_id": 28556, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 70.12930297851562, "incorrect_loss_raw": 82.07358169555664, "correct_loss_per_char": 0.5795810163513688, "incorrect_loss_per_char": 0.5961722982851532, "correct_loss_per_token": 2.597381591796875, "incorrect_loss_per_token": 2.484779611364141, "correct_loss_uncond": -20.077774047851562, "incorrect_loss_uncond": -26.482641855875652}, "model_output": [{"sum_logits": -91.89329528808594, "num_tokens": 37, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -112.60694885253906, "logits_per_token": -2.483602575353674, "logits_per_char": -0.5779452533841883, "num_chars": 159}, {"sum_logits": -70.12930297851562, "num_tokens": 27, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -90.20707702636719, "logits_per_token": -2.597381591796875, "logits_per_char": -0.5795810163513688, "num_chars": 121}, {"sum_logits": -99.70014190673828, "num_tokens": 37, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -126.03829193115234, "logits_per_token": -2.6945984299118453, "logits_per_char": -0.6474035188749239, "num_chars": 154}, {"sum_logits": -54.6273078918457, "num_tokens": 24, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -87.02342987060547, "logits_per_token": -2.2761378288269043, "logits_per_char": -0.5631681225963474, "num_chars": 97}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 171, "native_id": 23497, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 113.67797088623047, "incorrect_loss_raw": 98.78277333577473, "correct_loss_per_char": 0.5518348101273324, "incorrect_loss_per_char": 0.5125368902642443, "correct_loss_per_token": 2.5261771308051215, "incorrect_loss_per_token": 2.3650855784786673, "correct_loss_uncond": -20.25200653076172, "incorrect_loss_uncond": -22.370676676432293}, "model_output": [{"sum_logits": -113.67797088623047, "num_tokens": 45, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -133.9299774169922, "logits_per_token": -2.5261771308051215, "logits_per_char": -0.5518348101273324, "num_chars": 206}, {"sum_logits": -66.24761962890625, "num_tokens": 38, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -97.72273254394531, "logits_per_token": -1.7433584112870066, "logits_per_char": -0.374280336886476, "num_chars": 177}, {"sum_logits": -142.61419677734375, "num_tokens": 50, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -163.9354705810547, "logits_per_token": -2.852283935546875, "logits_per_char": -0.568184050905752, "num_chars": 251}, {"sum_logits": -87.48650360107422, "num_tokens": 35, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -101.8021469116211, "logits_per_token": -2.4996143886021205, "logits_per_char": -0.5951462830005049, "num_chars": 147}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 172, "native_id": 15492, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 89.91301727294922, "incorrect_loss_raw": 116.69036610921223, "correct_loss_per_char": 0.6611251270069796, "incorrect_loss_per_char": 0.7160071271846817, "correct_loss_per_token": 2.6445005080279183, "incorrect_loss_per_token": 3.055803386031368, "correct_loss_uncond": -38.25745391845703, "incorrect_loss_uncond": -22.01360829671224}, "model_output": [{"sum_logits": -115.9368896484375, "num_tokens": 31, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -123.09683227539062, "logits_per_token": -3.739899666078629, "logits_per_char": -0.9274951171875, "num_chars": 125}, {"sum_logits": -109.90223693847656, "num_tokens": 38, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -131.31285095214844, "logits_per_token": -2.8921641299599097, "logits_per_char": -0.642703139991091, "num_chars": 171}, {"sum_logits": -124.23197174072266, "num_tokens": 49, "num_tokens_all": 498, "is_greedy": false, "sum_logits_uncond": -161.70223999023438, "logits_per_token": -2.5353463620555643, "logits_per_char": -0.5778231243754542, "num_chars": 215}, {"sum_logits": -89.91301727294922, "num_tokens": 34, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -128.17047119140625, "logits_per_token": -2.6445005080279183, "logits_per_char": -0.6611251270069796, "num_chars": 136}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 173, "native_id": 1288, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 41.29213333129883, "incorrect_loss_raw": 115.75512440999348, "correct_loss_per_char": 0.4974955823048052, "incorrect_loss_per_char": 0.5963590514447351, "correct_loss_per_token": 2.0646066665649414, "incorrect_loss_per_token": 2.7149855886645593, "correct_loss_uncond": -23.225635528564453, "incorrect_loss_uncond": -20.392298380533855}, "model_output": [{"sum_logits": -96.06602478027344, "num_tokens": 45, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -125.54926300048828, "logits_per_token": -2.134800550672743, "logits_per_char": -0.4876447958389515, "num_chars": 197}, {"sum_logits": -132.47525024414062, "num_tokens": 40, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -144.78244018554688, "logits_per_token": -3.311881256103516, "logits_per_char": -0.727885990352421, "num_chars": 182}, {"sum_logits": -118.7240982055664, "num_tokens": 44, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -138.11056518554688, "logits_per_token": -2.6982749592174184, "logits_per_char": -0.5735463681428329, "num_chars": 207}, {"sum_logits": -41.29213333129883, "num_tokens": 20, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -64.51776885986328, "logits_per_token": -2.0646066665649414, "logits_per_char": -0.4974955823048052, "num_chars": 83}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 174, "native_id": 22249, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 95.95275115966797, "incorrect_loss_raw": 161.36872355143228, "correct_loss_per_char": 0.4341753446138822, "incorrect_loss_per_char": 0.7864501139221415, "correct_loss_per_token": 2.3403110038943407, "incorrect_loss_per_token": 3.4576984457569977, "correct_loss_uncond": -30.274147033691406, "incorrect_loss_uncond": -19.27588399251302}, "model_output": [{"sum_logits": -95.95275115966797, "num_tokens": 41, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -126.22689819335938, "logits_per_token": -2.3403110038943407, "logits_per_char": -0.4341753446138822, "num_chars": 221}, {"sum_logits": -142.67196655273438, "num_tokens": 45, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -152.8984375, "logits_per_token": -3.1704881456163196, "logits_per_char": -0.7242231804707329, "num_chars": 197}, {"sum_logits": -162.62213134765625, "num_tokens": 40, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -179.60475158691406, "logits_per_token": -4.065553283691406, "logits_per_char": -0.8838159312372622, "num_chars": 184}, {"sum_logits": -178.81207275390625, "num_tokens": 57, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -209.43063354492188, "logits_per_token": -3.1370539079632676, "logits_per_char": -0.7513112300584296, "num_chars": 238}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 175, "native_id": 10278, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 103.22476959228516, "incorrect_loss_raw": 89.85498809814453, "correct_loss_per_char": 0.6001440092574718, "incorrect_loss_per_char": 0.5546446213610748, "correct_loss_per_token": 2.7898586376293286, "incorrect_loss_per_token": 2.4622047497041293, "correct_loss_uncond": -24.89954376220703, "incorrect_loss_uncond": -24.836896260579426}, "model_output": [{"sum_logits": -127.55821990966797, "num_tokens": 48, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -152.67605590820312, "logits_per_token": -2.6574629147847495, "logits_per_char": -0.5720099547518743, "num_chars": 223}, {"sum_logits": -103.22476959228516, "num_tokens": 37, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -128.1243133544922, "logits_per_token": -2.7898586376293286, "logits_per_char": -0.6001440092574718, "num_chars": 172}, {"sum_logits": -66.65573120117188, "num_tokens": 29, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -88.89347839355469, "logits_per_token": -2.298473489695582, "logits_per_char": -0.5419165138306656, "num_chars": 123}, {"sum_logits": -75.35101318359375, "num_tokens": 31, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -102.50611877441406, "logits_per_token": -2.4306778446320565, "logits_per_char": -0.5500073955006843, "num_chars": 137}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 176, "native_id": 41173, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 101.70789337158203, "incorrect_loss_raw": 91.9014155069987, "correct_loss_per_char": 0.6201700815340367, "incorrect_loss_per_char": 0.6287665161994561, "correct_loss_per_token": 2.607894701835437, "incorrect_loss_per_token": 2.515795730124775, "correct_loss_uncond": -10.746322631835938, "incorrect_loss_uncond": -25.374656677246094}, "model_output": [{"sum_logits": -87.319091796875, "num_tokens": 32, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -113.40235900878906, "logits_per_token": -2.7287216186523438, "logits_per_char": -0.6768921844718992, "num_chars": 129}, {"sum_logits": -101.70789337158203, "num_tokens": 39, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -112.45421600341797, "logits_per_token": -2.607894701835437, "logits_per_char": -0.6201700815340367, "num_chars": 164}, {"sum_logits": -103.46891784667969, "num_tokens": 41, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -123.18197631835938, "logits_per_token": -2.5236321426019437, "logits_per_char": -0.6507479109854069, "num_chars": 159}, {"sum_logits": -84.9162368774414, "num_tokens": 37, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -115.24388122558594, "logits_per_token": -2.295033429120038, "logits_per_char": -0.5586594531410619, "num_chars": 152}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 177, "native_id": 13167, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 62.5640869140625, "incorrect_loss_raw": 69.53291447957356, "correct_loss_per_char": 0.34952003862604747, "incorrect_loss_per_char": 0.5337029351067263, "correct_loss_per_token": 1.7875453404017858, "incorrect_loss_per_token": 2.5735948323171374, "correct_loss_uncond": -22.14844512939453, "incorrect_loss_uncond": -19.680299123128254}, "model_output": [{"sum_logits": -85.99510955810547, "num_tokens": 28, "num_tokens_all": 445, "is_greedy": false, "sum_logits_uncond": -105.75704956054688, "logits_per_token": -3.071253912789481, "logits_per_char": -0.6098943940291168, "num_chars": 141}, {"sum_logits": -61.66175079345703, "num_tokens": 22, "num_tokens_all": 439, "is_greedy": false, "sum_logits_uncond": -84.85769653320312, "logits_per_token": -2.802806854248047, "logits_per_char": -0.5929014499370868, "num_chars": 104}, {"sum_logits": -62.5640869140625, "num_tokens": 35, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -84.71253204345703, "logits_per_token": -1.7875453404017858, "logits_per_char": -0.34952003862604747, "num_chars": 179}, {"sum_logits": -60.9418830871582, "num_tokens": 33, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -77.02489471435547, "logits_per_token": -1.846723729913885, "logits_per_char": -0.3983129613539752, "num_chars": 153}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 178, "native_id": 27212, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 86.64997863769531, "incorrect_loss_raw": 91.62579854329427, "correct_loss_per_char": 0.45130197207132977, "incorrect_loss_per_char": 0.5258821932742563, "correct_loss_per_token": 2.166249465942383, "incorrect_loss_per_token": 2.439327258804024, "correct_loss_uncond": -7.240242004394531, "incorrect_loss_uncond": -10.646006266276041}, "model_output": [{"sum_logits": -116.29741668701172, "num_tokens": 39, "num_tokens_all": 445, "is_greedy": false, "sum_logits_uncond": -124.17616271972656, "logits_per_token": -2.9819850432567105, "logits_per_char": -0.6607807766307484, "num_chars": 176}, {"sum_logits": -81.07019805908203, "num_tokens": 32, "num_tokens_all": 438, "is_greedy": false, "sum_logits_uncond": -91.68551635742188, "logits_per_token": -2.5334436893463135, "logits_per_char": -0.547771608507311, "num_chars": 148}, {"sum_logits": -77.50978088378906, "num_tokens": 43, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -90.9537353515625, "logits_per_token": -1.802553043809048, "logits_per_char": -0.36909419468470983, "num_chars": 210}, {"sum_logits": -86.64997863769531, "num_tokens": 40, "num_tokens_all": 446, "is_greedy": false, "sum_logits_uncond": -93.89022064208984, "logits_per_token": -2.166249465942383, "logits_per_char": -0.45130197207132977, "num_chars": 192}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 179, "native_id": 14758, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 57.1628532409668, "incorrect_loss_raw": 82.81163787841797, "correct_loss_per_char": 0.4970682890518852, "incorrect_loss_per_char": 0.5018833602287346, "correct_loss_per_token": 2.0415304728916714, "incorrect_loss_per_token": 2.2452211630464802, "correct_loss_uncond": -12.312259674072266, "incorrect_loss_uncond": -15.112271626790365}, "model_output": [{"sum_logits": -45.86126708984375, "num_tokens": 24, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -73.12602996826172, "logits_per_token": -1.9108861287434895, "logits_per_char": -0.3987936268682065, "num_chars": 115}, {"sum_logits": -57.1628532409668, "num_tokens": 28, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -69.47511291503906, "logits_per_token": -2.0415304728916714, "logits_per_char": -0.4970682890518852, "num_chars": 115}, {"sum_logits": -97.16557312011719, "num_tokens": 40, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -100.92218017578125, "logits_per_token": -2.42913932800293, "logits_per_char": -0.5520771200006659, "num_chars": 176}, {"sum_logits": -105.40807342529297, "num_tokens": 44, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -119.72351837158203, "logits_per_token": -2.395638032393022, "logits_per_char": -0.5547793338173315, "num_chars": 190}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 180, "native_id": 46015, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 166.31039428710938, "incorrect_loss_raw": 130.24763997395834, "correct_loss_per_char": 0.5434980205461091, "incorrect_loss_per_char": 0.5858685922080037, "correct_loss_per_token": 2.7263999063460553, "incorrect_loss_per_token": 2.6016880392466333, "correct_loss_uncond": -18.180084228515625, "incorrect_loss_uncond": -18.661570231119793}, "model_output": [{"sum_logits": -145.04226684570312, "num_tokens": 55, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -159.49339294433594, "logits_per_token": -2.6371321244673296, "logits_per_char": -0.6094212892676601, "num_chars": 238}, {"sum_logits": -114.26351928710938, "num_tokens": 45, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -134.2847137451172, "logits_per_token": -2.5391893174913194, "logits_per_char": -0.5982383208749182, "num_chars": 191}, {"sum_logits": -131.4371337890625, "num_tokens": 50, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -152.94952392578125, "logits_per_token": -2.62874267578125, "logits_per_char": -0.549946166481433, "num_chars": 239}, {"sum_logits": -166.31039428710938, "num_tokens": 61, "num_tokens_all": 490, "is_greedy": false, "sum_logits_uncond": -184.490478515625, "logits_per_token": -2.7263999063460553, "logits_per_char": -0.5434980205461091, "num_chars": 306}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 181, "native_id": 38238, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 29.233375549316406, "incorrect_loss_raw": 35.64884694417318, "correct_loss_per_char": 0.5040237163675243, "incorrect_loss_per_char": 0.7606949037380565, "correct_loss_per_token": 2.0880982535226003, "incorrect_loss_per_token": 3.1081578003498778, "correct_loss_uncond": -21.38058090209961, "incorrect_loss_uncond": -14.015894571940104}, "model_output": [{"sum_logits": -29.233375549316406, "num_tokens": 14, "num_tokens_all": 399, "is_greedy": false, "sum_logits_uncond": -50.613956451416016, "logits_per_token": -2.0880982535226003, "logits_per_char": -0.5040237163675243, "num_chars": 58}, {"sum_logits": -28.452667236328125, "num_tokens": 13, "num_tokens_all": 398, "is_greedy": false, "sum_logits_uncond": -48.12288284301758, "logits_per_token": -2.1886667104867787, "logits_per_char": -0.5471666776216947, "num_chars": 52}, {"sum_logits": -33.80052185058594, "num_tokens": 11, "num_tokens_all": 396, "is_greedy": false, "sum_logits_uncond": -42.84193801879883, "logits_per_token": -3.072774713689631, "logits_per_char": -0.7191600393741688, "num_chars": 47}, {"sum_logits": -44.69335174560547, "num_tokens": 11, "num_tokens_all": 396, "is_greedy": false, "sum_logits_uncond": -58.02940368652344, "logits_per_token": -4.063031976873225, "logits_per_char": -1.0157579942183061, "num_chars": 44}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 182, "native_id": 40591, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 139.58139038085938, "incorrect_loss_raw": 153.65630340576172, "correct_loss_per_char": 0.4403198434727425, "incorrect_loss_per_char": 0.81333984681172, "correct_loss_per_token": 2.3263565063476563, "incorrect_loss_per_token": 3.931770458631043, "correct_loss_uncond": -33.17616271972656, "incorrect_loss_uncond": -14.548367818196615}, "model_output": [{"sum_logits": -167.88302612304688, "num_tokens": 34, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -178.42593383789062, "logits_per_token": -4.937736062442555, "logits_per_char": -0.9275305310665574, "num_chars": 181}, {"sum_logits": -169.1630096435547, "num_tokens": 51, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -187.7049560546875, "logits_per_token": -3.3169217577167585, "logits_per_char": -0.7077950194290991, "num_chars": 239}, {"sum_logits": -139.58139038085938, "num_tokens": 60, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -172.75755310058594, "logits_per_token": -2.3263565063476563, "logits_per_char": -0.4403198434727425, "num_chars": 317}, {"sum_logits": -123.9228744506836, "num_tokens": 35, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -138.48312377929688, "logits_per_token": -3.540653555733817, "logits_per_char": -0.8046939899395038, "num_chars": 154}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 183, "native_id": 22687, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 12.700992584228516, "incorrect_loss_raw": 66.362548828125, "correct_loss_per_char": 0.4536068780081613, "incorrect_loss_per_char": 0.6864247781248909, "correct_loss_per_token": 2.5401985168457033, "incorrect_loss_per_token": 3.436073370334633, "correct_loss_uncond": -16.01388931274414, "incorrect_loss_uncond": -29.894381205240887}, "model_output": [{"sum_logits": -50.58313751220703, "num_tokens": 17, "num_tokens_all": 430, "is_greedy": false, "sum_logits_uncond": -72.88832092285156, "logits_per_token": -2.975478677188649, "logits_per_char": -0.6569238637948965, "num_chars": 77}, {"sum_logits": -12.700992584228516, "num_tokens": 5, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -28.714881896972656, "logits_per_token": -2.5401985168457033, "logits_per_char": -0.4536068780081613, "num_chars": 28}, {"sum_logits": -96.4154281616211, "num_tokens": 21, "num_tokens_all": 434, "is_greedy": false, "sum_logits_uncond": -121.78326416015625, "logits_per_token": -4.5912108648391, "logits_per_char": -0.817079899674755, "num_chars": 118}, {"sum_logits": -52.089080810546875, "num_tokens": 19, "num_tokens_all": 432, "is_greedy": false, "sum_logits_uncond": -94.09920501708984, "logits_per_token": -2.741530568976151, "logits_per_char": -0.5852705709050211, "num_chars": 89}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 184, "native_id": 7739, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 98.13807678222656, "incorrect_loss_raw": 104.81535975138347, "correct_loss_per_char": 0.5007044733787069, "incorrect_loss_per_char": 0.5899643311005364, "correct_loss_per_token": 2.0028178935148278, "incorrect_loss_per_token": 2.553522092036379, "correct_loss_uncond": -22.221755981445312, "incorrect_loss_uncond": -19.21340815226237}, "model_output": [{"sum_logits": -134.01611328125, "num_tokens": 50, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -151.24826049804688, "logits_per_token": -2.680322265625, "logits_per_char": -0.6569417317708334, "num_chars": 204}, {"sum_logits": -61.05791091918945, "num_tokens": 24, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -75.95980834960938, "logits_per_token": -2.544079621632894, "logits_per_char": -0.5130916883965501, "num_chars": 119}, {"sum_logits": -119.37205505371094, "num_tokens": 49, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -144.87823486328125, "logits_per_token": -2.4361643888512434, "logits_per_char": -0.5998595731342258, "num_chars": 199}, {"sum_logits": -98.13807678222656, "num_tokens": 49, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -120.35983276367188, "logits_per_token": -2.0028178935148278, "logits_per_char": -0.5007044733787069, "num_chars": 196}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 185, "native_id": 10071, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 51.77272415161133, "incorrect_loss_raw": 80.33114878336589, "correct_loss_per_char": 0.5177272415161133, "incorrect_loss_per_char": 0.5348932780097297, "correct_loss_per_token": 2.250988006591797, "incorrect_loss_per_token": 2.5450649281711732, "correct_loss_uncond": -26.243419647216797, "incorrect_loss_uncond": -26.252120971679688}, "model_output": [{"sum_logits": -59.203163146972656, "num_tokens": 26, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -86.5826416015625, "logits_per_token": -2.277044736422025, "logits_per_char": -0.5148101143215014, "num_chars": 115}, {"sum_logits": -96.01429748535156, "num_tokens": 40, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -121.66341400146484, "logits_per_token": -2.4003574371337892, "logits_per_char": -0.5218168341595194, "num_chars": 184}, {"sum_logits": -51.77272415161133, "num_tokens": 23, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -78.01614379882812, "logits_per_token": -2.250988006591797, "logits_per_char": -0.5177272415161133, "num_chars": 100}, {"sum_logits": -85.77598571777344, "num_tokens": 29, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -111.50375366210938, "logits_per_token": -2.957792610957705, "logits_per_char": -0.5680528855481685, "num_chars": 151}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 186, "native_id": 25662, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 82.21896362304688, "incorrect_loss_raw": 124.13592529296875, "correct_loss_per_char": 0.4952949615846197, "incorrect_loss_per_char": 0.596601564111844, "correct_loss_per_token": 2.652224633001512, "incorrect_loss_per_token": 2.7800230175127183, "correct_loss_uncond": -33.58845520019531, "incorrect_loss_uncond": -19.57988993326823}, "model_output": [{"sum_logits": -82.21896362304688, "num_tokens": 31, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -115.80741882324219, "logits_per_token": -2.652224633001512, "logits_per_char": -0.4952949615846197, "num_chars": 166}, {"sum_logits": -156.94741821289062, "num_tokens": 52, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -176.2349853515625, "logits_per_token": -3.0182195810171275, "logits_per_char": -0.6539475758870442, "num_chars": 240}, {"sum_logits": -98.8482437133789, "num_tokens": 37, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -118.80180358886719, "logits_per_token": -2.671574154415646, "logits_per_char": -0.5883824030558268, "num_chars": 168}, {"sum_logits": -116.61211395263672, "num_tokens": 44, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -136.11065673828125, "logits_per_token": -2.65027531710538, "logits_per_char": -0.5474747133926606, "num_chars": 213}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 187, "native_id": 24455, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 90.41438293457031, "incorrect_loss_raw": 123.79437001546223, "correct_loss_per_char": 0.38474205504072473, "incorrect_loss_per_char": 0.7166590429489997, "correct_loss_per_token": 1.674340424714265, "incorrect_loss_per_token": 3.080003292091059, "correct_loss_uncond": -47.47694396972656, "incorrect_loss_uncond": -40.4515126546224}, "model_output": [{"sum_logits": -134.72935485839844, "num_tokens": 48, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -174.12025451660156, "logits_per_token": -2.8068615595499673, "logits_per_char": -0.6702952978029773, "num_chars": 201}, {"sum_logits": -88.81884002685547, "num_tokens": 26, "num_tokens_all": 446, "is_greedy": false, "sum_logits_uncond": -126.07173919677734, "logits_per_token": -3.4161092318021336, "logits_per_char": -0.7723377393639606, "num_chars": 115}, {"sum_logits": -147.8349151611328, "num_tokens": 49, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -192.545654296875, "logits_per_token": -3.0170390849210778, "logits_per_char": -0.7073440916800613, "num_chars": 209}, {"sum_logits": -90.41438293457031, "num_tokens": 54, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -137.89132690429688, "logits_per_token": -1.674340424714265, "logits_per_char": -0.38474205504072473, "num_chars": 235}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 188, "native_id": 44439, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 104.46224212646484, "incorrect_loss_raw": 141.76561482747397, "correct_loss_per_char": 0.5022223179156964, "incorrect_loss_per_char": 0.7162068546043029, "correct_loss_per_token": 2.487196241106306, "incorrect_loss_per_token": 3.2973337164720196, "correct_loss_uncond": -26.38733673095703, "incorrect_loss_uncond": -21.941065470377605}, "model_output": [{"sum_logits": -104.46224212646484, "num_tokens": 42, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -130.84957885742188, "logits_per_token": -2.487196241106306, "logits_per_char": -0.5022223179156964, "num_chars": 208}, {"sum_logits": -129.92466735839844, "num_tokens": 46, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -153.3731689453125, "logits_per_token": -2.824449290399966, "logits_per_char": -0.6099749641239364, "num_chars": 213}, {"sum_logits": -148.03240966796875, "num_tokens": 45, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -167.8509979248047, "logits_per_token": -3.289609103732639, "logits_per_char": -0.6668126561620213, "num_chars": 222}, {"sum_logits": -147.3397674560547, "num_tokens": 39, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -169.8958740234375, "logits_per_token": -3.7779427552834535, "logits_per_char": -0.8718329435269508, "num_chars": 169}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 189, "native_id": 39705, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 77.60092163085938, "incorrect_loss_raw": 93.04496002197266, "correct_loss_per_char": 0.438423286050053, "incorrect_loss_per_char": 0.5538726445090186, "correct_loss_per_token": 1.9897672213040865, "incorrect_loss_per_token": 2.5829677234996447, "correct_loss_uncond": -21.517410278320312, "incorrect_loss_uncond": -14.43664805094401}, "model_output": [{"sum_logits": -77.60092163085938, "num_tokens": 39, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -99.11833190917969, "logits_per_token": -1.9897672213040865, "logits_per_char": -0.438423286050053, "num_chars": 177}, {"sum_logits": -133.834716796875, "num_tokens": 40, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -142.70834350585938, "logits_per_token": -3.345867919921875, "logits_per_char": -0.6658443621735075, "num_chars": 201}, {"sum_logits": -78.19499969482422, "num_tokens": 33, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -107.06261444091797, "logits_per_token": -2.3695454452977036, "logits_per_char": -0.5392758599643049, "num_chars": 145}, {"sum_logits": -67.10516357421875, "num_tokens": 33, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -72.67386627197266, "logits_per_token": -2.033489805279356, "logits_per_char": -0.4564977113892432, "num_chars": 147}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 190, "native_id": 28791, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 48.92543029785156, "incorrect_loss_raw": 29.688613891601562, "correct_loss_per_char": 0.48441020096882736, "incorrect_loss_per_char": 0.7312404974426009, "correct_loss_per_token": 2.12719262164572, "incorrect_loss_per_token": 3.0620477546345106, "correct_loss_uncond": -42.903892517089844, "incorrect_loss_uncond": -17.555503845214844}, "model_output": [{"sum_logits": -39.41872787475586, "num_tokens": 10, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -50.65511703491211, "logits_per_token": -3.941872787475586, "logits_per_char": -1.0653710236420502, "num_chars": 37}, {"sum_logits": -48.92543029785156, "num_tokens": 23, "num_tokens_all": 421, "is_greedy": false, "sum_logits_uncond": -91.8293228149414, "logits_per_token": -2.12719262164572, "logits_per_char": -0.48441020096882736, "num_chars": 101}, {"sum_logits": -28.207483291625977, "num_tokens": 11, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -48.055442810058594, "logits_per_token": -2.564316662875089, "logits_per_char": -0.5641496658325196, "num_chars": 50}, {"sum_logits": -21.43963050842285, "num_tokens": 8, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -43.021793365478516, "logits_per_token": -2.6799538135528564, "logits_per_char": -0.5642008028532329, "num_chars": 38}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 191, "native_id": 21520, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 83.99777221679688, "incorrect_loss_raw": 145.14971669514975, "correct_loss_per_char": 0.45900421976391736, "incorrect_loss_per_char": 0.6511625279066999, "correct_loss_per_token": 2.210467689915707, "incorrect_loss_per_token": 3.03572564164005, "correct_loss_uncond": -15.205062866210938, "incorrect_loss_uncond": -22.445749918619793}, "model_output": [{"sum_logits": -199.79754638671875, "num_tokens": 65, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -228.1817169189453, "logits_per_token": -3.0738084059495194, "logits_per_char": -0.6529331581265319, "num_chars": 306}, {"sum_logits": -77.49800872802734, "num_tokens": 27, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -95.13115692138672, "logits_per_token": -2.8702966195565685, "logits_per_char": -0.6007597575816073, "num_chars": 129}, {"sum_logits": -83.99777221679688, "num_tokens": 38, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -99.20283508300781, "logits_per_token": -2.210467689915707, "logits_per_char": -0.45900421976391736, "num_chars": 183}, {"sum_logits": -158.15359497070312, "num_tokens": 50, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -179.47352600097656, "logits_per_token": -3.1630718994140623, "logits_per_char": -0.6997946680119608, "num_chars": 226}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 192, "native_id": 9563, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 101.536376953125, "incorrect_loss_raw": 122.22817103068034, "correct_loss_per_char": 0.5154130809803299, "incorrect_loss_per_char": 0.7309797610704459, "correct_loss_per_token": 2.538409423828125, "incorrect_loss_per_token": 2.8781964331573473, "correct_loss_uncond": -27.72991943359375, "incorrect_loss_uncond": -42.749220530192055}, "model_output": [{"sum_logits": -62.97633743286133, "num_tokens": 46, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -120.18374633789062, "logits_per_token": -1.369050813757855, "logits_per_char": -0.2549649288779811, "num_chars": 247}, {"sum_logits": -125.41600036621094, "num_tokens": 37, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -162.662841796875, "logits_per_token": -3.3896216315192147, "logits_per_char": -0.830569538849079, "num_chars": 151}, {"sum_logits": -178.29217529296875, "num_tokens": 46, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -212.08558654785156, "logits_per_token": -3.8759168541949727, "logits_per_char": -1.107404815484278, "num_chars": 161}, {"sum_logits": -101.536376953125, "num_tokens": 40, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -129.26629638671875, "logits_per_token": -2.538409423828125, "logits_per_char": -0.5154130809803299, "num_chars": 197}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 193, "native_id": 2024, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 111.84967041015625, "incorrect_loss_raw": 107.90027109781902, "correct_loss_per_char": 0.6737931952419052, "incorrect_loss_per_char": 0.605810645020414, "correct_loss_per_token": 3.195704868861607, "incorrect_loss_per_token": 2.641868541481776, "correct_loss_uncond": -25.474563598632812, "incorrect_loss_uncond": -23.445025126139324}, "model_output": [{"sum_logits": -134.31951904296875, "num_tokens": 51, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -150.68612670898438, "logits_per_token": -2.633716059666054, "logits_per_char": -0.6105432683771307, "num_chars": 220}, {"sum_logits": -77.05545043945312, "num_tokens": 37, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -108.44430541992188, "logits_per_token": -2.0825797416068412, "logits_per_char": -0.472732824781921, "num_chars": 163}, {"sum_logits": -111.84967041015625, "num_tokens": 35, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -137.32423400878906, "logits_per_token": -3.195704868861607, "logits_per_char": -0.6737931952419052, "num_chars": 166}, {"sum_logits": -112.32584381103516, "num_tokens": 35, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -134.90545654296875, "logits_per_token": -3.209309823172433, "logits_per_char": -0.7341558419021905, "num_chars": 153}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 194, "native_id": 12403, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 89.06019592285156, "incorrect_loss_raw": 74.4944356282552, "correct_loss_per_char": 0.5745819091796875, "incorrect_loss_per_char": 0.6071062692569069, "correct_loss_per_token": 2.619417527142693, "incorrect_loss_per_token": 2.9381626918942785, "correct_loss_uncond": -22.863067626953125, "incorrect_loss_uncond": -22.501358032226562}, "model_output": [{"sum_logits": -91.10965728759766, "num_tokens": 35, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -120.58998107910156, "logits_per_token": -2.603133065359933, "logits_per_char": -0.5694353580474854, "num_chars": 160}, {"sum_logits": -69.9167709350586, "num_tokens": 20, "num_tokens_all": 436, "is_greedy": false, "sum_logits_uncond": -82.55632019042969, "logits_per_token": -3.4958385467529296, "logits_per_char": -0.6991677093505859, "num_chars": 100}, {"sum_logits": -89.06019592285156, "num_tokens": 34, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -111.92326354980469, "logits_per_token": -2.619417527142693, "logits_per_char": -0.5745819091796875, "num_chars": 155}, {"sum_logits": -62.456878662109375, "num_tokens": 23, "num_tokens_all": 439, "is_greedy": false, "sum_logits_uncond": -87.84107971191406, "logits_per_token": -2.7155164635699727, "logits_per_char": -0.5527157403726494, "num_chars": 113}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 195, "native_id": 47210, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 86.40524291992188, "incorrect_loss_raw": 144.87933349609375, "correct_loss_per_char": 0.4881652142368468, "incorrect_loss_per_char": 0.6601395641155751, "correct_loss_per_token": 1.9637555209073154, "incorrect_loss_per_token": 3.412698590478232, "correct_loss_uncond": -32.209449768066406, "incorrect_loss_uncond": -19.453053792317707}, "model_output": [{"sum_logits": -124.63369750976562, "num_tokens": 32, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -148.28207397460938, "logits_per_token": -3.894803047180176, "logits_per_char": -0.692409430609809, "num_chars": 180}, {"sum_logits": -149.5741424560547, "num_tokens": 43, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -169.678955078125, "logits_per_token": -3.478468429210574, "logits_per_char": -0.740466051762647, "num_chars": 202}, {"sum_logits": -160.43016052246094, "num_tokens": 56, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -175.0361328125, "logits_per_token": -2.8648242950439453, "logits_per_char": -0.5475432099742694, "num_chars": 293}, {"sum_logits": -86.40524291992188, "num_tokens": 44, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -118.61469268798828, "logits_per_token": -1.9637555209073154, "logits_per_char": -0.4881652142368468, "num_chars": 177}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 196, "native_id": 24298, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 67.55486297607422, "incorrect_loss_raw": 91.27711995442708, "correct_loss_per_char": 0.39738154691808364, "incorrect_loss_per_char": 0.6040650093525117, "correct_loss_per_token": 1.8765239715576172, "incorrect_loss_per_token": 2.8197991513510208, "correct_loss_uncond": -29.386795043945312, "incorrect_loss_uncond": -17.864466349283855}, "model_output": [{"sum_logits": -83.46701049804688, "num_tokens": 28, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -92.41119384765625, "logits_per_token": -2.9809646606445312, "logits_per_char": -0.6371527518934876, "num_chars": 131}, {"sum_logits": -80.50239562988281, "num_tokens": 27, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -100.76795959472656, "logits_per_token": -2.9815702085141784, "logits_per_char": -0.6145221040449069, "num_chars": 131}, {"sum_logits": -109.86195373535156, "num_tokens": 44, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -134.24560546875, "logits_per_token": -2.4968625848943535, "logits_per_char": -0.5605201721191406, "num_chars": 196}, {"sum_logits": -67.55486297607422, "num_tokens": 36, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -96.94165802001953, "logits_per_token": -1.8765239715576172, "logits_per_char": -0.39738154691808364, "num_chars": 170}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 197, "native_id": 11905, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 34.63159942626953, "incorrect_loss_raw": 50.02346165974935, "correct_loss_per_char": 0.4497610315099939, "incorrect_loss_per_char": 0.8159820241522012, "correct_loss_per_token": 2.0371529074276196, "incorrect_loss_per_token": 3.744457000795409, "correct_loss_uncond": -32.06517791748047, "incorrect_loss_uncond": -24.112590789794922}, "model_output": [{"sum_logits": -56.449668884277344, "num_tokens": 17, "num_tokens_all": 423, "is_greedy": false, "sum_logits_uncond": -77.86136627197266, "logits_per_token": -3.320568757898667, "logits_per_char": -0.7237137036445813, "num_chars": 78}, {"sum_logits": -52.36796951293945, "num_tokens": 19, "num_tokens_all": 425, "is_greedy": false, "sum_logits_uncond": -89.72377014160156, "logits_per_token": -2.7562089217336556, "logits_per_char": -0.6386337745480422, "num_chars": 82}, {"sum_logits": -34.63159942626953, "num_tokens": 17, "num_tokens_all": 423, "is_greedy": false, "sum_logits_uncond": -66.69677734375, "logits_per_token": -2.0371529074276196, "logits_per_char": -0.4497610315099939, "num_chars": 77}, {"sum_logits": -41.25274658203125, "num_tokens": 8, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -54.823020935058594, "logits_per_token": -5.156593322753906, "logits_per_char": -1.0855985942639803, "num_chars": 38}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 198, "native_id": 18434, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 98.74263000488281, "incorrect_loss_raw": 84.5602633158366, "correct_loss_per_char": 0.5012316243902681, "incorrect_loss_per_char": 0.45972519318166793, "correct_loss_per_token": 2.1942806667751737, "incorrect_loss_per_token": 1.9475482868478704, "correct_loss_uncond": -14.191612243652344, "incorrect_loss_uncond": -13.06973648071289}, "model_output": [{"sum_logits": -48.235294342041016, "num_tokens": 28, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -64.32984924316406, "logits_per_token": -1.722689083644322, "logits_per_char": -0.40877368086475435, "num_chars": 118}, {"sum_logits": -98.74263000488281, "num_tokens": 45, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -112.93424224853516, "logits_per_token": -2.1942806667751737, "logits_per_char": -0.5012316243902681, "num_chars": 197}, {"sum_logits": -77.40202331542969, "num_tokens": 36, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -91.00321960449219, "logits_per_token": -2.1500562032063804, "logits_per_char": -0.49616681612454927, "num_chars": 156}, {"sum_logits": -128.04347229003906, "num_tokens": 65, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -137.5569305419922, "logits_per_token": -1.9698995736929086, "logits_per_char": -0.47423508255570024, "num_chars": 270}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 199, "native_id": 29512, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 47.434661865234375, "incorrect_loss_raw": 99.89631017049153, "correct_loss_per_char": 0.40891949883822737, "incorrect_loss_per_char": 0.6414505791352878, "correct_loss_per_token": 1.897386474609375, "incorrect_loss_per_token": 2.7357487145181785, "correct_loss_uncond": -19.083999633789062, "incorrect_loss_uncond": -25.821941375732422}, "model_output": [{"sum_logits": -117.73575592041016, "num_tokens": 38, "num_tokens_all": 496, "is_greedy": false, "sum_logits_uncond": -147.90359497070312, "logits_per_token": -3.0983093663265833, "logits_per_char": -0.6925632701200597, "num_chars": 170}, {"sum_logits": -46.920047760009766, "num_tokens": 23, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -76.35218811035156, "logits_per_token": -2.0400020765221636, "logits_per_char": -0.5393108937932157, "num_chars": 87}, {"sum_logits": -135.0331268310547, "num_tokens": 44, "num_tokens_all": 502, "is_greedy": false, "sum_logits_uncond": -152.8989715576172, "logits_per_token": -3.0689347007057886, "logits_per_char": -0.6924775734925881, "num_chars": 195}, {"sum_logits": -47.434661865234375, "num_tokens": 25, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -66.51866149902344, "logits_per_token": -1.897386474609375, "logits_per_char": -0.40891949883822737, "num_chars": 116}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 200, "native_id": 4321, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 59.535789489746094, "incorrect_loss_raw": 90.3033447265625, "correct_loss_per_char": 0.454471675494245, "incorrect_loss_per_char": 0.5913724794542432, "correct_loss_per_token": 1.8604934215545654, "incorrect_loss_per_token": 2.796400729225555, "correct_loss_uncond": -17.425132751464844, "incorrect_loss_uncond": -19.28106689453125}, "model_output": [{"sum_logits": -59.535789489746094, "num_tokens": 32, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -76.96092224121094, "logits_per_token": -1.8604934215545654, "logits_per_char": -0.454471675494245, "num_chars": 131}, {"sum_logits": -88.86164855957031, "num_tokens": 29, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -110.40283203125, "logits_per_token": -3.0641947779162177, "logits_per_char": -0.6347260611397879, "num_chars": 140}, {"sum_logits": -106.17069244384766, "num_tokens": 33, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -120.58468627929688, "logits_per_token": -3.217293710419626, "logits_per_char": -0.6434587420839252, "num_chars": 165}, {"sum_logits": -75.87769317626953, "num_tokens": 36, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -97.76571655273438, "logits_per_token": -2.1077136993408203, "logits_per_char": -0.49593263513901653, "num_chars": 153}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 201, "native_id": 35477, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 82.82015228271484, "incorrect_loss_raw": 154.1362762451172, "correct_loss_per_char": 0.5275168935204767, "incorrect_loss_per_char": 0.6764654085082767, "correct_loss_per_token": 2.6716178155714467, "incorrect_loss_per_token": 3.727292890397329, "correct_loss_uncond": -21.21247100830078, "incorrect_loss_uncond": -15.385248819986979}, "model_output": [{"sum_logits": -82.82015228271484, "num_tokens": 31, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -104.03262329101562, "logits_per_token": -2.6716178155714467, "logits_per_char": -0.5275168935204767, "num_chars": 157}, {"sum_logits": -135.10983276367188, "num_tokens": 35, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -144.0020751953125, "logits_per_token": -3.860280936104911, "logits_per_char": -0.7225124746720422, "num_chars": 187}, {"sum_logits": -168.9638671875, "num_tokens": 42, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -186.60581970214844, "logits_per_token": -4.02294921875, "logits_per_char": -0.6785697477409639, "num_chars": 249}, {"sum_logits": -158.3351287841797, "num_tokens": 48, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -177.95668029785156, "logits_per_token": -3.2986485163370767, "logits_per_char": -0.6283140031118242, "num_chars": 252}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 202, "native_id": 41288, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 35.32814407348633, "incorrect_loss_raw": 67.56913375854492, "correct_loss_per_char": 0.4529249240190555, "incorrect_loss_per_char": 0.5437622519977464, "correct_loss_per_token": 2.2080090045928955, "incorrect_loss_per_token": 2.876675227650425, "correct_loss_uncond": -37.7373161315918, "incorrect_loss_uncond": -40.56343205769857}, "model_output": [{"sum_logits": -61.50676345825195, "num_tokens": 19, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -103.41413879394531, "logits_per_token": -3.237198076750103, "logits_per_char": -0.5591523950750178, "num_chars": 110}, {"sum_logits": -47.23094177246094, "num_tokens": 20, "num_tokens_all": 420, "is_greedy": false, "sum_logits_uncond": -92.07543182373047, "logits_per_token": -2.361547088623047, "logits_per_char": -0.44981849307105654, "num_chars": 105}, {"sum_logits": -35.32814407348633, "num_tokens": 16, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -73.06546020507812, "logits_per_token": -2.2080090045928955, "logits_per_char": -0.4529249240190555, "num_chars": 78}, {"sum_logits": -93.96969604492188, "num_tokens": 31, "num_tokens_all": 431, "is_greedy": false, "sum_logits_uncond": -128.9081268310547, "logits_per_token": -3.031280517578125, "logits_per_char": -0.6223158678471647, "num_chars": 151}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 203, "native_id": 22183, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 84.3298568725586, "incorrect_loss_raw": 124.52212524414062, "correct_loss_per_char": 0.5856240060594347, "incorrect_loss_per_char": 0.6949467843413534, "correct_loss_per_token": 2.108246421813965, "incorrect_loss_per_token": 3.5488567054290248, "correct_loss_uncond": -21.47093963623047, "incorrect_loss_uncond": -11.148574829101562}, "model_output": [{"sum_logits": -110.54049682617188, "num_tokens": 34, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -110.72309875488281, "logits_per_token": -3.251191083122702, "logits_per_char": -0.7040795976189291, "num_chars": 157}, {"sum_logits": -112.27182006835938, "num_tokens": 35, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -142.59115600585938, "logits_per_token": -3.2077662876674107, "logits_per_char": -0.6307405621817942, "num_chars": 178}, {"sum_logits": -84.3298568725586, "num_tokens": 40, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -105.80079650878906, "logits_per_token": -2.108246421813965, "logits_per_char": -0.5856240060594347, "num_chars": 144}, {"sum_logits": -150.75405883789062, "num_tokens": 36, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -153.69784545898438, "logits_per_token": -4.187612745496962, "logits_per_char": -0.7500201932233365, "num_chars": 201}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 204, "native_id": 13484, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 69.48678588867188, "incorrect_loss_raw": 99.02903493245442, "correct_loss_per_char": 0.6555357159308668, "incorrect_loss_per_char": 0.6224355113903041, "correct_loss_per_token": 2.6725686880258412, "incorrect_loss_per_token": 2.80205850298684, "correct_loss_uncond": -22.069656372070312, "incorrect_loss_uncond": -19.45831807454427}, "model_output": [{"sum_logits": -84.89739990234375, "num_tokens": 38, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -113.36659240722656, "logits_per_token": -2.2341421026932564, "logits_per_char": -0.5775333326690051, "num_chars": 147}, {"sum_logits": -69.48678588867188, "num_tokens": 26, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -91.55644226074219, "logits_per_token": -2.6725686880258412, "logits_per_char": -0.6555357159308668, "num_chars": 106}, {"sum_logits": -110.0384750366211, "num_tokens": 33, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -125.49266815185547, "logits_per_token": -3.3344992435339726, "logits_per_char": -0.6668998487067945, "num_chars": 165}, {"sum_logits": -102.15122985839844, "num_tokens": 36, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -116.60279846191406, "logits_per_token": -2.8375341627332897, "logits_per_char": -0.6228733527951125, "num_chars": 164}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 205, "native_id": 586, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 38.210243225097656, "incorrect_loss_raw": 72.79983520507812, "correct_loss_per_char": 0.5381724397901079, "incorrect_loss_per_char": 0.8617428227744002, "correct_loss_per_token": 2.939249478853666, "incorrect_loss_per_token": 3.725032013026398, "correct_loss_uncond": -24.304534912109375, "incorrect_loss_uncond": -24.278575897216797}, "model_output": [{"sum_logits": -89.333251953125, "num_tokens": 32, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -130.8262939453125, "logits_per_token": -2.7916641235351562, "logits_per_char": -0.5618443519064465, "num_chars": 159}, {"sum_logits": -72.29389953613281, "num_tokens": 18, "num_tokens_all": 435, "is_greedy": false, "sum_logits_uncond": -100.75193786621094, "logits_per_token": -4.016327752007379, "logits_per_char": -0.840626738792242, "num_chars": 86}, {"sum_logits": -38.210243225097656, "num_tokens": 13, "num_tokens_all": 430, "is_greedy": false, "sum_logits_uncond": -62.51477813720703, "logits_per_token": -2.939249478853666, "logits_per_char": -0.5381724397901079, "num_chars": 71}, {"sum_logits": -56.77235412597656, "num_tokens": 13, "num_tokens_all": 430, "is_greedy": false, "sum_logits_uncond": -59.65700149536133, "logits_per_token": -4.367104163536658, "logits_per_char": -1.1827573776245117, "num_chars": 48}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 206, "native_id": 17259, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.572412490844727, "incorrect_loss_raw": 29.545170466105144, "correct_loss_per_char": 0.32037613608620386, "incorrect_loss_per_char": 0.6567930082678135, "correct_loss_per_token": 1.3215515613555908, "incorrect_loss_per_token": 2.671825558618695, "correct_loss_uncond": -29.066457748413086, "incorrect_loss_uncond": -26.8902645111084}, "model_output": [{"sum_logits": -12.32017707824707, "num_tokens": 9, "num_tokens_all": 404, "is_greedy": false, "sum_logits_uncond": -42.83396911621094, "logits_per_token": -1.3689085642496746, "logits_per_char": -0.30800442695617675, "num_chars": 40}, {"sum_logits": -20.820053100585938, "num_tokens": 13, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -53.9388427734375, "logits_per_token": -1.6015425461989183, "logits_per_char": -0.37178666251046316, "num_chars": 56}, {"sum_logits": -10.572412490844727, "num_tokens": 8, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -39.63887023925781, "logits_per_token": -1.3215515613555908, "logits_per_char": -0.32037613608620386, "num_chars": 33}, {"sum_logits": -55.49528121948242, "num_tokens": 11, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -72.53349304199219, "logits_per_token": -5.045025565407493, "logits_per_char": -1.2905879353368006, "num_chars": 43}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 207, "native_id": 34173, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 57.33755874633789, "incorrect_loss_raw": 89.99927266438802, "correct_loss_per_char": 0.45870046997070313, "incorrect_loss_per_char": 0.5484860112975453, "correct_loss_per_token": 1.9771571981495824, "incorrect_loss_per_token": 2.687066366383341, "correct_loss_uncond": -24.909862518310547, "incorrect_loss_uncond": -23.356646219889324}, "model_output": [{"sum_logits": -80.41975402832031, "num_tokens": 35, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -110.859619140625, "logits_per_token": -2.297707257952009, "logits_per_char": -0.43706388058869733, "num_chars": 184}, {"sum_logits": -85.807861328125, "num_tokens": 29, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -106.7716293334961, "logits_per_token": -2.9588917699353448, "logits_per_char": -0.6085663923980497, "num_chars": 141}, {"sum_logits": -103.77020263671875, "num_tokens": 37, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -122.43650817871094, "logits_per_token": -2.804600071262669, "logits_per_char": -0.5998277609058887, "num_chars": 173}, {"sum_logits": -57.33755874633789, "num_tokens": 29, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -82.24742126464844, "logits_per_token": -1.9771571981495824, "logits_per_char": -0.45870046997070313, "num_chars": 125}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 208, "native_id": 23905, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 73.1359634399414, "incorrect_loss_raw": 110.06996154785156, "correct_loss_per_char": 0.4459509965850086, "incorrect_loss_per_char": 0.6230379111450632, "correct_loss_per_token": 2.089598955426897, "incorrect_loss_per_token": 2.9855390702761135, "correct_loss_uncond": -12.684852600097656, "incorrect_loss_uncond": -16.36871592203776}, "model_output": [{"sum_logits": -170.92144775390625, "num_tokens": 50, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -190.68052673339844, "logits_per_token": -3.418428955078125, "logits_per_char": -0.6891993861044606, "num_chars": 248}, {"sum_logits": -77.71221160888672, "num_tokens": 26, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -92.05712127685547, "logits_per_token": -2.9889312157264123, "logits_per_char": -0.5756460119176794, "num_chars": 135}, {"sum_logits": -73.1359634399414, "num_tokens": 35, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -85.82081604003906, "logits_per_token": -2.089598955426897, "logits_per_char": -0.4459509965850086, "num_chars": 164}, {"sum_logits": -81.57622528076172, "num_tokens": 32, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -96.57838439941406, "logits_per_token": -2.5492570400238037, "logits_per_char": -0.6042683354130498, "num_chars": 135}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 209, "native_id": 12482, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 82.38965606689453, "incorrect_loss_raw": 126.35885874430339, "correct_loss_per_char": 0.5085781238697193, "incorrect_loss_per_char": 0.6009057130689074, "correct_loss_per_token": 2.288601557413737, "incorrect_loss_per_token": 2.750082166243305, "correct_loss_uncond": -19.369766235351562, "incorrect_loss_uncond": -26.174845377604168}, "model_output": [{"sum_logits": -178.84042358398438, "num_tokens": 57, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -202.94512939453125, "logits_per_token": -3.137551290947094, "logits_per_char": -0.6800016105854919, "num_chars": 263}, {"sum_logits": -82.38965606689453, "num_tokens": 36, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -101.7594223022461, "logits_per_token": -2.288601557413737, "logits_per_char": -0.5085781238697193, "num_chars": 162}, {"sum_logits": -62.93175506591797, "num_tokens": 26, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -89.01599884033203, "logits_per_token": -2.420452117919922, "logits_per_char": -0.48409042358398435, "num_chars": 130}, {"sum_logits": -137.3043975830078, "num_tokens": 51, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -165.63998413085938, "logits_per_token": -2.6922430898628984, "logits_per_char": -0.6386251050372457, "num_chars": 215}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 210, "native_id": 28112, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 69.1268310546875, "incorrect_loss_raw": 120.58352661132812, "correct_loss_per_char": 0.5009190656136775, "incorrect_loss_per_char": 0.6812282236572513, "correct_loss_per_token": 2.6587242713341346, "incorrect_loss_per_token": 3.09192852325558, "correct_loss_uncond": -21.55908203125, "incorrect_loss_uncond": -26.402201334635418}, "model_output": [{"sum_logits": -64.05772399902344, "num_tokens": 30, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -89.71804809570312, "logits_per_token": -2.1352574666341146, "logits_per_char": -0.5043915275513656, "num_chars": 127}, {"sum_logits": -69.1268310546875, "num_tokens": 26, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -90.6859130859375, "logits_per_token": -2.6587242713341346, "logits_per_char": -0.5009190656136775, "num_chars": 138}, {"sum_logits": -128.62496948242188, "num_tokens": 39, "num_tokens_all": 498, "is_greedy": false, "sum_logits_uncond": -157.09701538085938, "logits_per_token": -3.2980761405749197, "logits_per_char": -0.7185752485051501, "num_chars": 179}, {"sum_logits": -169.06788635253906, "num_tokens": 44, "num_tokens_all": 503, "is_greedy": false, "sum_logits_uncond": -194.14212036132812, "logits_per_token": -3.842451962557706, "logits_per_char": -0.8207178949152382, "num_chars": 206}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 211, "native_id": 34111, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 42.150978088378906, "incorrect_loss_raw": 93.44865417480469, "correct_loss_per_char": 0.35125815073649086, "incorrect_loss_per_char": 0.5725230521980303, "correct_loss_per_token": 1.5611473366066262, "incorrect_loss_per_token": 2.5245520759554125, "correct_loss_uncond": -34.60997772216797, "incorrect_loss_uncond": -28.569478352864582}, "model_output": [{"sum_logits": -42.150978088378906, "num_tokens": 27, "num_tokens_all": 446, "is_greedy": false, "sum_logits_uncond": -76.76095581054688, "logits_per_token": -1.5611473366066262, "logits_per_char": -0.35125815073649086, "num_chars": 120}, {"sum_logits": -111.01750183105469, "num_tokens": 38, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -137.1573944091797, "logits_per_token": -2.9215132060803866, "logits_per_char": -0.7026424166522448, "num_chars": 158}, {"sum_logits": -78.9985122680664, "num_tokens": 33, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -106.79691314697266, "logits_per_token": -2.3938943111535274, "logits_per_char": -0.5266567484537761, "num_chars": 150}, {"sum_logits": -90.32994842529297, "num_tokens": 40, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -122.10009002685547, "logits_per_token": -2.258248710632324, "logits_per_char": -0.4882699914880701, "num_chars": 185}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 212, "native_id": 46479, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 28.78481674194336, "incorrect_loss_raw": 25.891825993855793, "correct_loss_per_char": 0.7020687010230088, "incorrect_loss_per_char": 0.6210692320696746, "correct_loss_per_token": 2.8784816741943358, "incorrect_loss_per_token": 2.499426188613429, "correct_loss_uncond": -24.53985595703125, "incorrect_loss_uncond": -21.618977228800457}, "model_output": [{"sum_logits": -22.56806182861328, "num_tokens": 10, "num_tokens_all": 396, "is_greedy": false, "sum_logits_uncond": -44.050594329833984, "logits_per_token": -2.256806182861328, "logits_per_char": -0.5373348054431734, "num_chars": 42}, {"sum_logits": -29.61961555480957, "num_tokens": 11, "num_tokens_all": 397, "is_greedy": false, "sum_logits_uncond": -49.743770599365234, "logits_per_token": -2.6926923231645064, "logits_per_char": -0.7594773219181941, "num_chars": 39}, {"sum_logits": -25.48780059814453, "num_tokens": 10, "num_tokens_all": 396, "is_greedy": false, "sum_logits_uncond": -48.73804473876953, "logits_per_token": -2.548780059814453, "logits_per_char": -0.5663955688476563, "num_chars": 45}, {"sum_logits": -28.78481674194336, "num_tokens": 10, "num_tokens_all": 396, "is_greedy": false, "sum_logits_uncond": -53.32467269897461, "logits_per_token": -2.8784816741943358, "logits_per_char": -0.7020687010230088, "num_chars": 41}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 213, "native_id": 24491, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 90.48336791992188, "incorrect_loss_raw": 89.83094278971355, "correct_loss_per_char": 0.4762282522101151, "incorrect_loss_per_char": 0.495173535606098, "correct_loss_per_token": 2.154365902855283, "incorrect_loss_per_token": 2.2118714976514506, "correct_loss_uncond": -17.15355682373047, "incorrect_loss_uncond": -19.603182474772137}, "model_output": [{"sum_logits": -90.48336791992188, "num_tokens": 42, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -107.63692474365234, "logits_per_token": -2.154365902855283, "logits_per_char": -0.4762282522101151, "num_chars": 190}, {"sum_logits": -121.56764221191406, "num_tokens": 48, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -135.70481872558594, "logits_per_token": -2.5326592127482095, "logits_per_char": -0.5331914132101494, "num_chars": 228}, {"sum_logits": -55.268890380859375, "num_tokens": 32, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -79.45613861083984, "logits_per_token": -1.7271528244018555, "logits_per_char": -0.43178820610046387, "num_chars": 128}, {"sum_logits": -92.65629577636719, "num_tokens": 39, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -113.14141845703125, "logits_per_token": -2.375802455804287, "logits_per_char": -0.5205409875076809, "num_chars": 178}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 214, "native_id": 33964, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 97.45059204101562, "incorrect_loss_raw": 101.29381306966145, "correct_loss_per_char": 0.6411223160593134, "incorrect_loss_per_char": 0.7142371165616438, "correct_loss_per_token": 2.95304824366714, "incorrect_loss_per_token": 2.8621830229292797, "correct_loss_uncond": -36.045623779296875, "incorrect_loss_uncond": -27.715970357259113}, "model_output": [{"sum_logits": -69.73644256591797, "num_tokens": 27, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -106.27648162841797, "logits_per_token": -2.58283120614511, "logits_per_char": -0.7044085107668482, "num_chars": 99}, {"sum_logits": -112.31266784667969, "num_tokens": 39, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -129.78643798828125, "logits_per_token": -2.87981199606871, "logits_per_char": -0.7340697244881025, "num_chars": 153}, {"sum_logits": -97.45059204101562, "num_tokens": 33, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -133.4962158203125, "logits_per_token": -2.95304824366714, "logits_per_char": -0.6411223160593134, "num_chars": 152}, {"sum_logits": -121.83232879638672, "num_tokens": 39, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -150.9664306640625, "logits_per_token": -3.1239058665740185, "logits_per_char": -0.704233114429981, "num_chars": 173}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 215, "native_id": 10391, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 80.51461791992188, "incorrect_loss_raw": 94.38794708251953, "correct_loss_per_char": 0.41717418611358487, "incorrect_loss_per_char": 0.5666716993911945, "correct_loss_per_token": 1.8298776799982244, "incorrect_loss_per_token": 2.572940964918633, "correct_loss_uncond": -36.31629943847656, "incorrect_loss_uncond": -23.65960947672526}, "model_output": [{"sum_logits": -124.33944702148438, "num_tokens": 46, "num_tokens_all": 497, "is_greedy": false, "sum_logits_uncond": -147.7628173828125, "logits_per_token": -2.7030314569887905, "logits_per_char": -0.6248213418165044, "num_chars": 199}, {"sum_logits": -49.164207458496094, "num_tokens": 21, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -66.68818664550781, "logits_per_token": -2.3411527361188615, "logits_per_char": -0.5402660160274296, "num_chars": 91}, {"sum_logits": -80.51461791992188, "num_tokens": 44, "num_tokens_all": 495, "is_greedy": false, "sum_logits_uncond": -116.83091735839844, "logits_per_token": -1.8298776799982244, "logits_per_char": -0.41717418611358487, "num_chars": 193}, {"sum_logits": -109.66018676757812, "num_tokens": 41, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -139.69166564941406, "logits_per_token": -2.674638701648247, "logits_per_char": -0.5349277403296494, "num_chars": 205}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 216, "native_id": 47807, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 85.25357055664062, "incorrect_loss_raw": 118.61148579915364, "correct_loss_per_char": 0.40791182084517047, "incorrect_loss_per_char": 0.6533017601628166, "correct_loss_per_token": 1.8533384903617527, "incorrect_loss_per_token": 3.0393406649046093, "correct_loss_uncond": -31.003936767578125, "incorrect_loss_uncond": -19.548817952473957}, "model_output": [{"sum_logits": -127.12408447265625, "num_tokens": 37, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -139.95774841308594, "logits_per_token": -3.4357860668285474, "logits_per_char": -0.7182151665121822, "num_chars": 177}, {"sum_logits": -109.95450592041016, "num_tokens": 43, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -134.1707763671875, "logits_per_token": -2.5570815330327945, "logits_per_char": -0.538992676080442, "num_chars": 204}, {"sum_logits": -85.25357055664062, "num_tokens": 46, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -116.25750732421875, "logits_per_token": -1.8533384903617527, "logits_per_char": -0.40791182084517047, "num_chars": 209}, {"sum_logits": -118.75586700439453, "num_tokens": 38, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -140.35238647460938, "logits_per_token": -3.125154394852488, "logits_per_char": -0.7026974378958256, "num_chars": 169}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 217, "native_id": 20824, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 97.83704376220703, "incorrect_loss_raw": 108.2797342936198, "correct_loss_per_char": 0.5317230639250382, "incorrect_loss_per_char": 0.7028292725793067, "correct_loss_per_token": 2.3294534229096913, "incorrect_loss_per_token": 3.04031501089988, "correct_loss_uncond": -28.70293426513672, "incorrect_loss_uncond": -20.384618123372395}, "model_output": [{"sum_logits": -84.90528869628906, "num_tokens": 39, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -110.4189453125, "logits_per_token": -2.177058684520232, "logits_per_char": -0.4907820155854859, "num_chars": 173}, {"sum_logits": -103.41862487792969, "num_tokens": 28, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -112.39994812011719, "logits_per_token": -3.6935223170689175, "logits_per_char": -0.9316993232245918, "num_chars": 111}, {"sum_logits": -97.83704376220703, "num_tokens": 42, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -126.53997802734375, "logits_per_token": -2.3294534229096913, "logits_per_char": -0.5317230639250382, "num_chars": 184}, {"sum_logits": -136.51528930664062, "num_tokens": 42, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -163.17416381835938, "logits_per_token": -3.250364031110491, "logits_per_char": -0.6860064789278424, "num_chars": 199}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 218, "native_id": 48990, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 141.45648193359375, "incorrect_loss_raw": 130.5732218424479, "correct_loss_per_char": 0.7991891634666314, "incorrect_loss_per_char": 0.7353227755944114, "correct_loss_per_token": 2.886866978236607, "incorrect_loss_per_token": 3.2128722259459646, "correct_loss_uncond": -34.448272705078125, "incorrect_loss_uncond": -14.10614013671875}, "model_output": [{"sum_logits": -143.86502075195312, "num_tokens": 47, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -160.6683807373047, "logits_per_token": -3.0609578883394284, "logits_per_char": -0.7415722719172841, "num_chars": 194}, {"sum_logits": -106.76194763183594, "num_tokens": 35, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -118.62046813964844, "logits_per_token": -3.050341360909598, "logits_per_char": -0.6672621726989746, "num_chars": 160}, {"sum_logits": -141.0926971435547, "num_tokens": 40, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -154.74923706054688, "logits_per_token": -3.527317428588867, "logits_per_char": -0.7971338821669757, "num_chars": 177}, {"sum_logits": -141.45648193359375, "num_tokens": 49, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -175.90475463867188, "logits_per_token": -2.886866978236607, "logits_per_char": -0.7991891634666314, "num_chars": 177}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 219, "native_id": 18110, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 48.3927001953125, "incorrect_loss_raw": 80.8557357788086, "correct_loss_per_char": 0.45226822612441586, "incorrect_loss_per_char": 0.6676425878552422, "correct_loss_per_token": 1.9357080078125, "incorrect_loss_per_token": 2.952149585512807, "correct_loss_uncond": -32.93987274169922, "incorrect_loss_uncond": -18.679845174153645}, "model_output": [{"sum_logits": -79.51676177978516, "num_tokens": 26, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -95.57258605957031, "logits_per_token": -3.058336991530198, "logits_per_char": -0.6914501024329144, "num_chars": 115}, {"sum_logits": -105.77264404296875, "num_tokens": 38, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -126.25064849853516, "logits_per_token": -2.7834906327097038, "logits_per_char": -0.6295990716843378, "num_chars": 168}, {"sum_logits": -57.277801513671875, "num_tokens": 19, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -76.78350830078125, "logits_per_token": -3.0146211322985197, "logits_per_char": -0.6818785894484747, "num_chars": 84}, {"sum_logits": -48.3927001953125, "num_tokens": 25, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -81.33257293701172, "logits_per_token": -1.9357080078125, "logits_per_char": -0.45226822612441586, "num_chars": 107}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 220, "native_id": 1129, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 84.71614837646484, "incorrect_loss_raw": 110.22304026285808, "correct_loss_per_char": 0.543052233182467, "incorrect_loss_per_char": 0.6053860884763504, "correct_loss_per_token": 2.4204613821847096, "incorrect_loss_per_token": 2.93878107189265, "correct_loss_uncond": -23.467529296875, "incorrect_loss_uncond": -12.909591674804688}, "model_output": [{"sum_logits": -84.71614837646484, "num_tokens": 35, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -108.18367767333984, "logits_per_token": -2.4204613821847096, "logits_per_char": -0.543052233182467, "num_chars": 156}, {"sum_logits": -104.09200286865234, "num_tokens": 38, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -120.59026336669922, "logits_per_token": -2.739263233385588, "logits_per_char": -0.584786532969957, "num_chars": 178}, {"sum_logits": -80.29568481445312, "num_tokens": 32, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -95.04841613769531, "logits_per_token": -2.50924015045166, "logits_per_char": -0.46146945295662717, "num_chars": 174}, {"sum_logits": -146.28143310546875, "num_tokens": 41, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -153.75921630859375, "logits_per_token": -3.5678398318407014, "logits_per_char": -0.7699022795024671, "num_chars": 190}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 221, "native_id": 42143, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 120.2701416015625, "incorrect_loss_raw": 71.3087984720866, "correct_loss_per_char": 0.639734795752992, "incorrect_loss_per_char": 0.6470525904668181, "correct_loss_per_token": 2.9334180878429876, "incorrect_loss_per_token": 3.349285812088938, "correct_loss_uncond": -44.75611877441406, "incorrect_loss_uncond": -25.792306264241535}, "model_output": [{"sum_logits": -83.51036071777344, "num_tokens": 26, "num_tokens_all": 443, "is_greedy": false, "sum_logits_uncond": -113.39198303222656, "logits_per_token": -3.2119369506835938, "logits_per_char": -0.6734706509497858, "num_chars": 124}, {"sum_logits": -77.15145874023438, "num_tokens": 22, "num_tokens_all": 439, "is_greedy": false, "sum_logits_uncond": -105.87249755859375, "logits_per_token": -3.5068844881924717, "logits_per_char": -0.6483315860523897, "num_chars": 119}, {"sum_logits": -53.26457595825195, "num_tokens": 16, "num_tokens_all": 433, "is_greedy": false, "sum_logits_uncond": -72.03883361816406, "logits_per_token": -3.329035997390747, "logits_per_char": -0.6193555343982785, "num_chars": 86}, {"sum_logits": -120.2701416015625, "num_tokens": 41, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -165.02626037597656, "logits_per_token": -2.9334180878429876, "logits_per_char": -0.639734795752992, "num_chars": 188}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 222, "native_id": 38774, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 86.68795776367188, "incorrect_loss_raw": 110.70937856038411, "correct_loss_per_char": 0.6421330204716436, "incorrect_loss_per_char": 0.5262560913334277, "correct_loss_per_token": 2.708998680114746, "incorrect_loss_per_token": 2.3928913515168415, "correct_loss_uncond": -21.25788116455078, "incorrect_loss_uncond": -20.16070810953776}, "model_output": [{"sum_logits": -139.0269317626953, "num_tokens": 53, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -161.25405883789062, "logits_per_token": -2.6231496558999114, "logits_per_char": -0.5651501291166476, "num_chars": 246}, {"sum_logits": -101.43494415283203, "num_tokens": 46, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -120.289306640625, "logits_per_token": -2.205107481583305, "logits_per_char": -0.5071747207641601, "num_chars": 200}, {"sum_logits": -86.68795776367188, "num_tokens": 32, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -107.94583892822266, "logits_per_token": -2.708998680114746, "logits_per_char": -0.6421330204716436, "num_chars": 135}, {"sum_logits": -91.666259765625, "num_tokens": 39, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -111.06689453125, "logits_per_token": -2.3504169170673075, "logits_per_char": -0.5064434241194752, "num_chars": 181}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 223, "native_id": 44846, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 97.70091247558594, "incorrect_loss_raw": 131.98968251546225, "correct_loss_per_char": 0.5680285609045694, "incorrect_loss_per_char": 0.6695599222134447, "correct_loss_per_token": 2.505151601938101, "incorrect_loss_per_token": 3.043042419303177, "correct_loss_uncond": -14.551315307617188, "incorrect_loss_uncond": -14.078656514485678}, "model_output": [{"sum_logits": -102.40298461914062, "num_tokens": 30, "num_tokens_all": 443, "is_greedy": false, "sum_logits_uncond": -112.03495788574219, "logits_per_token": -3.413432820638021, "logits_per_char": -0.7062274801320043, "num_chars": 145}, {"sum_logits": -196.7954864501953, "num_tokens": 65, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -204.62185668945312, "logits_per_token": -3.0276228684645434, "logits_per_char": -0.7398326558278019, "num_chars": 266}, {"sum_logits": -96.77057647705078, "num_tokens": 36, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -121.54820251464844, "logits_per_token": -2.6880715688069663, "logits_per_char": -0.5626196306805278, "num_chars": 172}, {"sum_logits": -97.70091247558594, "num_tokens": 39, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -112.25222778320312, "logits_per_token": -2.505151601938101, "logits_per_char": -0.5680285609045694, "num_chars": 172}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 224, "native_id": 31597, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 77.9513931274414, "incorrect_loss_raw": 117.16753896077473, "correct_loss_per_char": 0.5339136515578179, "incorrect_loss_per_char": 0.6699562357632654, "correct_loss_per_token": 2.5983797709147134, "incorrect_loss_per_token": 2.8894036972229546, "correct_loss_uncond": -21.655372619628906, "incorrect_loss_uncond": -29.49797821044922}, "model_output": [{"sum_logits": -131.97935485839844, "num_tokens": 49, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -165.33624267578125, "logits_per_token": -2.693456221599968, "logits_per_char": -0.6469576218548942, "num_chars": 204}, {"sum_logits": -77.9513931274414, "num_tokens": 30, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -99.60676574707031, "logits_per_token": -2.5983797709147134, "logits_per_char": -0.5339136515578179, "num_chars": 146}, {"sum_logits": -101.46670532226562, "num_tokens": 39, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -138.83981323242188, "logits_per_token": -2.601710392878606, "logits_per_char": -0.6341669082641601, "num_chars": 160}, {"sum_logits": -118.05655670166016, "num_tokens": 35, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -135.82049560546875, "logits_per_token": -3.37304447719029, "logits_per_char": -0.7287441771707417, "num_chars": 162}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 225, "native_id": 33536, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 54.987030029296875, "incorrect_loss_raw": 80.17089589436848, "correct_loss_per_char": 0.3352867684713224, "incorrect_loss_per_char": 0.5131071487937295, "correct_loss_per_token": 1.4861359467377533, "incorrect_loss_per_token": 2.384582921293004, "correct_loss_uncond": -16.24224853515625, "incorrect_loss_uncond": -15.339810689290365}, "model_output": [{"sum_logits": -101.52291870117188, "num_tokens": 43, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -116.3804702758789, "logits_per_token": -2.3609981093295787, "logits_per_char": -0.5429033085624164, "num_chars": 187}, {"sum_logits": -63.82953643798828, "num_tokens": 29, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -76.36370086669922, "logits_per_token": -2.2010184978616647, "logits_per_char": -0.49866825342178345, "num_chars": 128}, {"sum_logits": -75.16023254394531, "num_tokens": 29, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -93.78794860839844, "logits_per_token": -2.5917321566877694, "logits_per_char": -0.4977498843969888, "num_chars": 151}, {"sum_logits": -54.987030029296875, "num_tokens": 37, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -71.22927856445312, "logits_per_token": -1.4861359467377533, "logits_per_char": -0.3352867684713224, "num_chars": 164}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 226, "native_id": 6584, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 27.378211975097656, "incorrect_loss_raw": 37.276885986328125, "correct_loss_per_char": 0.5475642395019531, "incorrect_loss_per_char": 0.6098448007357514, "correct_loss_per_token": 2.281517664591471, "incorrect_loss_per_token": 2.9824643615983493, "correct_loss_uncond": -30.326690673828125, "incorrect_loss_uncond": -27.14678955078125}, "model_output": [{"sum_logits": -27.378211975097656, "num_tokens": 12, "num_tokens_all": 417, "is_greedy": false, "sum_logits_uncond": -57.70490264892578, "logits_per_token": -2.281517664591471, "logits_per_char": -0.5475642395019531, "num_chars": 50}, {"sum_logits": -46.976318359375, "num_tokens": 13, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -68.53413391113281, "logits_per_token": -3.6135629507211537, "logits_per_char": -0.7227125901442307, "num_chars": 65}, {"sum_logits": -34.54811477661133, "num_tokens": 15, "num_tokens_all": 420, "is_greedy": false, "sum_logits_uncond": -71.01596069335938, "logits_per_token": -2.3032076517740885, "logits_per_char": -0.5006973156030627, "num_chars": 69}, {"sum_logits": -30.306224822998047, "num_tokens": 10, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -53.72093200683594, "logits_per_token": -3.0306224822998047, "logits_per_char": -0.6061244964599609, "num_chars": 50}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 227, "native_id": 32716, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 95.48199462890625, "incorrect_loss_raw": 128.84195454915366, "correct_loss_per_char": 0.4133419680905032, "incorrect_loss_per_char": 0.5579322985652285, "correct_loss_per_token": 2.3870498657226564, "incorrect_loss_per_token": 2.8128675482313183, "correct_loss_uncond": -28.496009826660156, "incorrect_loss_uncond": -15.22833506266276}, "model_output": [{"sum_logits": -91.00408172607422, "num_tokens": 35, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -102.33000946044922, "logits_per_token": -2.6001166207449775, "logits_per_char": -0.5141473543846001, "num_chars": 177}, {"sum_logits": -114.59575653076172, "num_tokens": 37, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -139.13238525390625, "logits_per_token": -3.097182608939506, "logits_per_char": -0.5906997759317615, "num_chars": 194}, {"sum_logits": -180.926025390625, "num_tokens": 66, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -190.74847412109375, "logits_per_token": -2.7413034150094697, "logits_per_char": -0.5689497653793238, "num_chars": 318}, {"sum_logits": -95.48199462890625, "num_tokens": 40, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -123.9780044555664, "logits_per_token": -2.3870498657226564, "logits_per_char": -0.4133419680905032, "num_chars": 231}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 228, "native_id": 26207, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 59.21097183227539, "incorrect_loss_raw": 58.51919428507487, "correct_loss_per_char": 0.5639140174502418, "incorrect_loss_per_char": 0.7074521683933517, "correct_loss_per_token": 2.5743900796641475, "incorrect_loss_per_token": 2.9447534981594288, "correct_loss_uncond": -25.717296600341797, "incorrect_loss_uncond": -18.915167490641277}, "model_output": [{"sum_logits": -35.98535919189453, "num_tokens": 13, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -51.19422912597656, "logits_per_token": -2.7681045532226562, "logits_per_char": -0.5370949133118587, "num_chars": 67}, {"sum_logits": -16.862125396728516, "num_tokens": 8, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -36.226959228515625, "logits_per_token": -2.1077656745910645, "logits_per_char": -0.6485432844895583, "num_chars": 26}, {"sum_logits": -59.21097183227539, "num_tokens": 23, "num_tokens_all": 429, "is_greedy": false, "sum_logits_uncond": -84.92826843261719, "logits_per_token": -2.5743900796641475, "logits_per_char": -0.5639140174502418, "num_chars": 105}, {"sum_logits": -122.71009826660156, "num_tokens": 31, "num_tokens_all": 437, "is_greedy": false, "sum_logits_uncond": -144.88189697265625, "logits_per_token": -3.9583902666645665, "logits_per_char": -0.9367183073786379, "num_chars": 131}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 229, "native_id": 6232, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 32.304115295410156, "incorrect_loss_raw": 50.12181027730306, "correct_loss_per_char": 0.44252212733438573, "incorrect_loss_per_char": 0.47903566948522475, "correct_loss_per_token": 2.3074368068150113, "incorrect_loss_per_token": 2.347367898011819, "correct_loss_uncond": -29.755287170410156, "incorrect_loss_uncond": -26.883644739786785}, "model_output": [{"sum_logits": -13.888162612915039, "num_tokens": 13, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -40.23267364501953, "logits_per_token": -1.0683202009934645, "logits_per_char": -0.2525120475075462, "num_chars": 55}, {"sum_logits": -32.304115295410156, "num_tokens": 14, "num_tokens_all": 420, "is_greedy": false, "sum_logits_uncond": -62.05940246582031, "logits_per_token": -2.3074368068150113, "logits_per_char": -0.44252212733438573, "num_chars": 73}, {"sum_logits": -99.08306121826172, "num_tokens": 30, "num_tokens_all": 436, "is_greedy": false, "sum_logits_uncond": -125.39341735839844, "logits_per_token": -3.3027687072753906, "logits_per_char": -0.7394258299870278, "num_chars": 134}, {"sum_logits": -37.39420700073242, "num_tokens": 14, "num_tokens_all": 420, "is_greedy": false, "sum_logits_uncond": -65.39027404785156, "logits_per_token": -2.6710147857666016, "logits_per_char": -0.4451691309611003, "num_chars": 84}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 230, "native_id": 20471, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 141.689208984375, "incorrect_loss_raw": 90.80433654785156, "correct_loss_per_char": 0.5534734725952148, "incorrect_loss_per_char": 0.5247475855941267, "correct_loss_per_token": 2.623874240451389, "incorrect_loss_per_token": 2.6333082701667903, "correct_loss_uncond": -25.3548583984375, "incorrect_loss_uncond": -19.6074956258138}, "model_output": [{"sum_logits": -111.06660461425781, "num_tokens": 45, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -126.2925796508789, "logits_per_token": -2.4681467692057293, "logits_per_char": -0.49583305631365093, "num_chars": 224}, {"sum_logits": -71.91456604003906, "num_tokens": 25, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -94.90902709960938, "logits_per_token": -2.8765826416015625, "logits_per_char": -0.4700298433989481, "num_chars": 153}, {"sum_logits": -141.689208984375, "num_tokens": 54, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -167.0440673828125, "logits_per_token": -2.623874240451389, "logits_per_char": -0.5534734725952148, "num_chars": 256}, {"sum_logits": -89.43183898925781, "num_tokens": 35, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -110.03388977050781, "logits_per_token": -2.5551953996930803, "logits_per_char": -0.608379857069781, "num_chars": 147}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 231, "native_id": 33713, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 69.1914291381836, "incorrect_loss_raw": 127.18106587727864, "correct_loss_per_char": 0.45520677064594467, "incorrect_loss_per_char": 0.5895904974149596, "correct_loss_per_token": 2.035042033475988, "incorrect_loss_per_token": 2.9515465953362683, "correct_loss_uncond": -33.190406799316406, "incorrect_loss_uncond": -23.258555094401043}, "model_output": [{"sum_logits": -161.1693115234375, "num_tokens": 63, "num_tokens_all": 490, "is_greedy": false, "sum_logits_uncond": -184.860595703125, "logits_per_token": -2.5582430400545637, "logits_per_char": -0.5301622089586759, "num_chars": 304}, {"sum_logits": -69.1914291381836, "num_tokens": 34, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -102.3818359375, "logits_per_token": -2.035042033475988, "logits_per_char": -0.45520677064594467, "num_chars": 152}, {"sum_logits": -118.42985534667969, "num_tokens": 35, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -141.30526733398438, "logits_per_token": -3.383710152762277, "logits_per_char": -0.6845656378420791, "num_chars": 173}, {"sum_logits": -101.94403076171875, "num_tokens": 35, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -125.15299987792969, "logits_per_token": -2.9126865931919643, "logits_per_char": -0.5540436454441237, "num_chars": 184}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 232, "native_id": 15278, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 19.6336669921875, "incorrect_loss_raw": 39.08354822794596, "correct_loss_per_char": 0.46746826171875, "incorrect_loss_per_char": 0.7915473391498599, "correct_loss_per_token": 2.1815185546875, "incorrect_loss_per_token": 3.051846019805424, "correct_loss_uncond": -22.532798767089844, "incorrect_loss_uncond": -19.944683074951172}, "model_output": [{"sum_logits": -27.968158721923828, "num_tokens": 12, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -53.54822540283203, "logits_per_token": -2.3306798934936523, "logits_per_char": -0.621514638264974, "num_chars": 45}, {"sum_logits": -62.262718200683594, "num_tokens": 21, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -82.02523803710938, "logits_per_token": -2.964891342889695, "logits_per_char": -0.7881356734263746, "num_chars": 79}, {"sum_logits": -19.6336669921875, "num_tokens": 9, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -42.166465759277344, "logits_per_token": -2.1815185546875, "logits_per_char": -0.46746826171875, "num_chars": 42}, {"sum_logits": -27.01976776123047, "num_tokens": 7, "num_tokens_all": 404, "is_greedy": false, "sum_logits_uncond": -41.51123046875, "logits_per_token": -3.859966823032924, "logits_per_char": -0.964991705758231, "num_chars": 28}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 233, "native_id": 35403, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.2961297035217285, "incorrect_loss_raw": 29.76980972290039, "correct_loss_per_char": 0.25184518814086915, "incorrect_loss_per_char": 0.7054521409325747, "correct_loss_per_token": 1.0493549505869548, "incorrect_loss_per_token": 2.8657394414225585, "correct_loss_uncond": -20.613755702972412, "incorrect_loss_uncond": -20.32391611735026}, "model_output": [{"sum_logits": -43.29747772216797, "num_tokens": 14, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -62.69757080078125, "logits_per_token": -3.092676980154855, "logits_per_char": -0.6983464148736769, "num_chars": 62}, {"sum_logits": -17.78058624267578, "num_tokens": 9, "num_tokens_all": 404, "is_greedy": false, "sum_logits_uncond": -37.61384201049805, "logits_per_token": -1.9756206936306424, "logits_per_char": -0.44451465606689455, "num_chars": 40}, {"sum_logits": -28.231365203857422, "num_tokens": 8, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -49.969764709472656, "logits_per_token": -3.5289206504821777, "logits_per_char": -0.9734953518571525, "num_chars": 29}, {"sum_logits": -6.2961297035217285, "num_tokens": 6, "num_tokens_all": 401, "is_greedy": false, "sum_logits_uncond": -26.90988540649414, "logits_per_token": -1.0493549505869548, "logits_per_char": -0.25184518814086915, "num_chars": 25}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 234, "native_id": 47282, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 60.75503921508789, "incorrect_loss_raw": 88.74298604329427, "correct_loss_per_char": 0.41050702172356685, "incorrect_loss_per_char": 0.5762963450747013, "correct_loss_per_token": 2.09500135224441, "incorrect_loss_per_token": 2.6364914952662954, "correct_loss_uncond": -18.30660629272461, "incorrect_loss_uncond": -17.749664306640625}, "model_output": [{"sum_logits": -60.75503921508789, "num_tokens": 29, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -79.0616455078125, "logits_per_token": -2.09500135224441, "logits_per_char": -0.41050702172356685, "num_chars": 148}, {"sum_logits": -83.0809326171875, "num_tokens": 32, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -100.54556274414062, "logits_per_token": -2.5962791442871094, "logits_per_char": -0.5651764123618197, "num_chars": 147}, {"sum_logits": -100.01811981201172, "num_tokens": 32, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -106.86367797851562, "logits_per_token": -3.125566244125366, "logits_per_char": -0.6537132013856976, "num_chars": 153}, {"sum_logits": -83.1299057006836, "num_tokens": 38, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -112.06871032714844, "logits_per_token": -2.1876290973864103, "logits_per_char": -0.5099994214765865, "num_chars": 163}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 235, "native_id": 16464, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 53.7332763671875, "incorrect_loss_raw": 67.29735565185547, "correct_loss_per_char": 0.4165370261022287, "incorrect_loss_per_char": 0.5316785033857552, "correct_loss_per_token": 1.7911092122395833, "incorrect_loss_per_token": 2.481606617680302, "correct_loss_uncond": -23.48956298828125, "incorrect_loss_uncond": -28.419898986816406}, "model_output": [{"sum_logits": -81.43843078613281, "num_tokens": 36, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -119.22554016113281, "logits_per_token": -2.2621786329481335, "logits_per_char": -0.44746390541831216, "num_chars": 182}, {"sum_logits": -64.80313110351562, "num_tokens": 27, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -93.80618286132812, "logits_per_token": -2.400115966796875, "logits_per_char": -0.5491790771484375, "num_chars": 118}, {"sum_logits": -53.7332763671875, "num_tokens": 30, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -77.22283935546875, "logits_per_token": -1.7911092122395833, "logits_per_char": -0.4165370261022287, "num_chars": 129}, {"sum_logits": -55.65050506591797, "num_tokens": 20, "num_tokens_all": 446, "is_greedy": false, "sum_logits_uncond": -74.12004089355469, "logits_per_token": -2.7825252532958986, "logits_per_char": -0.5983925275905158, "num_chars": 93}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 236, "native_id": 18395, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 101.52369689941406, "incorrect_loss_raw": 76.07755025227864, "correct_loss_per_char": 0.5001167334946506, "incorrect_loss_per_char": 0.5878828702027924, "correct_loss_per_token": 2.5380924224853514, "incorrect_loss_per_token": 2.7182677310446035, "correct_loss_uncond": -26.903030395507812, "incorrect_loss_uncond": -27.619537353515625}, "model_output": [{"sum_logits": -74.35061645507812, "num_tokens": 32, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -108.78547668457031, "logits_per_token": -2.3234567642211914, "logits_per_char": -0.5199343808047421, "num_chars": 143}, {"sum_logits": -101.52369689941406, "num_tokens": 40, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -128.42672729492188, "logits_per_token": -2.5380924224853514, "logits_per_char": -0.5001167334946506, "num_chars": 203}, {"sum_logits": -84.6902847290039, "num_tokens": 30, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -108.00537872314453, "logits_per_token": -2.823009490966797, "logits_per_char": -0.6367690581128114, "num_chars": 133}, {"sum_logits": -69.1917495727539, "num_tokens": 23, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -94.30040740966797, "logits_per_token": -3.008336937945822, "logits_per_char": -0.6069451716908237, "num_chars": 114}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 237, "native_id": 11517, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 98.66410064697266, "incorrect_loss_raw": 153.1951446533203, "correct_loss_per_char": 0.4385071139865451, "incorrect_loss_per_char": 0.7357921798507889, "correct_loss_per_token": 2.05550209681193, "incorrect_loss_per_token": 3.327351337463206, "correct_loss_uncond": -26.325790405273438, "incorrect_loss_uncond": -21.273325602213543}, "model_output": [{"sum_logits": -116.15217590332031, "num_tokens": 41, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -141.8931884765625, "logits_per_token": -2.8329799000809834, "logits_per_char": -0.6913819994245257, "num_chars": 168}, {"sum_logits": -208.01124572753906, "num_tokens": 53, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -229.7193145751953, "logits_per_token": -3.924740485425265, "logits_per_char": -0.8320449829101563, "num_chars": 250}, {"sum_logits": -98.66410064697266, "num_tokens": 48, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -124.9898910522461, "logits_per_token": -2.05550209681193, "logits_per_char": -0.4385071139865451, "num_chars": 225}, {"sum_logits": -135.42201232910156, "num_tokens": 42, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -151.79290771484375, "logits_per_token": -3.2243336268833707, "logits_per_char": -0.6839495572176847, "num_chars": 198}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 238, "native_id": 12495, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 158.43203735351562, "incorrect_loss_raw": 123.44061279296875, "correct_loss_per_char": 0.6440326721687627, "incorrect_loss_per_char": 0.5854003543268937, "correct_loss_per_token": 3.1065105363434435, "incorrect_loss_per_token": 2.859813975661147, "correct_loss_uncond": -26.125869750976562, "incorrect_loss_uncond": -18.554583231608074}, "model_output": [{"sum_logits": -158.43203735351562, "num_tokens": 51, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -184.5579071044922, "logits_per_token": -3.1065105363434435, "logits_per_char": -0.6440326721687627, "num_chars": 246}, {"sum_logits": -75.27301025390625, "num_tokens": 34, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -97.93389129638672, "logits_per_token": -2.2139120662913605, "logits_per_char": -0.4205196103570182, "num_chars": 179}, {"sum_logits": -153.56008911132812, "num_tokens": 41, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -161.24212646484375, "logits_per_token": -3.745368027105564, "logits_per_char": -0.8211769471194017, "num_chars": 187}, {"sum_logits": -141.48873901367188, "num_tokens": 54, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -166.8095703125, "logits_per_token": -2.620161833586516, "logits_per_char": -0.5145045055042614, "num_chars": 275}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 239, "native_id": 46596, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 121.33552551269531, "incorrect_loss_raw": 107.77375030517578, "correct_loss_per_char": 0.47958705736243207, "incorrect_loss_per_char": 0.6099756938457688, "correct_loss_per_token": 2.6377288154933765, "incorrect_loss_per_token": 2.952771202892522, "correct_loss_uncond": -19.413436889648438, "incorrect_loss_uncond": -17.93834686279297}, "model_output": [{"sum_logits": -119.99307250976562, "num_tokens": 43, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -140.5275421142578, "logits_per_token": -2.7905365699945492, "logits_per_char": -0.6185209923183795, "num_chars": 194}, {"sum_logits": -98.21595001220703, "num_tokens": 33, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -113.45697021484375, "logits_per_token": -2.9762409094608193, "logits_per_char": -0.5743622807731406, "num_chars": 171}, {"sum_logits": -105.11222839355469, "num_tokens": 34, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -123.15177917480469, "logits_per_token": -3.0915361292221966, "logits_per_char": -0.637043808445786, "num_chars": 165}, {"sum_logits": -121.33552551269531, "num_tokens": 46, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -140.74896240234375, "logits_per_token": -2.6377288154933765, "logits_per_char": -0.47958705736243207, "num_chars": 253}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 240, "native_id": 16343, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 104.4254379272461, "incorrect_loss_raw": 112.05188242594402, "correct_loss_per_char": 0.5069196015885733, "incorrect_loss_per_char": 0.6140261406129391, "correct_loss_per_token": 2.6106359481811525, "incorrect_loss_per_token": 2.5745672546098137, "correct_loss_uncond": -17.452545166015625, "incorrect_loss_uncond": -19.86382802327474}, "model_output": [{"sum_logits": -136.10272216796875, "num_tokens": 51, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -155.39718627929688, "logits_per_token": -2.6686808268229165, "logits_per_char": -0.6243244136145355, "num_chars": 218}, {"sum_logits": -133.818115234375, "num_tokens": 47, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -156.7126922607422, "logits_per_token": -2.847193941156915, "logits_per_char": -0.7043058696546053, "num_chars": 190}, {"sum_logits": -104.4254379272461, "num_tokens": 40, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -121.87798309326172, "logits_per_token": -2.6106359481811525, "logits_per_char": -0.5069196015885733, "num_chars": 206}, {"sum_logits": -66.23480987548828, "num_tokens": 30, "num_tokens_all": 448, "is_greedy": false, "sum_logits_uncond": -83.63725280761719, "logits_per_token": -2.2078269958496093, "logits_per_char": -0.5134481385696766, "num_chars": 129}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 241, "native_id": 44174, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 54.35227966308594, "incorrect_loss_raw": 95.1489766438802, "correct_loss_per_char": 0.46454939882979435, "incorrect_loss_per_char": 0.6207616177601738, "correct_loss_per_token": 1.811742655436198, "incorrect_loss_per_token": 2.7166323138566733, "correct_loss_uncond": -26.822105407714844, "incorrect_loss_uncond": -10.697669982910156}, "model_output": [{"sum_logits": -70.49405670166016, "num_tokens": 28, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -83.83606719970703, "logits_per_token": -2.5176448822021484, "logits_per_char": -0.5422619746281551, "num_chars": 130}, {"sum_logits": -146.30508422851562, "num_tokens": 46, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -148.69192504882812, "logits_per_token": -3.180545309315557, "logits_per_char": -0.8038740891676682, "num_chars": 182}, {"sum_logits": -54.35227966308594, "num_tokens": 30, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -81.17438507080078, "logits_per_token": -1.811742655436198, "logits_per_char": -0.46454939882979435, "num_chars": 117}, {"sum_logits": -68.64778900146484, "num_tokens": 28, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -85.01194763183594, "logits_per_token": -2.451706750052316, "logits_per_char": -0.5161487894846981, "num_chars": 133}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 242, "native_id": 12045, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 73.5050048828125, "incorrect_loss_raw": 75.55774688720703, "correct_loss_per_char": 0.6125417073567708, "incorrect_loss_per_char": 0.5723335212178365, "correct_loss_per_token": 2.371129189768145, "incorrect_loss_per_token": 2.2483594784478806, "correct_loss_uncond": -18.607864379882812, "incorrect_loss_uncond": -20.130760192871094}, "model_output": [{"sum_logits": -73.5050048828125, "num_tokens": 31, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -92.11286926269531, "logits_per_token": -2.371129189768145, "logits_per_char": -0.6125417073567708, "num_chars": 120}, {"sum_logits": -87.94352722167969, "num_tokens": 34, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -108.80282592773438, "logits_per_token": -2.5865743300494026, "logits_per_char": -0.6237129590190049, "num_chars": 141}, {"sum_logits": -87.76106262207031, "num_tokens": 33, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -105.99639892578125, "logits_per_token": -2.659426140062737, "logits_per_char": -0.650081945348669, "num_chars": 135}, {"sum_logits": -50.968650817871094, "num_tokens": 34, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -72.26629638671875, "logits_per_token": -1.4990779652315027, "logits_per_char": -0.4432056592858356, "num_chars": 115}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 243, "native_id": 44119, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 120.13674926757812, "incorrect_loss_raw": 100.6608174641927, "correct_loss_per_char": 0.6160858936798879, "incorrect_loss_per_char": 0.6687181761070734, "correct_loss_per_token": 2.669705539279514, "incorrect_loss_per_token": 3.0093074132241013, "correct_loss_uncond": -22.259368896484375, "incorrect_loss_uncond": -20.779917399088543}, "model_output": [{"sum_logits": -111.52889251708984, "num_tokens": 40, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -122.32609558105469, "logits_per_token": -2.788222312927246, "logits_per_char": -0.536196598639855, "num_chars": 208}, {"sum_logits": -78.39366149902344, "num_tokens": 33, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -110.05484008789062, "logits_per_token": -2.3755654999704072, "logits_per_char": -0.46942312274864334, "num_chars": 167}, {"sum_logits": -120.13674926757812, "num_tokens": 45, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -142.3961181640625, "logits_per_token": -2.669705539279514, "logits_per_char": -0.6160858936798879, "num_chars": 195}, {"sum_logits": -112.05989837646484, "num_tokens": 29, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -131.94126892089844, "logits_per_token": -3.86413442677465, "logits_per_char": -1.0005348069327218, "num_chars": 112}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 244, "native_id": 49259, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 12.554256439208984, "incorrect_loss_raw": 36.19780731201172, "correct_loss_per_char": 0.6277128219604492, "incorrect_loss_per_char": 0.8329727519809914, "correct_loss_per_token": 2.510851287841797, "incorrect_loss_per_token": 3.7851221579092518, "correct_loss_uncond": -15.690200805664062, "incorrect_loss_uncond": -16.257008870442707}, "model_output": [{"sum_logits": -29.51932144165039, "num_tokens": 10, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -47.244529724121094, "logits_per_token": -2.9519321441650392, "logits_per_char": -0.6864958474802416, "num_chars": 43}, {"sum_logits": -34.431915283203125, "num_tokens": 10, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -53.23557662963867, "logits_per_token": -3.4431915283203125, "logits_per_char": -0.6376280608000578, "num_chars": 54}, {"sum_logits": -44.64218521118164, "num_tokens": 9, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -56.884342193603516, "logits_per_token": -4.960242801242405, "logits_per_char": -1.1747943476626748, "num_chars": 38}, {"sum_logits": -12.554256439208984, "num_tokens": 5, "num_tokens_all": 404, "is_greedy": false, "sum_logits_uncond": -28.244457244873047, "logits_per_token": -2.510851287841797, "logits_per_char": -0.6277128219604492, "num_chars": 20}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 245, "native_id": 37607, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 98.13504028320312, "incorrect_loss_raw": 117.96157836914062, "correct_loss_per_char": 0.5276077434580814, "incorrect_loss_per_char": 0.6519157923947816, "correct_loss_per_token": 2.5162830841846957, "incorrect_loss_per_token": 2.8260001754867923, "correct_loss_uncond": -31.73065185546875, "incorrect_loss_uncond": -17.56561533610026}, "model_output": [{"sum_logits": -128.62753295898438, "num_tokens": 39, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -147.6409149169922, "logits_per_token": -3.2981418707431893, "logits_per_char": -0.8245354676857973, "num_chars": 156}, {"sum_logits": -118.52357482910156, "num_tokens": 46, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -140.97592163085938, "logits_per_token": -2.5765994528065557, "logits_per_char": -0.5838599745275939, "num_chars": 203}, {"sum_logits": -98.13504028320312, "num_tokens": 39, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -129.86569213867188, "logits_per_token": -2.5162830841846957, "logits_per_char": -0.5276077434580814, "num_chars": 186}, {"sum_logits": -106.73362731933594, "num_tokens": 41, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -117.9647445678711, "logits_per_token": -2.6032592029106327, "logits_per_char": -0.5473519349709536, "num_chars": 195}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 246, "native_id": 10175, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 110.6102294921875, "incorrect_loss_raw": 104.65460459391277, "correct_loss_per_char": 0.7000647436214399, "incorrect_loss_per_char": 0.7174949973767274, "correct_loss_per_token": 2.83615973056891, "incorrect_loss_per_token": 3.6308270343885334, "correct_loss_uncond": -10.403564453125, "incorrect_loss_uncond": -14.659459431966146}, "model_output": [{"sum_logits": -127.07797241210938, "num_tokens": 36, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -141.85189819335938, "logits_per_token": -3.529943678114149, "logits_per_char": -0.7345547538272218, "num_chars": 173}, {"sum_logits": -110.6102294921875, "num_tokens": 39, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -121.0137939453125, "logits_per_token": -2.83615973056891, "logits_per_char": -0.7000647436214399, "num_chars": 158}, {"sum_logits": -100.044921875, "num_tokens": 31, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -117.52945709228516, "logits_per_token": -3.2272555443548385, "logits_per_char": -0.6063328598484848, "num_chars": 165}, {"sum_logits": -86.8409194946289, "num_tokens": 21, "num_tokens_all": 443, "is_greedy": false, "sum_logits_uncond": -98.56083679199219, "logits_per_token": -4.135281880696614, "logits_per_char": -0.8115973784544758, "num_chars": 107}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 247, "native_id": 16955, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 31.303075790405273, "incorrect_loss_raw": 58.112448374430336, "correct_loss_per_char": 0.42880925740281195, "incorrect_loss_per_char": 0.7030791835408764, "correct_loss_per_token": 1.8413573994356043, "incorrect_loss_per_token": 3.15429123304623, "correct_loss_uncond": -19.565690994262695, "incorrect_loss_uncond": -25.38546371459961}, "model_output": [{"sum_logits": -55.145381927490234, "num_tokens": 20, "num_tokens_all": 432, "is_greedy": false, "sum_logits_uncond": -72.5860824584961, "logits_per_token": -2.757269096374512, "logits_per_char": -0.7877911703927176, "num_chars": 70}, {"sum_logits": -49.57984161376953, "num_tokens": 14, "num_tokens_all": 426, "is_greedy": false, "sum_logits_uncond": -79.99591064453125, "logits_per_token": -3.541417258126395, "logits_per_char": -0.6886089113023546, "num_chars": 72}, {"sum_logits": -31.303075790405273, "num_tokens": 17, "num_tokens_all": 429, "is_greedy": false, "sum_logits_uncond": -50.86876678466797, "logits_per_token": -1.8413573994356043, "logits_per_char": -0.42880925740281195, "num_chars": 73}, {"sum_logits": -69.61212158203125, "num_tokens": 22, "num_tokens_all": 434, "is_greedy": false, "sum_logits_uncond": -97.9117431640625, "logits_per_token": -3.164187344637784, "logits_per_char": -0.6328374689275568, "num_chars": 110}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 248, "native_id": 40811, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 48.391197204589844, "incorrect_loss_raw": 98.62795003255208, "correct_loss_per_char": 0.4937877265774474, "incorrect_loss_per_char": 0.5288831378829937, "correct_loss_per_token": 2.199599872935902, "incorrect_loss_per_token": 2.5315524672260703, "correct_loss_uncond": -18.93450927734375, "incorrect_loss_uncond": -16.134788513183594}, "model_output": [{"sum_logits": -48.391197204589844, "num_tokens": 22, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -67.3257064819336, "logits_per_token": -2.199599872935902, "logits_per_char": -0.4937877265774474, "num_chars": 98}, {"sum_logits": -98.64462280273438, "num_tokens": 39, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -109.87651062011719, "logits_per_token": -2.529349302634215, "logits_per_char": -0.5332141773120777, "num_chars": 185}, {"sum_logits": -95.150390625, "num_tokens": 40, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -118.98899841308594, "logits_per_token": -2.378759765625, "logits_per_char": -0.47338502798507465, "num_chars": 201}, {"sum_logits": -102.08883666992188, "num_tokens": 38, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -115.4227066040039, "logits_per_token": -2.6865483334189966, "logits_per_char": -0.5800502083518289, "num_chars": 176}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 249, "native_id": 49207, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 89.37562561035156, "incorrect_loss_raw": 131.80687459309897, "correct_loss_per_char": 0.46549805005391437, "incorrect_loss_per_char": 0.7169757731354673, "correct_loss_per_token": 1.9429483828337297, "incorrect_loss_per_token": 3.261101567847097, "correct_loss_uncond": -28.808067321777344, "incorrect_loss_uncond": -15.081459045410156}, "model_output": [{"sum_logits": -184.60598754882812, "num_tokens": 48, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -194.47787475585938, "logits_per_token": -3.8459580739339194, "logits_per_char": -0.8666947772245452, "num_chars": 213}, {"sum_logits": -89.37562561035156, "num_tokens": 46, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -118.1836929321289, "logits_per_token": -1.9429483828337297, "logits_per_char": -0.46549805005391437, "num_chars": 192}, {"sum_logits": -96.73428344726562, "num_tokens": 39, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -119.32561492919922, "logits_per_token": -2.4803662422375803, "logits_per_char": -0.5434510306026159, "num_chars": 178}, {"sum_logits": -114.08035278320312, "num_tokens": 33, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -126.86151123046875, "logits_per_token": -3.4569803873697915, "logits_per_char": -0.7407815115792411, "num_chars": 154}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 250, "native_id": 777, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 82.5485610961914, "incorrect_loss_raw": 74.54717508951823, "correct_loss_per_char": 0.4637559612145585, "incorrect_loss_per_char": 0.5708684035444859, "correct_loss_per_token": 1.9654419308617002, "incorrect_loss_per_token": 2.669172127191608, "correct_loss_uncond": -19.37413787841797, "incorrect_loss_uncond": -25.8429438273112}, "model_output": [{"sum_logits": -82.5485610961914, "num_tokens": 42, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -101.92269897460938, "logits_per_token": -1.9654419308617002, "logits_per_char": -0.4637559612145585, "num_chars": 178}, {"sum_logits": -52.395835876464844, "num_tokens": 22, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -78.53997039794922, "logits_per_token": -2.381628903475675, "logits_per_char": -0.4851466284857856, "num_chars": 108}, {"sum_logits": -65.11207580566406, "num_tokens": 26, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -92.53651428222656, "logits_per_token": -2.5043106079101562, "logits_per_char": -0.5919279618696733, "num_chars": 110}, {"sum_logits": -106.13361358642578, "num_tokens": 34, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -130.0938720703125, "logits_per_token": -3.1215768701889934, "logits_per_char": -0.6355306202779987, "num_chars": 167}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 251, "native_id": 17293, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 54.209041595458984, "incorrect_loss_raw": 103.04803212483723, "correct_loss_per_char": 0.49733065683907324, "incorrect_loss_per_char": 0.5180849250939247, "correct_loss_per_token": 2.2587100664774575, "incorrect_loss_per_token": 2.4373090017409553, "correct_loss_uncond": -16.892887115478516, "incorrect_loss_uncond": -25.472890218098957}, "model_output": [{"sum_logits": -54.209041595458984, "num_tokens": 24, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -71.1019287109375, "logits_per_token": -2.2587100664774575, "logits_per_char": -0.49733065683907324, "num_chars": 109}, {"sum_logits": -153.08616638183594, "num_tokens": 56, "num_tokens_all": 493, "is_greedy": false, "sum_logits_uncond": -180.03060913085938, "logits_per_token": -2.7336815425327847, "logits_per_char": -0.5712170387381938, "num_chars": 268}, {"sum_logits": -92.89019012451172, "num_tokens": 40, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -116.8499984741211, "logits_per_token": -2.322254753112793, "logits_per_char": -0.5562286833803097, "num_chars": 167}, {"sum_logits": -63.16773986816406, "num_tokens": 28, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -88.68215942382812, "logits_per_token": -2.2559907095772878, "logits_per_char": -0.42680905316327067, "num_chars": 148}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 252, "native_id": 37708, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 126.28013610839844, "incorrect_loss_raw": 100.3545405069987, "correct_loss_per_char": 0.5466672558805127, "incorrect_loss_per_char": 0.5737736496295652, "correct_loss_per_token": 2.8062252468532987, "incorrect_loss_per_token": 2.6947192924023877, "correct_loss_uncond": -19.021408081054688, "incorrect_loss_uncond": -22.533114115397137}, "model_output": [{"sum_logits": -126.28013610839844, "num_tokens": 45, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -145.30154418945312, "logits_per_token": -2.8062252468532987, "logits_per_char": -0.5466672558805127, "num_chars": 231}, {"sum_logits": -99.1696548461914, "num_tokens": 43, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -122.80172729492188, "logits_per_token": -2.306271042934684, "logits_per_char": -0.4790804581941614, "num_chars": 207}, {"sum_logits": -105.32608032226562, "num_tokens": 29, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -122.29963684082031, "logits_per_token": -3.631933804216056, "logits_per_char": -0.7801931875723379, "num_chars": 135}, {"sum_logits": -96.56788635253906, "num_tokens": 45, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -123.56159973144531, "logits_per_token": -2.1459530300564236, "logits_per_char": -0.4620473031221965, "num_chars": 209}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 253, "native_id": 16773, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 67.4973373413086, "incorrect_loss_raw": 136.93880462646484, "correct_loss_per_char": 0.32765697738499316, "incorrect_loss_per_char": 0.6653925138088997, "correct_loss_per_token": 1.4673334204632302, "incorrect_loss_per_token": 2.9407130273257294, "correct_loss_uncond": -48.40972137451172, "incorrect_loss_uncond": -20.32459767659505}, "model_output": [{"sum_logits": -147.7869110107422, "num_tokens": 52, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -175.79336547851562, "logits_per_token": -2.8420559809758115, "logits_per_char": -0.6481882061874658, "num_chars": 228}, {"sum_logits": -179.7742919921875, "num_tokens": 52, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -185.91180419921875, "logits_per_token": -3.4571979229266825, "logits_per_char": -0.7816273564877717, "num_chars": 230}, {"sum_logits": -83.25521087646484, "num_tokens": 33, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -110.08503723144531, "logits_per_token": -2.5228851780746924, "logits_per_char": -0.5663619787514615, "num_chars": 147}, {"sum_logits": -67.4973373413086, "num_tokens": 46, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -115.90705871582031, "logits_per_token": -1.4673334204632302, "logits_per_char": -0.32765697738499316, "num_chars": 206}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 254, "native_id": 15071, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 88.00878143310547, "incorrect_loss_raw": 83.06903584798177, "correct_loss_per_char": 0.4681318161335397, "incorrect_loss_per_char": 0.4964563653441643, "correct_loss_per_token": 1.833516279856364, "incorrect_loss_per_token": 2.243603507551567, "correct_loss_uncond": -16.665489196777344, "incorrect_loss_uncond": -24.27989451090495}, "model_output": [{"sum_logits": -83.78410339355469, "num_tokens": 35, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -104.13062286376953, "logits_per_token": -2.393831525530134, "logits_per_char": -0.5302791354022448, "num_chars": 158}, {"sum_logits": -73.208740234375, "num_tokens": 32, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -99.21992492675781, "logits_per_token": -2.2877731323242188, "logits_per_char": -0.4980186410501701, "num_chars": 147}, {"sum_logits": -92.21426391601562, "num_tokens": 45, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -118.69624328613281, "logits_per_token": -2.049205864800347, "logits_per_char": -0.46107131958007813, "num_chars": 200}, {"sum_logits": -88.00878143310547, "num_tokens": 48, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -104.67427062988281, "logits_per_token": -1.833516279856364, "logits_per_char": -0.4681318161335397, "num_chars": 188}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 255, "native_id": 31654, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 88.91027069091797, "incorrect_loss_raw": 175.88167317708334, "correct_loss_per_char": 0.5849359913876182, "incorrect_loss_per_char": 0.7363775257979256, "correct_loss_per_token": 2.615007961497587, "incorrect_loss_per_token": 3.2844525385959717, "correct_loss_uncond": -39.21778106689453, "incorrect_loss_uncond": -16.476053873697918}, "model_output": [{"sum_logits": -142.53646850585938, "num_tokens": 40, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -146.95413208007812, "logits_per_token": -3.5634117126464844, "logits_per_char": -0.7962931201444657, "num_chars": 179}, {"sum_logits": -166.45669555664062, "num_tokens": 55, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -191.03277587890625, "logits_per_token": -3.0264853737571022, "logits_per_char": -0.6402180598332332, "num_chars": 260}, {"sum_logits": -88.91027069091797, "num_tokens": 34, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -128.1280517578125, "logits_per_token": -2.615007961497587, "logits_per_char": -0.5849359913876182, "num_chars": 152}, {"sum_logits": -218.65185546875, "num_tokens": 67, "num_tokens_all": 496, "is_greedy": false, "sum_logits_uncond": -239.08627319335938, "logits_per_token": -3.263460529384328, "logits_per_char": -0.7726213974160777, "num_chars": 283}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 256, "native_id": 8627, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 103.44397735595703, "incorrect_loss_raw": 139.2269744873047, "correct_loss_per_char": 0.525096331756127, "incorrect_loss_per_char": 0.6205628052417332, "correct_loss_per_token": 2.4056738919990006, "incorrect_loss_per_token": 3.203771196044047, "correct_loss_uncond": -22.72663116455078, "incorrect_loss_uncond": -15.89337412516276}, "model_output": [{"sum_logits": -85.94447326660156, "num_tokens": 30, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -109.48772430419922, "logits_per_token": -2.8648157755533856, "logits_per_char": -0.5371529579162597, "num_chars": 160}, {"sum_logits": -174.37933349609375, "num_tokens": 62, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -190.71987915039062, "logits_per_token": -2.8125698950982865, "logits_per_char": -0.568010858293465, "num_chars": 307}, {"sum_logits": -157.35711669921875, "num_tokens": 40, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -165.1534423828125, "logits_per_token": -3.9339279174804687, "logits_per_char": -0.7565245995154748, "num_chars": 208}, {"sum_logits": -103.44397735595703, "num_tokens": 43, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -126.17060852050781, "logits_per_token": -2.4056738919990006, "logits_per_char": -0.525096331756127, "num_chars": 197}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 257, "native_id": 20598, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 35.43183517456055, "incorrect_loss_raw": 48.996477127075195, "correct_loss_per_char": 0.49903993203606406, "incorrect_loss_per_char": 0.5357859607396781, "correct_loss_per_token": 2.362122344970703, "incorrect_loss_per_token": 2.6577848590482867, "correct_loss_uncond": -18.674312591552734, "incorrect_loss_uncond": -22.207131067911785}, "model_output": [{"sum_logits": -35.43183517456055, "num_tokens": 15, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -54.10614776611328, "logits_per_token": -2.362122344970703, "logits_per_char": -0.49903993203606406, "num_chars": 71}, {"sum_logits": -28.62629508972168, "num_tokens": 13, "num_tokens_all": 407, "is_greedy": false, "sum_logits_uncond": -53.20866394042969, "logits_per_token": -2.20202269920936, "logits_per_char": -0.5111838408878872, "num_chars": 56}, {"sum_logits": -61.666473388671875, "num_tokens": 21, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -80.44160461425781, "logits_per_token": -2.9364987327938987, "logits_per_char": -0.5457210034395741, "num_chars": 113}, {"sum_logits": -56.69666290283203, "num_tokens": 20, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -79.96055603027344, "logits_per_token": -2.8348331451416016, "logits_per_char": -0.5504530378915731, "num_chars": 103}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 258, "native_id": 39875, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 79.34910583496094, "incorrect_loss_raw": 75.67888641357422, "correct_loss_per_char": 0.5627596158507868, "incorrect_loss_per_char": 0.5749563804088244, "correct_loss_per_token": 2.2671173095703123, "incorrect_loss_per_token": 2.436091096580497, "correct_loss_uncond": -27.152320861816406, "incorrect_loss_uncond": -21.757486979166668}, "model_output": [{"sum_logits": -79.34910583496094, "num_tokens": 35, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -106.50142669677734, "logits_per_token": -2.2671173095703123, "logits_per_char": -0.5627596158507868, "num_chars": 141}, {"sum_logits": -63.613616943359375, "num_tokens": 27, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -82.29824829101562, "logits_per_token": -2.356059886791088, "logits_per_char": -0.5531618864639946, "num_chars": 115}, {"sum_logits": -77.59773254394531, "num_tokens": 33, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -99.26368713378906, "logits_per_token": -2.3514464407256157, "logits_per_char": -0.5878616101814039, "num_chars": 132}, {"sum_logits": -85.82530975341797, "num_tokens": 33, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -110.74718475341797, "logits_per_token": -2.600766962224787, "logits_per_char": -0.5838456445810746, "num_chars": 147}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 259, "native_id": 47441, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 51.33628845214844, "incorrect_loss_raw": 98.30986785888672, "correct_loss_per_char": 0.6667050448330966, "incorrect_loss_per_char": 0.48863979611580494, "correct_loss_per_token": 2.7019099185341284, "incorrect_loss_per_token": 2.3276800384568084, "correct_loss_uncond": -19.894676208496094, "incorrect_loss_uncond": -23.083106994628906}, "model_output": [{"sum_logits": -121.09017944335938, "num_tokens": 38, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -130.87277221679688, "logits_per_token": -3.1865836695620886, "logits_per_char": -0.6475410665420287, "num_chars": 187}, {"sum_logits": -51.33628845214844, "num_tokens": 19, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -71.23096466064453, "logits_per_token": -2.7019099185341284, "logits_per_char": -0.6667050448330966, "num_chars": 77}, {"sum_logits": -67.37908935546875, "num_tokens": 44, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -99.48371887207031, "logits_per_token": -1.531342939897017, "logits_per_char": -0.3473148935848905, "num_chars": 194}, {"sum_logits": -106.46033477783203, "num_tokens": 47, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -133.8224334716797, "logits_per_token": -2.26511350591132, "logits_per_char": -0.47106342822049574, "num_chars": 226}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 260, "native_id": 46524, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 131.46359252929688, "incorrect_loss_raw": 97.30613454182942, "correct_loss_per_char": 0.5300951311665196, "incorrect_loss_per_char": 0.6031823308441872, "correct_loss_per_token": 2.9878089211203833, "incorrect_loss_per_token": 2.86685426089926, "correct_loss_uncond": -29.491012573242188, "incorrect_loss_uncond": -21.263699849446613}, "model_output": [{"sum_logits": -131.46359252929688, "num_tokens": 44, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -160.95460510253906, "logits_per_token": -2.9878089211203833, "logits_per_char": -0.5300951311665196, "num_chars": 248}, {"sum_logits": -79.6280288696289, "num_tokens": 36, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -106.22647857666016, "logits_per_token": -2.2118896908230252, "logits_per_char": -0.4576323498254535, "num_chars": 174}, {"sum_logits": -102.53886413574219, "num_tokens": 36, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -118.23925018310547, "logits_per_token": -2.848301781548394, "logits_per_char": -0.6103503817603702, "num_chars": 168}, {"sum_logits": -109.75151062011719, "num_tokens": 31, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -131.2437744140625, "logits_per_token": -3.540371310326361, "logits_per_char": -0.7415642609467378, "num_chars": 148}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 261, "native_id": 42442, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 43.84831237792969, "incorrect_loss_raw": 125.79564412434895, "correct_loss_per_char": 0.4520444575044298, "incorrect_loss_per_char": 0.7943789331218598, "correct_loss_per_token": 1.9064483642578125, "incorrect_loss_per_token": 3.863675042542662, "correct_loss_uncond": -38.27960205078125, "incorrect_loss_uncond": -15.650260925292969}, "model_output": [{"sum_logits": -131.24412536621094, "num_tokens": 35, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -154.30792236328125, "logits_per_token": -3.7498321533203125, "logits_per_char": -0.805178683228288, "num_chars": 163}, {"sum_logits": -43.84831237792969, "num_tokens": 23, "num_tokens_all": 447, "is_greedy": false, "sum_logits_uncond": -82.12791442871094, "logits_per_token": -1.9064483642578125, "logits_per_char": -0.4520444575044298, "num_chars": 97}, {"sum_logits": -139.77183532714844, "num_tokens": 41, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -159.9693603515625, "logits_per_token": -3.4090691543206937, "logits_per_char": -0.6988591766357422, "num_chars": 200}, {"sum_logits": -106.3709716796875, "num_tokens": 24, "num_tokens_all": 448, "is_greedy": false, "sum_logits_uncond": -110.06043243408203, "logits_per_token": -4.4321238199869795, "logits_per_char": -0.8790989395015496, "num_chars": 121}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 262, "native_id": 30165, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 43.95500946044922, "incorrect_loss_raw": 32.33158620198568, "correct_loss_per_char": 0.7089517654911164, "incorrect_loss_per_char": 0.6576113376968568, "correct_loss_per_token": 3.381154573880709, "incorrect_loss_per_token": 3.00748567870169, "correct_loss_uncond": -36.002784729003906, "incorrect_loss_uncond": -22.20760218302409}, "model_output": [{"sum_logits": -33.70298767089844, "num_tokens": 14, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -54.278953552246094, "logits_per_token": -2.4073562622070312, "logits_per_char": -0.5030296667298274, "num_chars": 67}, {"sum_logits": -43.95500946044922, "num_tokens": 13, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -79.95779418945312, "logits_per_token": -3.381154573880709, "logits_per_char": -0.7089517654911164, "num_chars": 62}, {"sum_logits": -38.02687072753906, "num_tokens": 11, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -62.90034103393555, "logits_per_token": -3.4569882479580967, "logits_per_char": -0.7042013097692419, "num_chars": 54}, {"sum_logits": -25.26490020751953, "num_tokens": 8, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -46.438270568847656, "logits_per_token": -3.1581125259399414, "logits_per_char": -0.765603036591501, "num_chars": 33}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 263, "native_id": 5964, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 114.14147186279297, "incorrect_loss_raw": 126.38711547851562, "correct_loss_per_char": 0.6794135229928153, "incorrect_loss_per_char": 0.7434203781691197, "correct_loss_per_token": 3.0037229437577095, "incorrect_loss_per_token": 3.2929698353200987, "correct_loss_uncond": -37.243812561035156, "incorrect_loss_uncond": -27.151214599609375}, "model_output": [{"sum_logits": -116.54136657714844, "num_tokens": 43, "num_tokens_all": 504, "is_greedy": false, "sum_logits_uncond": -140.4443359375, "logits_per_token": -2.710264339003452, "logits_per_char": -0.6510690870231757, "num_chars": 179}, {"sum_logits": -97.01286315917969, "num_tokens": 31, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -136.52340698242188, "logits_per_token": -3.1294471986832155, "logits_per_char": -0.6382425207840768, "num_chars": 152}, {"sum_logits": -114.14147186279297, "num_tokens": 38, "num_tokens_all": 499, "is_greedy": false, "sum_logits_uncond": -151.38528442382812, "logits_per_token": -3.0037229437577095, "logits_per_char": -0.6794135229928153, "num_chars": 168}, {"sum_logits": -165.60711669921875, "num_tokens": 41, "num_tokens_all": 502, "is_greedy": false, "sum_logits_uncond": -183.64724731445312, "logits_per_token": -4.039197968273628, "logits_per_char": -0.9409495267001066, "num_chars": 176}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 264, "native_id": 7324, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 86.84378051757812, "incorrect_loss_raw": 123.07462056477864, "correct_loss_per_char": 0.38426451556450497, "incorrect_loss_per_char": 0.6001817081925143, "correct_loss_per_token": 1.9737222844904119, "incorrect_loss_per_token": 3.1261404244474664, "correct_loss_uncond": -23.112762451171875, "incorrect_loss_uncond": -17.70562235514323}, "model_output": [{"sum_logits": -86.84378051757812, "num_tokens": 44, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -109.95654296875, "logits_per_token": -1.9737222844904119, "logits_per_char": -0.38426451556450497, "num_chars": 226}, {"sum_logits": -150.27450561523438, "num_tokens": 40, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -164.15899658203125, "logits_per_token": -3.7568626403808594, "logits_per_char": -0.7022173159590391, "num_chars": 214}, {"sum_logits": -66.26229858398438, "num_tokens": 32, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -83.13172912597656, "logits_per_token": -2.0706968307495117, "logits_per_char": -0.48016158394191577, "num_chars": 138}, {"sum_logits": -152.6870574951172, "num_tokens": 43, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -175.0500030517578, "logits_per_token": -3.5508618022120277, "logits_per_char": -0.6181662246765878, "num_chars": 247}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 265, "native_id": 21139, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 111.19157409667969, "incorrect_loss_raw": 79.07511393229167, "correct_loss_per_char": 0.5269742848183872, "incorrect_loss_per_char": 0.4711046802390057, "correct_loss_per_token": 2.31649112701416, "incorrect_loss_per_token": 2.3484922077525483, "correct_loss_uncond": -38.18983459472656, "incorrect_loss_uncond": -29.557225545247395}, "model_output": [{"sum_logits": -55.30998992919922, "num_tokens": 23, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -86.19596862792969, "logits_per_token": -2.4047821708347485, "logits_per_char": -0.4460483058806389, "num_chars": 124}, {"sum_logits": -98.72075653076172, "num_tokens": 45, "num_tokens_all": 496, "is_greedy": false, "sum_logits_uncond": -125.05955505371094, "logits_per_token": -2.193794589572483, "logits_per_char": -0.44071766308375765, "num_chars": 224}, {"sum_logits": -83.19459533691406, "num_tokens": 34, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -114.64149475097656, "logits_per_token": -2.4468998628504135, "logits_per_char": -0.5265480717526206, "num_chars": 158}, {"sum_logits": -111.19157409667969, "num_tokens": 48, "num_tokens_all": 499, "is_greedy": false, "sum_logits_uncond": -149.38140869140625, "logits_per_token": -2.31649112701416, "logits_per_char": -0.5269742848183872, "num_chars": 211}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 266, "native_id": 34360, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 33.79216003417969, "incorrect_loss_raw": 51.79561996459961, "correct_loss_per_char": 0.6758432006835937, "incorrect_loss_per_char": 0.6395261655340042, "correct_loss_per_token": 2.8160133361816406, "incorrect_loss_per_token": 2.9882928104442192, "correct_loss_uncond": -10.628135681152344, "incorrect_loss_uncond": -25.65762456258138}, "model_output": [{"sum_logits": -54.43025207519531, "num_tokens": 17, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -77.19424438476562, "logits_per_token": -3.2017795338350186, "logits_per_char": -0.697823744553786, "num_chars": 78}, {"sum_logits": -33.79216003417969, "num_tokens": 12, "num_tokens_all": 398, "is_greedy": false, "sum_logits_uncond": -44.42029571533203, "logits_per_token": -2.8160133361816406, "logits_per_char": -0.6758432006835937, "num_chars": 50}, {"sum_logits": -47.24592971801758, "num_tokens": 17, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -78.60256958007812, "logits_per_token": -2.7791723363539753, "logits_per_char": -0.5493712757909021, "num_chars": 86}, {"sum_logits": -53.71067810058594, "num_tokens": 18, "num_tokens_all": 404, "is_greedy": false, "sum_logits_uncond": -76.56291961669922, "logits_per_token": -2.9839265611436634, "logits_per_char": -0.6713834762573242, "num_chars": 80}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 267, "native_id": 6587, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 18.42434310913086, "incorrect_loss_raw": 25.52245585123698, "correct_loss_per_char": 0.5757607221603394, "incorrect_loss_per_char": 0.7673335369484969, "correct_loss_per_token": 2.3030428886413574, "incorrect_loss_per_token": 2.8687911719413486, "correct_loss_uncond": -15.029407501220703, "incorrect_loss_uncond": -16.20868174235026}, "model_output": [{"sum_logits": -18.42434310913086, "num_tokens": 8, "num_tokens_all": 395, "is_greedy": false, "sum_logits_uncond": -33.45375061035156, "logits_per_token": -2.3030428886413574, "logits_per_char": -0.5757607221603394, "num_chars": 32}, {"sum_logits": -32.171295166015625, "num_tokens": 11, "num_tokens_all": 398, "is_greedy": false, "sum_logits_uncond": -45.57229232788086, "logits_per_token": -2.9246631969105112, "logits_per_char": -0.8936470879448785, "num_chars": 36}, {"sum_logits": -23.587621688842773, "num_tokens": 7, "num_tokens_all": 394, "is_greedy": false, "sum_logits_uncond": -40.560176849365234, "logits_per_token": -3.3696602412632535, "logits_per_char": -0.7371131777763367, "num_chars": 32}, {"sum_logits": -20.80845069885254, "num_tokens": 9, "num_tokens_all": 396, "is_greedy": false, "sum_logits_uncond": -39.060943603515625, "logits_per_token": -2.312050077650282, "logits_per_char": -0.6712403451242754, "num_chars": 31}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 268, "native_id": 23850, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 61.70538330078125, "incorrect_loss_raw": 131.9036661783854, "correct_loss_per_char": 0.5273964384682158, "incorrect_loss_per_char": 0.7623881409299088, "correct_loss_per_token": 2.5710576375325522, "incorrect_loss_per_token": 3.384260656013586, "correct_loss_uncond": -18.65149688720703, "incorrect_loss_uncond": -17.145853678385418}, "model_output": [{"sum_logits": -61.70538330078125, "num_tokens": 24, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -80.35688018798828, "logits_per_token": -2.5710576375325522, "logits_per_char": -0.5273964384682158, "num_chars": 117}, {"sum_logits": -185.955322265625, "num_tokens": 40, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -201.7948455810547, "logits_per_token": -4.648883056640625, "logits_per_char": -1.0330851236979166, "num_chars": 180}, {"sum_logits": -119.17262268066406, "num_tokens": 39, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -132.78070068359375, "logits_per_token": -3.055708273863181, "logits_per_char": -0.677117174321955, "num_chars": 176}, {"sum_logits": -90.58305358886719, "num_tokens": 37, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -112.57301330566406, "logits_per_token": -2.448190637536951, "logits_per_char": -0.5769621247698546, "num_chars": 157}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 269, "native_id": 8557, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 15.289948463439941, "incorrect_loss_raw": 25.886017481486004, "correct_loss_per_char": 0.4932241439819336, "incorrect_loss_per_char": 0.6774696071643348, "correct_loss_per_token": 1.6988831626044378, "incorrect_loss_per_token": 2.867133389171372, "correct_loss_uncond": -20.43856716156006, "incorrect_loss_uncond": -16.845060348510742}, "model_output": [{"sum_logits": -12.94843864440918, "num_tokens": 6, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -24.465293884277344, "logits_per_token": -2.15807310740153, "logits_per_char": -0.647421932220459, "num_chars": 20}, {"sum_logits": -34.223793029785156, "num_tokens": 13, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -55.250511169433594, "logits_per_token": -2.6325994638296275, "logits_per_char": -0.5610457873735272, "num_chars": 61}, {"sum_logits": -15.289948463439941, "num_tokens": 9, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -35.728515625, "logits_per_token": -1.6988831626044378, "logits_per_char": -0.4932241439819336, "num_chars": 31}, {"sum_logits": -30.485820770263672, "num_tokens": 8, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -48.4774284362793, "logits_per_token": -3.810727596282959, "logits_per_char": -0.8239411018990181, "num_chars": 37}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 270, "native_id": 10153, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 11.71104621887207, "incorrect_loss_raw": 24.467971801757812, "correct_loss_per_char": 0.3903682072957357, "incorrect_loss_per_char": 0.5769909595014407, "correct_loss_per_token": 1.67300660269601, "incorrect_loss_per_token": 2.9125795568738666, "correct_loss_uncond": -24.485013961791992, "incorrect_loss_uncond": -19.76570002237956}, "model_output": [{"sum_logits": -35.92698669433594, "num_tokens": 14, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -61.434906005859375, "logits_per_token": -2.5662133353097096, "logits_per_char": -0.5702696300688244, "num_chars": 63}, {"sum_logits": -19.825456619262695, "num_tokens": 5, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -37.03834915161133, "logits_per_token": -3.965091323852539, "logits_per_char": -0.6836364351469895, "num_chars": 29}, {"sum_logits": -17.651472091674805, "num_tokens": 8, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -34.227760314941406, "logits_per_token": -2.2064340114593506, "logits_per_char": -0.47706681328850825, "num_chars": 37}, {"sum_logits": -11.71104621887207, "num_tokens": 7, "num_tokens_all": 405, "is_greedy": false, "sum_logits_uncond": -36.19606018066406, "logits_per_token": -1.67300660269601, "logits_per_char": -0.3903682072957357, "num_chars": 30}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 271, "native_id": 1443, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 107.38009643554688, "incorrect_loss_raw": 94.82615661621094, "correct_loss_per_char": 0.5289659922933344, "incorrect_loss_per_char": 0.6193495280351095, "correct_loss_per_token": 2.82579201146176, "incorrect_loss_per_token": 3.080218278815233, "correct_loss_uncond": -34.43853759765625, "incorrect_loss_uncond": -18.108858744303387}, "model_output": [{"sum_logits": -85.0224380493164, "num_tokens": 34, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -92.7862319946289, "logits_per_token": -2.500659942626953, "logits_per_char": -0.5091163955048886, "num_chars": 167}, {"sum_logits": -107.38009643554688, "num_tokens": 38, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -141.81863403320312, "logits_per_token": -2.82579201146176, "logits_per_char": -0.5289659922933344, "num_chars": 203}, {"sum_logits": -85.29411315917969, "num_tokens": 26, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -105.95455932617188, "logits_per_token": -3.2805428138146033, "logits_per_char": -0.741687940514606, "num_chars": 115}, {"sum_logits": -114.16191864013672, "num_tokens": 33, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -140.0642547607422, "logits_per_token": -3.459452080004143, "logits_per_char": -0.6072442480858337, "num_chars": 188}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 272, "native_id": 38908, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 28.854171752929688, "incorrect_loss_raw": 61.06222152709961, "correct_loss_per_char": 0.3847222900390625, "incorrect_loss_per_char": 0.5982896839487429, "correct_loss_per_token": 1.5186406185752468, "incorrect_loss_per_token": 2.8077218424934145, "correct_loss_uncond": -33.72864532470703, "incorrect_loss_uncond": -34.909890492757164}, "model_output": [{"sum_logits": -88.40892791748047, "num_tokens": 23, "num_tokens_all": 433, "is_greedy": false, "sum_logits_uncond": -130.41766357421875, "logits_per_token": -3.843866431194803, "logits_per_char": -0.7687732862389606, "num_chars": 115}, {"sum_logits": -37.790008544921875, "num_tokens": 19, "num_tokens_all": 429, "is_greedy": false, "sum_logits_uncond": -70.10643005371094, "logits_per_token": -1.988947818153783, "logits_per_char": -0.4445883358226103, "num_chars": 85}, {"sum_logits": -28.854171752929688, "num_tokens": 19, "num_tokens_all": 429, "is_greedy": false, "sum_logits_uncond": -62.58281707763672, "logits_per_token": -1.5186406185752468, "logits_per_char": -0.3847222900390625, "num_chars": 75}, {"sum_logits": -56.987728118896484, "num_tokens": 22, "num_tokens_all": 432, "is_greedy": false, "sum_logits_uncond": -87.39224243164062, "logits_per_token": -2.590351278131658, "logits_per_char": -0.581507429784658, "num_chars": 98}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 273, "native_id": 14062, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 106.5882568359375, "incorrect_loss_raw": 97.21617635091145, "correct_loss_per_char": 0.5730551442792339, "incorrect_loss_per_char": 0.5750421794693975, "correct_loss_per_token": 3.13494873046875, "incorrect_loss_per_token": 2.838680065187633, "correct_loss_uncond": -14.48248291015625, "incorrect_loss_uncond": -21.365966796875}, "model_output": [{"sum_logits": -103.30706787109375, "num_tokens": 40, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -128.21986389160156, "logits_per_token": -2.5826766967773436, "logits_per_char": -0.4919384184337798, "num_chars": 210}, {"sum_logits": -106.5882568359375, "num_tokens": 34, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -121.07073974609375, "logits_per_token": -3.13494873046875, "logits_per_char": -0.5730551442792339, "num_chars": 186}, {"sum_logits": -102.22203063964844, "num_tokens": 39, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -131.4864501953125, "logits_per_token": -2.6210777087089343, "logits_per_char": -0.4710692656204997, "num_chars": 217}, {"sum_logits": -86.11943054199219, "num_tokens": 26, "num_tokens_all": 446, "is_greedy": false, "sum_logits_uncond": -96.04011535644531, "logits_per_token": -3.3122857900766225, "logits_per_char": -0.7621188543539131, "num_chars": 113}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 274, "native_id": 38350, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 82.8407211303711, "incorrect_loss_raw": 132.22791544596353, "correct_loss_per_char": 0.6846340589286867, "incorrect_loss_per_char": 0.6621841081791314, "correct_loss_per_token": 2.5887725353240967, "incorrect_loss_per_token": 2.952084337727584, "correct_loss_uncond": -24.174293518066406, "incorrect_loss_uncond": -22.419164021809895}, "model_output": [{"sum_logits": -137.4922637939453, "num_tokens": 45, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -158.4167022705078, "logits_per_token": -3.0553836398654513, "logits_per_char": -0.7161055405934652, "num_chars": 192}, {"sum_logits": -86.35032653808594, "num_tokens": 34, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -100.28118896484375, "logits_per_token": -2.539715486414292, "logits_per_char": -0.6302943542925981, "num_chars": 137}, {"sum_logits": -82.8407211303711, "num_tokens": 32, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -107.0150146484375, "logits_per_token": -2.5887725353240967, "logits_per_char": -0.6846340589286867, "num_chars": 121}, {"sum_logits": -172.84115600585938, "num_tokens": 53, "num_tokens_all": 495, "is_greedy": false, "sum_logits_uncond": -205.24334716796875, "logits_per_token": -3.2611538869030072, "logits_per_char": -0.640152429651331, "num_chars": 270}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 275, "native_id": 19201, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 95.13410186767578, "incorrect_loss_raw": 114.88739013671875, "correct_loss_per_char": 0.47096090033502863, "incorrect_loss_per_char": 0.7710869121471841, "correct_loss_per_token": 2.2124209736668785, "incorrect_loss_per_token": 2.959785088632802, "correct_loss_uncond": -27.90019989013672, "incorrect_loss_uncond": -12.488855997721354}, "model_output": [{"sum_logits": -161.53469848632812, "num_tokens": 46, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -169.18789672851562, "logits_per_token": -3.511623880137568, "logits_per_char": -0.8501826236122533, "num_chars": 190}, {"sum_logits": -95.13410186767578, "num_tokens": 43, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -123.0343017578125, "logits_per_token": -2.2124209736668785, "logits_per_char": -0.47096090033502863, "num_chars": 202}, {"sum_logits": -80.8868637084961, "num_tokens": 32, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -96.15336608886719, "logits_per_token": -2.527714490890503, "logits_per_char": -0.7222041402544294, "num_chars": 112}, {"sum_logits": -102.24060821533203, "num_tokens": 36, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -116.7874755859375, "logits_per_token": -2.840016894870334, "logits_per_char": -0.7408739725748698, "num_chars": 138}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 276, "native_id": 28779, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 90.90054321289062, "incorrect_loss_raw": 123.68400319417317, "correct_loss_per_char": 0.3835465958349815, "incorrect_loss_per_char": 0.6857357170486019, "correct_loss_per_token": 1.9340541109125664, "incorrect_loss_per_token": 2.99935925367121, "correct_loss_uncond": -12.219970703125, "incorrect_loss_uncond": -18.82727305094401}, "model_output": [{"sum_logits": -110.82984924316406, "num_tokens": 37, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -124.97224426269531, "logits_per_token": -2.995401330896326, "logits_per_char": -0.7860272995969082, "num_chars": 141}, {"sum_logits": -90.90054321289062, "num_tokens": 47, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -103.12051391601562, "logits_per_token": -1.9340541109125664, "logits_per_char": -0.3835465958349815, "num_chars": 237}, {"sum_logits": -117.14012908935547, "num_tokens": 38, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -135.42630004882812, "logits_per_token": -3.08263497603567, "logits_per_char": -0.6436270829085465, "num_chars": 182}, {"sum_logits": -143.08203125, "num_tokens": 49, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -167.13528442382812, "logits_per_token": -2.9200414540816326, "logits_per_char": -0.6275527686403509, "num_chars": 228}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 277, "native_id": 38322, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 113.02230072021484, "incorrect_loss_raw": 135.62340037027994, "correct_loss_per_char": 0.5623000035831585, "incorrect_loss_per_char": 0.712382364737242, "correct_loss_per_token": 2.1324962400040537, "incorrect_loss_per_token": 3.055738959381859, "correct_loss_uncond": -24.08533477783203, "incorrect_loss_uncond": -24.89819081624349}, "model_output": [{"sum_logits": -177.73297119140625, "num_tokens": 48, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -199.0661163330078, "logits_per_token": -3.702770233154297, "logits_per_char": -0.9256925582885742, "num_chars": 192}, {"sum_logits": -109.54072570800781, "num_tokens": 43, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -131.22921752929688, "logits_per_token": -2.5474587373955306, "logits_per_char": -0.5241183048230039, "num_chars": 209}, {"sum_logits": -119.59650421142578, "num_tokens": 41, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -151.26943969726562, "logits_per_token": -2.9169879075957508, "logits_per_char": -0.6873362311001482, "num_chars": 174}, {"sum_logits": -113.02230072021484, "num_tokens": 53, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -137.10763549804688, "logits_per_token": -2.1324962400040537, "logits_per_char": -0.5623000035831585, "num_chars": 201}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 278, "native_id": 16169, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 39.62956237792969, "incorrect_loss_raw": 58.200904846191406, "correct_loss_per_char": 0.521441610235917, "incorrect_loss_per_char": 1.0442998678586752, "correct_loss_per_token": 2.830683026994978, "incorrect_loss_per_token": 4.745515568546517, "correct_loss_uncond": -45.84740447998047, "incorrect_loss_uncond": -16.552880605061848}, "model_output": [{"sum_logits": -50.30931091308594, "num_tokens": 14, "num_tokens_all": 424, "is_greedy": false, "sum_logits_uncond": -71.97047424316406, "logits_per_token": -3.593522208077567, "logits_per_char": -0.7739893986628605, "num_chars": 65}, {"sum_logits": -59.13641357421875, "num_tokens": 9, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -61.317508697509766, "logits_per_token": -6.570712619357639, "logits_per_char": -1.4784103393554688, "num_chars": 40}, {"sum_logits": -39.62956237792969, "num_tokens": 14, "num_tokens_all": 424, "is_greedy": false, "sum_logits_uncond": -85.47696685791016, "logits_per_token": -2.830683026994978, "logits_per_char": -0.521441610235917, "num_chars": 76}, {"sum_logits": -65.15699005126953, "num_tokens": 16, "num_tokens_all": 426, "is_greedy": false, "sum_logits_uncond": -90.97337341308594, "logits_per_token": -4.072311878204346, "logits_per_char": -0.8804998655576963, "num_chars": 74}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 279, "native_id": 13237, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 65.79597473144531, "incorrect_loss_raw": 107.35301462809245, "correct_loss_per_char": 0.4112248420715332, "incorrect_loss_per_char": 0.6686349602857264, "correct_loss_per_token": 1.9351757273954504, "incorrect_loss_per_token": 3.195305760701497, "correct_loss_uncond": -25.29517364501953, "incorrect_loss_uncond": -14.633132934570312}, "model_output": [{"sum_logits": -65.79597473144531, "num_tokens": 34, "num_tokens_all": 447, "is_greedy": false, "sum_logits_uncond": -91.09114837646484, "logits_per_token": -1.9351757273954504, "logits_per_char": -0.4112248420715332, "num_chars": 160}, {"sum_logits": -162.59487915039062, "num_tokens": 56, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -170.83953857421875, "logits_per_token": -2.903479984828404, "logits_per_char": -0.6112589441744009, "num_chars": 266}, {"sum_logits": -69.11019897460938, "num_tokens": 20, "num_tokens_all": 433, "is_greedy": false, "sum_logits_uncond": -87.64990997314453, "logits_per_token": -3.4555099487304686, "logits_per_char": -0.6775509703393076, "num_chars": 102}, {"sum_logits": -90.35396575927734, "num_tokens": 28, "num_tokens_all": 441, "is_greedy": false, "sum_logits_uncond": -107.468994140625, "logits_per_token": -3.2269273485456194, "logits_per_char": -0.717094966343471, "num_chars": 126}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 280, "native_id": 7862, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 68.78927612304688, "incorrect_loss_raw": 111.66944122314453, "correct_loss_per_char": 0.3738547615382982, "incorrect_loss_per_char": 0.5563785692579456, "correct_loss_per_token": 1.910813225640191, "incorrect_loss_per_token": 2.8886983250126694, "correct_loss_uncond": -30.438888549804688, "incorrect_loss_uncond": -20.16272735595703}, "model_output": [{"sum_logits": -133.348876953125, "num_tokens": 44, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -153.48379516601562, "logits_per_token": -3.0306562943892046, "logits_per_char": -0.5874399865776432, "num_chars": 227}, {"sum_logits": -68.78927612304688, "num_tokens": 36, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -99.22816467285156, "logits_per_token": -1.910813225640191, "logits_per_char": -0.3738547615382982, "num_chars": 184}, {"sum_logits": -95.03240203857422, "num_tokens": 32, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -111.0799560546875, "logits_per_token": -2.9697625637054443, "logits_per_char": -0.5590141296386719, "num_chars": 170}, {"sum_logits": -106.62704467773438, "num_tokens": 40, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -130.93275451660156, "logits_per_token": -2.6656761169433594, "logits_per_char": -0.5226815915575215, "num_chars": 204}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 281, "native_id": 34856, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 92.70830535888672, "incorrect_loss_raw": 174.12337493896484, "correct_loss_per_char": 0.5267517349936746, "incorrect_loss_per_char": 0.6137909213009884, "correct_loss_per_token": 2.8093425866329307, "incorrect_loss_per_token": 3.712096960857661, "correct_loss_uncond": -24.64471435546875, "incorrect_loss_uncond": -17.701566060384113}, "model_output": [{"sum_logits": -134.18894958496094, "num_tokens": 38, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -146.6637725830078, "logits_per_token": -3.5312881469726562, "logits_per_char": -0.5638191159031972, "num_chars": 238}, {"sum_logits": -92.70830535888672, "num_tokens": 33, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -117.35301971435547, "logits_per_token": -2.8093425866329307, "logits_per_char": -0.5267517349936746, "num_chars": 176}, {"sum_logits": -104.89635467529297, "num_tokens": 33, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -125.82920837402344, "logits_per_token": -3.1786774144028174, "logits_per_char": -0.5763535971169943, "num_chars": 182}, {"sum_logits": -283.2848205566406, "num_tokens": 64, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -302.9818420410156, "logits_per_token": -4.42632532119751, "logits_per_char": -0.7012000508827738, "num_chars": 404}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 282, "native_id": 46259, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 38.33000946044922, "incorrect_loss_raw": 94.47525278727214, "correct_loss_per_char": 0.3833000946044922, "incorrect_loss_per_char": 0.6710051066082712, "correct_loss_per_token": 1.4742311330942006, "incorrect_loss_per_token": 2.8494932542068128, "correct_loss_uncond": -32.60868835449219, "incorrect_loss_uncond": -27.713729858398438}, "model_output": [{"sum_logits": -38.33000946044922, "num_tokens": 26, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -70.9386978149414, "logits_per_token": -1.4742311330942006, "logits_per_char": -0.3833000946044922, "num_chars": 100}, {"sum_logits": -76.55641174316406, "num_tokens": 40, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -105.88741302490234, "logits_per_token": -1.9139102935791015, "logits_per_char": -0.45842162720457524, "num_chars": 167}, {"sum_logits": -87.51798248291016, "num_tokens": 29, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -118.27389526367188, "logits_per_token": -3.0178614649279365, "logits_per_char": -0.6435145770802218, "num_chars": 136}, {"sum_logits": -119.35136413574219, "num_tokens": 33, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -142.4056396484375, "logits_per_token": -3.6167080041133994, "logits_per_char": -0.9110791155400167, "num_chars": 131}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 283, "native_id": 2747, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 102.02386474609375, "incorrect_loss_raw": 82.81875101725261, "correct_loss_per_char": 0.5897333222317558, "incorrect_loss_per_char": 0.512201069579202, "correct_loss_per_token": 2.318724198774858, "incorrect_loss_per_token": 2.2021225563793223, "correct_loss_uncond": -55.13336181640625, "incorrect_loss_uncond": -40.03983052571615}, "model_output": [{"sum_logits": -102.02386474609375, "num_tokens": 44, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -157.1572265625, "logits_per_token": -2.318724198774858, "logits_per_char": -0.5897333222317558, "num_chars": 173}, {"sum_logits": -86.49394226074219, "num_tokens": 28, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -125.73753356933594, "logits_per_token": -3.089069366455078, "logits_per_char": -0.7329995106842558, "num_chars": 118}, {"sum_logits": -62.31183624267578, "num_tokens": 42, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -102.50401306152344, "logits_per_token": -1.4836151486351377, "logits_per_char": -0.3665402131922105, "num_chars": 170}, {"sum_logits": -99.65047454833984, "num_tokens": 49, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -140.33419799804688, "logits_per_token": -2.033683154047752, "logits_per_char": -0.43706348486113966, "num_chars": 228}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 284, "native_id": 18273, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 148.2530517578125, "incorrect_loss_raw": 167.1009063720703, "correct_loss_per_char": 0.7267306458716299, "incorrect_loss_per_char": 0.7712167751572606, "correct_loss_per_token": 3.0255724848533165, "incorrect_loss_per_token": 3.459843946857576, "correct_loss_uncond": -38.09625244140625, "incorrect_loss_uncond": -29.73583984375}, "model_output": [{"sum_logits": -117.6954345703125, "num_tokens": 40, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -148.23162841796875, "logits_per_token": -2.9423858642578127, "logits_per_char": -0.6687240600585938, "num_chars": 176}, {"sum_logits": -216.24502563476562, "num_tokens": 61, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -249.29551696777344, "logits_per_token": -3.5450004202420593, "logits_per_char": -0.8285250024320522, "num_chars": 261}, {"sum_logits": -167.3622589111328, "num_tokens": 43, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -192.98309326171875, "logits_per_token": -3.892145556072856, "logits_per_char": -0.8164012629811357, "num_chars": 205}, {"sum_logits": -148.2530517578125, "num_tokens": 49, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -186.34930419921875, "logits_per_token": -3.0255724848533165, "logits_per_char": -0.7267306458716299, "num_chars": 204}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 285, "native_id": 37003, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 85.98193359375, "incorrect_loss_raw": 148.10637919108072, "correct_loss_per_char": 0.728660454184322, "incorrect_loss_per_char": 0.8157501229743097, "correct_loss_per_token": 3.3069974459134617, "incorrect_loss_per_token": 3.9367720593063997, "correct_loss_uncond": -17.47803497314453, "incorrect_loss_uncond": -10.884933471679688}, "model_output": [{"sum_logits": -129.3953857421875, "num_tokens": 30, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -141.92916870117188, "logits_per_token": -4.313179524739583, "logits_per_char": -0.9112351108604754, "num_chars": 142}, {"sum_logits": -162.13470458984375, "num_tokens": 43, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -171.08026123046875, "logits_per_token": -3.7705745253452037, "logits_per_char": -0.7720700218563988, "num_chars": 210}, {"sum_logits": -152.78904724121094, "num_tokens": 41, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -163.96450805664062, "logits_per_token": -3.726562127834413, "logits_per_char": -0.7639452362060547, "num_chars": 200}, {"sum_logits": -85.98193359375, "num_tokens": 26, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -103.45996856689453, "logits_per_token": -3.3069974459134617, "logits_per_char": -0.728660454184322, "num_chars": 118}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 286, "native_id": 19783, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 20.456546783447266, "incorrect_loss_raw": 43.27388254801432, "correct_loss_per_char": 0.305321593782795, "incorrect_loss_per_char": 0.5828936366237487, "correct_loss_per_token": 1.363769785563151, "incorrect_loss_per_token": 2.469918717685928, "correct_loss_uncond": -35.790443420410156, "incorrect_loss_uncond": -33.348121643066406}, "model_output": [{"sum_logits": -41.58488082885742, "num_tokens": 16, "num_tokens_all": 425, "is_greedy": false, "sum_logits_uncond": -71.84693145751953, "logits_per_token": -2.599055051803589, "logits_per_char": -0.6115423651302562, "num_chars": 68}, {"sum_logits": -56.65837097167969, "num_tokens": 26, "num_tokens_all": 435, "is_greedy": false, "sum_logits_uncond": -101.55516052246094, "logits_per_token": -2.1791681142953725, "logits_per_char": -0.49268148671025813, "num_chars": 115}, {"sum_logits": -31.57839584350586, "num_tokens": 12, "num_tokens_all": 421, "is_greedy": false, "sum_logits_uncond": -56.46392059326172, "logits_per_token": -2.6315329869588218, "logits_per_char": -0.6444570580307318, "num_chars": 49}, {"sum_logits": -20.456546783447266, "num_tokens": 15, "num_tokens_all": 424, "is_greedy": false, "sum_logits_uncond": -56.24699020385742, "logits_per_token": -1.363769785563151, "logits_per_char": -0.305321593782795, "num_chars": 67}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 287, "native_id": 4126, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 87.25810241699219, "incorrect_loss_raw": 47.36939493815104, "correct_loss_per_char": 0.9089385668436686, "incorrect_loss_per_char": 0.7197920003854397, "correct_loss_per_token": 3.966277382590554, "incorrect_loss_per_token": 3.2636815869733655, "correct_loss_uncond": -27.426589965820312, "incorrect_loss_uncond": -24.380892435709637}, "model_output": [{"sum_logits": -61.07445526123047, "num_tokens": 18, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -86.31749725341797, "logits_per_token": -3.3930252922905817, "logits_per_char": -0.8366363734415133, "num_chars": 73}, {"sum_logits": -87.25810241699219, "num_tokens": 22, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -114.6846923828125, "logits_per_token": -3.966277382590554, "logits_per_char": -0.9089385668436686, "num_chars": 96}, {"sum_logits": -31.30799102783203, "num_tokens": 11, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -58.45161437988281, "logits_per_token": -2.8461810025301846, "logits_per_char": -0.6020767505352314, "num_chars": 52}, {"sum_logits": -49.725738525390625, "num_tokens": 14, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -70.48175048828125, "logits_per_token": -3.5518384660993303, "logits_per_char": -0.7206628771795742, "num_chars": 69}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 288, "native_id": 46868, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 97.74205017089844, "incorrect_loss_raw": 90.54418436686198, "correct_loss_per_char": 0.4383051577170334, "incorrect_loss_per_char": 0.6523367799217904, "correct_loss_per_token": 2.07961808874252, "incorrect_loss_per_token": 3.1678362087426546, "correct_loss_uncond": -24.10748291015625, "incorrect_loss_uncond": -14.153490702311197}, "model_output": [{"sum_logits": -86.51139831542969, "num_tokens": 22, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -94.21121215820312, "logits_per_token": -3.932336287064986, "logits_per_char": -0.808517741265698, "num_chars": 107}, {"sum_logits": -108.95185089111328, "num_tokens": 37, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -131.805908203125, "logits_per_token": -2.9446446186787374, "logits_per_char": -0.6120890499500746, "num_chars": 178}, {"sum_logits": -97.74205017089844, "num_tokens": 47, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -121.84953308105469, "logits_per_token": -2.07961808874252, "logits_per_char": -0.4383051577170334, "num_chars": 223}, {"sum_logits": -76.16930389404297, "num_tokens": 29, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -88.0759048461914, "logits_per_token": -2.62652772048424, "logits_per_char": -0.5364035485495984, "num_chars": 142}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 289, "native_id": 16392, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 77.51275634765625, "incorrect_loss_raw": 104.84118398030598, "correct_loss_per_char": 0.5420472471864073, "incorrect_loss_per_char": 0.6566047641748515, "correct_loss_per_token": 2.2797869514016544, "incorrect_loss_per_token": 2.996408909902609, "correct_loss_uncond": -34.27959442138672, "incorrect_loss_uncond": -16.571393330891926}, "model_output": [{"sum_logits": -146.5150604248047, "num_tokens": 34, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -155.93540954589844, "logits_per_token": -4.3092664830824905, "logits_per_char": -0.9452584543535786, "num_chars": 155}, {"sum_logits": -98.46324920654297, "num_tokens": 33, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -114.09564208984375, "logits_per_token": -2.983734824440696, "logits_per_char": -0.6271544535448597, "num_chars": 157}, {"sum_logits": -77.51275634765625, "num_tokens": 34, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -111.79235076904297, "logits_per_token": -2.2797869514016544, "logits_per_char": -0.5420472471864073, "num_chars": 143}, {"sum_logits": -69.54524230957031, "num_tokens": 41, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -94.20668029785156, "logits_per_token": -1.6962254221846418, "logits_per_char": -0.39740138462611607, "num_chars": 175}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 290, "native_id": 44353, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 68.96231079101562, "incorrect_loss_raw": 106.48670959472656, "correct_loss_per_char": 0.46283430061084313, "incorrect_loss_per_char": 0.6528044293414323, "correct_loss_per_token": 2.3780107169315734, "incorrect_loss_per_token": 3.015299658642675, "correct_loss_uncond": -34.34612274169922, "incorrect_loss_uncond": -22.838328043619793}, "model_output": [{"sum_logits": -91.95146179199219, "num_tokens": 33, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -113.67961120605469, "logits_per_token": -2.7864079330906724, "logits_per_char": -0.6966019832726681, "num_chars": 132}, {"sum_logits": -68.96231079101562, "num_tokens": 29, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -103.30843353271484, "logits_per_token": -2.3780107169315734, "logits_per_char": -0.46283430061084313, "num_chars": 149}, {"sum_logits": -77.74563598632812, "num_tokens": 28, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -98.06098937988281, "logits_per_token": -2.776629856654576, "logits_per_char": -0.5716590881347656, "num_chars": 136}, {"sum_logits": -149.76303100585938, "num_tokens": 43, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -176.23451232910156, "logits_per_token": -3.482861186182776, "logits_per_char": -0.6901522166168634, "num_chars": 217}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 291, "native_id": 3926, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 44.20491027832031, "incorrect_loss_raw": 112.67179107666016, "correct_loss_per_char": 0.43767237899327044, "incorrect_loss_per_char": 0.641741305161137, "correct_loss_per_token": 2.009314103560014, "incorrect_loss_per_token": 3.2348041300996297, "correct_loss_uncond": -27.514007568359375, "incorrect_loss_uncond": -15.37701670328776}, "model_output": [{"sum_logits": -44.20491027832031, "num_tokens": 22, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -71.71891784667969, "logits_per_token": -2.009314103560014, "logits_per_char": -0.43767237899327044, "num_chars": 101}, {"sum_logits": -119.84290313720703, "num_tokens": 36, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -133.5669403076172, "logits_per_token": -3.328969531589084, "logits_per_char": -0.6443166835333711, "num_chars": 186}, {"sum_logits": -146.32626342773438, "num_tokens": 45, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -161.534912109375, "logits_per_token": -3.251694742838542, "logits_per_char": -0.6967917306082589, "num_chars": 210}, {"sum_logits": -71.84620666503906, "num_tokens": 23, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -89.04457092285156, "logits_per_token": -3.1237481158712637, "logits_per_char": -0.584115501341781, "num_chars": 123}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 292, "native_id": 40476, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 53.981788635253906, "incorrect_loss_raw": 64.70270029703777, "correct_loss_per_char": 0.8056983378396105, "incorrect_loss_per_char": 0.7622674903696022, "correct_loss_per_token": 4.498482386271159, "incorrect_loss_per_token": 3.479228196140039, "correct_loss_uncond": -16.316932678222656, "incorrect_loss_uncond": -25.29272715250651}, "model_output": [{"sum_logits": -87.42530059814453, "num_tokens": 23, "num_tokens_all": 436, "is_greedy": false, "sum_logits_uncond": -112.2015380859375, "logits_per_token": -3.801100026006284, "logits_per_char": -0.8326219104585193, "num_chars": 105}, {"sum_logits": -53.981788635253906, "num_tokens": 12, "num_tokens_all": 425, "is_greedy": false, "sum_logits_uncond": -70.29872131347656, "logits_per_token": -4.498482386271159, "logits_per_char": -0.8056983378396105, "num_chars": 67}, {"sum_logits": -28.649307250976562, "num_tokens": 14, "num_tokens_all": 427, "is_greedy": false, "sum_logits_uncond": -62.000640869140625, "logits_per_token": -2.0463790893554688, "logits_per_char": -0.4407585730919471, "num_chars": 65}, {"sum_logits": -78.03349304199219, "num_tokens": 17, "num_tokens_all": 430, "is_greedy": false, "sum_logits_uncond": -95.78410339355469, "logits_per_token": -4.590205473058364, "logits_per_char": -1.0134219875583401, "num_chars": 77}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 293, "native_id": 33991, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 45.380767822265625, "incorrect_loss_raw": 95.0483169555664, "correct_loss_per_char": 0.5602563928674769, "incorrect_loss_per_char": 0.6828365848305777, "correct_loss_per_token": 2.3884614643297697, "incorrect_loss_per_token": 2.929149102912378, "correct_loss_uncond": -28.89667510986328, "incorrect_loss_uncond": -19.601844787597656}, "model_output": [{"sum_logits": -45.380767822265625, "num_tokens": 19, "num_tokens_all": 446, "is_greedy": false, "sum_logits_uncond": -74.2774429321289, "logits_per_token": -2.3884614643297697, "logits_per_char": -0.5602563928674769, "num_chars": 81}, {"sum_logits": -99.36984252929688, "num_tokens": 36, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -117.75849914550781, "logits_per_token": -2.76027340359158, "logits_per_char": -0.6806153597897047, "num_chars": 146}, {"sum_logits": -67.36619567871094, "num_tokens": 21, "num_tokens_all": 448, "is_greedy": false, "sum_logits_uncond": -86.44439697265625, "logits_per_token": -3.207914079938616, "logits_per_char": -0.7243676954700101, "num_chars": 93}, {"sum_logits": -118.4089126586914, "num_tokens": 42, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -139.74758911132812, "logits_per_token": -2.819259825206938, "logits_per_char": -0.6435266992320186, "num_chars": 184}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 294, "native_id": 36374, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 56.61663818359375, "incorrect_loss_raw": 70.10554631551106, "correct_loss_per_char": 0.4880744670999461, "incorrect_loss_per_char": 0.6085470116295536, "correct_loss_per_token": 2.0220227922712053, "incorrect_loss_per_token": 2.700456645554468, "correct_loss_uncond": -30.570602416992188, "incorrect_loss_uncond": -17.516735076904297}, "model_output": [{"sum_logits": -66.06420135498047, "num_tokens": 28, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -92.77603149414062, "logits_per_token": -2.3594357626778737, "logits_per_char": -0.5598661131778006, "num_chars": 118}, {"sum_logits": -57.71535873413086, "num_tokens": 19, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -70.12908935546875, "logits_per_token": -3.037650459691098, "logits_per_char": -0.703845838221108, "num_chars": 82}, {"sum_logits": -56.61663818359375, "num_tokens": 28, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -87.18724060058594, "logits_per_token": -2.0220227922712053, "logits_per_char": -0.4880744670999461, "num_chars": 116}, {"sum_logits": -86.53707885742188, "num_tokens": 32, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -99.96172332763672, "logits_per_token": -2.7042837142944336, "logits_per_char": -0.5619290834897525, "num_chars": 154}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 295, "native_id": 35362, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.278026580810547, "incorrect_loss_raw": 33.595542907714844, "correct_loss_per_char": 0.5580921173095703, "incorrect_loss_per_char": 0.7352745105572441, "correct_loss_per_token": 2.455605316162109, "incorrect_loss_per_token": 3.551096537756541, "correct_loss_uncond": -16.73251724243164, "incorrect_loss_uncond": -21.677181243896484}, "model_output": [{"sum_logits": -31.022720336914062, "num_tokens": 7, "num_tokens_all": 394, "is_greedy": false, "sum_logits_uncond": -51.89038848876953, "logits_per_token": -4.431817190987723, "logits_per_char": -0.9124329510857078, "num_chars": 34}, {"sum_logits": -12.278026580810547, "num_tokens": 5, "num_tokens_all": 392, "is_greedy": false, "sum_logits_uncond": -29.010543823242188, "logits_per_token": -2.455605316162109, "logits_per_char": -0.5580921173095703, "num_chars": 22}, {"sum_logits": -54.05845642089844, "num_tokens": 15, "num_tokens_all": 402, "is_greedy": false, "sum_logits_uncond": -76.38255310058594, "logits_per_token": -3.6038970947265625, "logits_per_char": -0.8446633815765381, "num_chars": 64}, {"sum_logits": -15.705451965332031, "num_tokens": 6, "num_tokens_all": 393, "is_greedy": false, "sum_logits_uncond": -37.545230865478516, "logits_per_token": -2.6175753275553384, "logits_per_char": -0.4487271990094866, "num_chars": 35}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 296, "native_id": 40557, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 119.28316497802734, "incorrect_loss_raw": 91.95544942220052, "correct_loss_per_char": 0.5421962044455788, "incorrect_loss_per_char": 0.6041820754813839, "correct_loss_per_token": 2.5931122821310293, "incorrect_loss_per_token": 2.6622161522901613, "correct_loss_uncond": -32.50446319580078, "incorrect_loss_uncond": -16.528249104817707}, "model_output": [{"sum_logits": -105.27999877929688, "num_tokens": 35, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -114.80926513671875, "logits_per_token": -3.007999965122768, "logits_per_char": -0.6792257985761089, "num_chars": 155}, {"sum_logits": -82.38158416748047, "num_tokens": 31, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -107.18515014648438, "logits_per_token": -2.6574704570154988, "logits_per_char": -0.5642574258046608, "num_chars": 146}, {"sum_logits": -88.20476531982422, "num_tokens": 38, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -103.45668029785156, "logits_per_token": -2.3211780347322164, "logits_per_char": -0.569063002063382, "num_chars": 155}, {"sum_logits": -119.28316497802734, "num_tokens": 46, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -151.78762817382812, "logits_per_token": -2.5931122821310293, "logits_per_char": -0.5421962044455788, "num_chars": 220}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 297, "native_id": 16009, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 42.555545806884766, "incorrect_loss_raw": 44.06246439615885, "correct_loss_per_char": 0.4948319279870322, "incorrect_loss_per_char": 0.8048687131522052, "correct_loss_per_token": 2.1277772903442385, "incorrect_loss_per_token": 3.5949742551251647, "correct_loss_uncond": -39.64509201049805, "incorrect_loss_uncond": -17.933787027994793}, "model_output": [{"sum_logits": -40.37907409667969, "num_tokens": 10, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -54.59812927246094, "logits_per_token": -4.037907409667969, "logits_per_char": -0.8591292360995678, "num_chars": 47}, {"sum_logits": -42.555545806884766, "num_tokens": 20, "num_tokens_all": 420, "is_greedy": false, "sum_logits_uncond": -82.20063781738281, "logits_per_token": -2.1277772903442385, "logits_per_char": -0.4948319279870322, "num_chars": 86}, {"sum_logits": -57.35967254638672, "num_tokens": 14, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -76.70864868164062, "logits_per_token": -4.097119467599051, "logits_per_char": -0.940322500760438, "num_chars": 61}, {"sum_logits": -34.448646545410156, "num_tokens": 13, "num_tokens_all": 413, "is_greedy": false, "sum_logits_uncond": -54.681976318359375, "logits_per_token": -2.6498958881084738, "logits_per_char": -0.6151544025966099, "num_chars": 56}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 298, "native_id": 36789, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 28.75410270690918, "incorrect_loss_raw": 41.59533500671387, "correct_loss_per_char": 0.5134661197662354, "incorrect_loss_per_char": 0.9183390496096514, "correct_loss_per_token": 2.396175225575765, "incorrect_loss_per_token": 3.8461581368237634, "correct_loss_uncond": -26.211030960083008, "incorrect_loss_uncond": -14.425073623657227}, "model_output": [{"sum_logits": -31.809343338012695, "num_tokens": 6, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -39.116355895996094, "logits_per_token": -5.301557223002116, "logits_per_char": -1.4458792426369407, "num_chars": 22}, {"sum_logits": -28.75410270690918, "num_tokens": 12, "num_tokens_all": 422, "is_greedy": false, "sum_logits_uncond": -54.96513366699219, "logits_per_token": -2.396175225575765, "logits_per_char": -0.5134661197662354, "num_chars": 56}, {"sum_logits": -62.66718673706055, "num_tokens": 18, "num_tokens_all": 428, "is_greedy": false, "sum_logits_uncond": -79.30586242675781, "logits_per_token": -3.4815103742811413, "logits_per_char": -0.7372610204360064, "num_chars": 85}, {"sum_logits": -30.30947494506836, "num_tokens": 11, "num_tokens_all": 421, "is_greedy": false, "sum_logits_uncond": -49.639007568359375, "logits_per_token": -2.7554068131880327, "logits_per_char": -0.5718768857560068, "num_chars": 53}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 299, "native_id": 40417, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 81.28111267089844, "incorrect_loss_raw": 150.05096944173178, "correct_loss_per_char": 0.4441590856333248, "incorrect_loss_per_char": 0.6748550497031056, "correct_loss_per_token": 2.2578086853027344, "incorrect_loss_per_token": 3.0199970046883653, "correct_loss_uncond": -30.5640869140625, "incorrect_loss_uncond": -20.060206095377605}, "model_output": [{"sum_logits": -198.8897705078125, "num_tokens": 53, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -214.9888916015625, "logits_per_token": -3.7526371793926887, "logits_per_char": -0.8287073771158854, "num_chars": 240}, {"sum_logits": -147.82974243164062, "num_tokens": 50, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -171.1831512451172, "logits_per_token": -2.9565948486328124, "logits_per_char": -0.6427380105723506, "num_chars": 230}, {"sum_logits": -103.43339538574219, "num_tokens": 44, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -124.16148376464844, "logits_per_token": -2.350758986039595, "logits_per_char": -0.5531197614210812, "num_chars": 187}, {"sum_logits": -81.28111267089844, "num_tokens": 36, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -111.84519958496094, "logits_per_token": -2.2578086853027344, "logits_per_char": -0.4441590856333248, "num_chars": 183}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 300, "native_id": 5682, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 70.61518859863281, "incorrect_loss_raw": 99.06692250569661, "correct_loss_per_char": 0.6855843553265322, "incorrect_loss_per_char": 0.6376301138202901, "correct_loss_per_token": 2.9422995249430337, "incorrect_loss_per_token": 2.618845071578795, "correct_loss_uncond": -24.052413940429688, "incorrect_loss_uncond": -21.136512756347656}, "model_output": [{"sum_logits": -91.54304504394531, "num_tokens": 33, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -114.68753051757812, "logits_per_token": -2.7740316679983428, "logits_per_char": -0.7041772695688101, "num_chars": 130}, {"sum_logits": -108.99691772460938, "num_tokens": 40, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -131.29653930664062, "logits_per_token": -2.724922943115234, "logits_per_char": -0.6264190673828125, "num_chars": 174}, {"sum_logits": -70.61518859863281, "num_tokens": 24, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -94.6676025390625, "logits_per_token": -2.9422995249430337, "logits_per_char": -0.6855843553265322, "num_chars": 103}, {"sum_logits": -96.66080474853516, "num_tokens": 41, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -114.62623596191406, "logits_per_token": -2.3575806036228086, "logits_per_char": -0.582294004509248, "num_chars": 166}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 301, "native_id": 40800, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 100.08979797363281, "incorrect_loss_raw": 48.92986933390299, "correct_loss_per_char": 0.6902744687836746, "incorrect_loss_per_char": 0.6945060586213662, "correct_loss_per_token": 3.1278061866760254, "incorrect_loss_per_token": 3.335243475742829, "correct_loss_uncond": -36.29203796386719, "incorrect_loss_uncond": -20.55837122599284}, "model_output": [{"sum_logits": -100.08979797363281, "num_tokens": 32, "num_tokens_all": 432, "is_greedy": false, "sum_logits_uncond": -136.3818359375, "logits_per_token": -3.1278061866760254, "logits_per_char": -0.6902744687836746, "num_chars": 145}, {"sum_logits": -54.2888069152832, "num_tokens": 16, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -65.1908950805664, "logits_per_token": -3.3930504322052, "logits_per_char": -0.6702321841392989, "num_chars": 81}, {"sum_logits": -57.64234161376953, "num_tokens": 13, "num_tokens_all": 413, "is_greedy": false, "sum_logits_uncond": -82.24868774414062, "logits_per_token": -4.434026277982271, "logits_per_char": -0.9006615877151489, "num_chars": 64}, {"sum_logits": -34.85845947265625, "num_tokens": 16, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -61.02513885498047, "logits_per_token": -2.1786537170410156, "logits_per_char": -0.5126244040096507, "num_chars": 68}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 302, "native_id": 37259, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 70.71675109863281, "incorrect_loss_raw": 120.3961664835612, "correct_loss_per_char": 0.6313995633806501, "incorrect_loss_per_char": 0.7023413376925186, "correct_loss_per_token": 2.619138929578993, "incorrect_loss_per_token": 2.846514129504257, "correct_loss_uncond": -31.416595458984375, "incorrect_loss_uncond": -15.669698079427084}, "model_output": [{"sum_logits": -126.03059387207031, "num_tokens": 43, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -143.08297729492188, "logits_per_token": -2.930944043536519, "logits_per_char": -0.6924757905058808, "num_chars": 182}, {"sum_logits": -116.20421600341797, "num_tokens": 40, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -124.89775848388672, "logits_per_token": -2.905105400085449, "logits_per_char": -0.785163621644716, "num_chars": 148}, {"sum_logits": -70.71675109863281, "num_tokens": 27, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -102.13334655761719, "logits_per_token": -2.619138929578993, "logits_per_char": -0.6313995633806501, "num_chars": 112}, {"sum_logits": -118.95368957519531, "num_tokens": 44, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -140.21685791015625, "logits_per_token": -2.7034929448908027, "logits_per_char": -0.6293846009269594, "num_chars": 189}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 303, "native_id": 37076, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 55.971622467041016, "incorrect_loss_raw": 123.41853841145833, "correct_loss_per_char": 0.39979730333600727, "incorrect_loss_per_char": 0.5635024179746023, "correct_loss_per_token": 1.9300559471393455, "incorrect_loss_per_token": 2.835922675052481, "correct_loss_uncond": -24.113567352294922, "incorrect_loss_uncond": -24.439682006835938}, "model_output": [{"sum_logits": -128.97402954101562, "num_tokens": 39, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -159.0723876953125, "logits_per_token": -3.3070263984875803, "logits_per_char": -0.6546905052843432, "num_chars": 197}, {"sum_logits": -55.971622467041016, "num_tokens": 29, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -80.08518981933594, "logits_per_token": -1.9300559471393455, "logits_per_char": -0.39979730333600727, "num_chars": 140}, {"sum_logits": -155.76956176757812, "num_tokens": 58, "num_tokens_all": 498, "is_greedy": false, "sum_logits_uncond": -172.91989135742188, "logits_per_token": -2.685682099441002, "logits_per_char": -0.5298284413863201, "num_chars": 294}, {"sum_logits": -85.51202392578125, "num_tokens": 34, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -111.58238220214844, "logits_per_token": -2.5150595272288605, "logits_per_char": -0.5059883072531435, "num_chars": 169}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 304, "native_id": 23713, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 73.21607971191406, "incorrect_loss_raw": 119.43429819742839, "correct_loss_per_char": 0.5344239395030224, "incorrect_loss_per_char": 0.7375777570106056, "correct_loss_per_token": 2.218669082179214, "incorrect_loss_per_token": 3.2437261059152083, "correct_loss_uncond": -28.996810913085938, "incorrect_loss_uncond": -14.473721822102865}, "model_output": [{"sum_logits": -102.99555969238281, "num_tokens": 33, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -116.11895751953125, "logits_per_token": -3.1210775664358428, "logits_per_char": -0.6776023663972554, "num_chars": 152}, {"sum_logits": -73.21607971191406, "num_tokens": 33, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -102.212890625, "logits_per_token": -2.218669082179214, "logits_per_char": -0.5344239395030224, "num_chars": 137}, {"sum_logits": -143.11476135253906, "num_tokens": 40, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -161.33853149414062, "logits_per_token": -3.5778690338134767, "logits_per_char": -0.8468329074114738, "num_chars": 169}, {"sum_logits": -112.19257354736328, "num_tokens": 37, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -124.26657104492188, "logits_per_token": -3.032231717496305, "logits_per_char": -0.6882979972230876, "num_chars": 163}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 305, "native_id": 15682, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 83.81272888183594, "incorrect_loss_raw": 169.52264404296875, "correct_loss_per_char": 0.540727283108619, "incorrect_loss_per_char": 0.776594535560602, "correct_loss_per_token": 2.4650802612304688, "incorrect_loss_per_token": 4.159155201631434, "correct_loss_uncond": -26.427825927734375, "incorrect_loss_uncond": -7.2323760986328125}, "model_output": [{"sum_logits": -139.35308837890625, "num_tokens": 34, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -149.31427001953125, "logits_per_token": -4.098620246438419, "logits_per_char": -0.7873055840616172, "num_chars": 177}, {"sum_logits": -105.6707763671875, "num_tokens": 34, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -111.72776794433594, "logits_per_token": -3.1079640107996322, "logits_per_char": -0.5591046368634259, "num_chars": 189}, {"sum_logits": -263.5440673828125, "num_tokens": 50, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -269.2230224609375, "logits_per_token": -5.27088134765625, "logits_per_char": -0.983373385756763, "num_chars": 268}, {"sum_logits": -83.81272888183594, "num_tokens": 34, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -110.24055480957031, "logits_per_token": -2.4650802612304688, "logits_per_char": -0.540727283108619, "num_chars": 155}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 306, "native_id": 32474, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 23.96788215637207, "incorrect_loss_raw": 21.746278762817383, "correct_loss_per_char": 0.5326196034749349, "incorrect_loss_per_char": 0.5884206130490665, "correct_loss_per_token": 2.6630980173746743, "incorrect_loss_per_token": 2.5196530554029675, "correct_loss_uncond": -26.962236404418945, "incorrect_loss_uncond": -26.80365562438965}, "model_output": [{"sum_logits": -26.992305755615234, "num_tokens": 12, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -62.87499237060547, "logits_per_token": -2.249358812967936, "logits_per_char": -0.4735492237827234, "num_chars": 57}, {"sum_logits": -23.96788215637207, "num_tokens": 9, "num_tokens_all": 413, "is_greedy": false, "sum_logits_uncond": -50.930118560791016, "logits_per_token": -2.6630980173746743, "logits_per_char": -0.5326196034749349, "num_chars": 45}, {"sum_logits": -20.05462074279785, "num_tokens": 12, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -48.84992218017578, "logits_per_token": -1.6712183952331543, "logits_per_char": -0.3342436790466309, "num_chars": 60}, {"sum_logits": -18.191909790039062, "num_tokens": 5, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -33.924888610839844, "logits_per_token": -3.6383819580078125, "logits_per_char": -0.9574689363178454, "num_chars": 19}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 307, "native_id": 44046, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 74.88175964355469, "incorrect_loss_raw": 97.70623779296875, "correct_loss_per_char": 0.618857517715328, "incorrect_loss_per_char": 0.751864902671096, "correct_loss_per_token": 2.880067678598257, "incorrect_loss_per_token": 3.4123153108539004, "correct_loss_uncond": -13.699615478515625, "incorrect_loss_uncond": -9.943209330240885}, "model_output": [{"sum_logits": -75.81982421875, "num_tokens": 22, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -85.47310638427734, "logits_per_token": -3.4463556463068183, "logits_per_char": -0.7581982421875, "num_chars": 100}, {"sum_logits": -74.88175964355469, "num_tokens": 26, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -88.58137512207031, "logits_per_token": -2.880067678598257, "logits_per_char": -0.618857517715328, "num_chars": 121}, {"sum_logits": -86.93229675292969, "num_tokens": 32, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -94.99479675292969, "logits_per_token": -2.7166342735290527, "logits_per_char": -0.6165411117229056, "num_chars": 141}, {"sum_logits": -130.36659240722656, "num_tokens": 32, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -142.48043823242188, "logits_per_token": -4.07395601272583, "logits_per_char": -0.8808553541028822, "num_chars": 148}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 308, "native_id": 44647, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 81.90067291259766, "incorrect_loss_raw": 75.09777450561523, "correct_loss_per_char": 0.4875040054321289, "incorrect_loss_per_char": 0.5633896450047653, "correct_loss_per_token": 1.997577388112138, "incorrect_loss_per_token": 2.3148579279581707, "correct_loss_uncond": -31.15624237060547, "incorrect_loss_uncond": -22.730531056722004}, "model_output": [{"sum_logits": -107.49757385253906, "num_tokens": 40, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -123.86819458007812, "logits_per_token": -2.6874393463134765, "logits_per_char": -0.6515004475911458, "num_chars": 165}, {"sum_logits": -81.90067291259766, "num_tokens": 41, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -113.05691528320312, "logits_per_token": -1.997577388112138, "logits_per_char": -0.4875040054321289, "num_chars": 168}, {"sum_logits": -70.69033813476562, "num_tokens": 32, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -86.52368927001953, "logits_per_token": -2.209073066711426, "logits_per_char": -0.5479871173237645, "num_chars": 129}, {"sum_logits": -47.105411529541016, "num_tokens": 23, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -83.09303283691406, "logits_per_token": -2.0480613708496094, "logits_per_char": -0.49068137009938556, "num_chars": 96}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 309, "native_id": 40921, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 41.05886459350586, "incorrect_loss_raw": 91.3774922688802, "correct_loss_per_char": 0.6730961408771452, "incorrect_loss_per_char": 0.5527116353363047, "correct_loss_per_token": 2.566179037094116, "incorrect_loss_per_token": 2.250791676538586, "correct_loss_uncond": -21.122356414794922, "incorrect_loss_uncond": -23.18413798014323}, "model_output": [{"sum_logits": -118.35737609863281, "num_tokens": 40, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -138.54440307617188, "logits_per_token": -2.9589344024658204, "logits_per_char": -0.7261188717707534, "num_chars": 163}, {"sum_logits": -82.08438110351562, "num_tokens": 47, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -102.52459716796875, "logits_per_token": -1.7464761936918218, "logits_per_char": -0.45350486797522443, "num_chars": 181}, {"sum_logits": -73.69071960449219, "num_tokens": 36, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -102.61589050292969, "logits_per_token": -2.0469644334581165, "logits_per_char": -0.47851116626293627, "num_chars": 154}, {"sum_logits": -41.05886459350586, "num_tokens": 16, "num_tokens_all": 444, "is_greedy": false, "sum_logits_uncond": -62.18122100830078, "logits_per_token": -2.566179037094116, "logits_per_char": -0.6730961408771452, "num_chars": 61}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 310, "native_id": 50320, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 79.1051254272461, "incorrect_loss_raw": 131.35387674967447, "correct_loss_per_char": 0.4572550602730988, "incorrect_loss_per_char": 0.5178303839268837, "correct_loss_per_token": 2.3971250129468515, "incorrect_loss_per_token": 2.5573052400830147, "correct_loss_uncond": -35.780731201171875, "incorrect_loss_uncond": -21.153940836588543}, "model_output": [{"sum_logits": -58.964202880859375, "num_tokens": 29, "num_tokens_all": 443, "is_greedy": false, "sum_logits_uncond": -78.94091796875, "logits_per_token": -2.0332483752020476, "logits_per_char": -0.4038644032935574, "num_chars": 146}, {"sum_logits": -79.1051254272461, "num_tokens": 33, "num_tokens_all": 447, "is_greedy": false, "sum_logits_uncond": -114.88585662841797, "logits_per_token": -2.3971250129468515, "logits_per_char": -0.4572550602730988, "num_chars": 173}, {"sum_logits": -139.19732666015625, "num_tokens": 54, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -154.08370971679688, "logits_per_token": -2.577728271484375, "logits_per_char": -0.5523703438895089, "num_chars": 252}, {"sum_logits": -195.9001007080078, "num_tokens": 64, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -224.4988250732422, "logits_per_token": -3.060939073562622, "logits_per_char": -0.5972564045975848, "num_chars": 328}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 311, "native_id": 48854, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 64.19746398925781, "incorrect_loss_raw": 104.58096567789714, "correct_loss_per_char": 0.4553011630443817, "incorrect_loss_per_char": 0.6869490432495744, "correct_loss_per_token": 2.070885935137349, "incorrect_loss_per_token": 3.245995837096831, "correct_loss_uncond": -25.292221069335938, "incorrect_loss_uncond": -14.76764170328776}, "model_output": [{"sum_logits": -64.19746398925781, "num_tokens": 31, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -89.48968505859375, "logits_per_token": -2.070885935137349, "logits_per_char": -0.4553011630443817, "num_chars": 141}, {"sum_logits": -127.37226104736328, "num_tokens": 39, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -138.03135681152344, "logits_per_token": -3.2659554114708533, "logits_per_char": -0.6884987083641259, "num_chars": 185}, {"sum_logits": -104.46658325195312, "num_tokens": 38, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -124.83403015136719, "logits_per_token": -2.7491206118935034, "logits_per_char": -0.5771634433809565, "num_chars": 181}, {"sum_logits": -81.904052734375, "num_tokens": 22, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -95.18043518066406, "logits_per_token": -3.7229114879261362, "logits_per_char": -0.7951849780036407, "num_chars": 103}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 312, "native_id": 30232, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 119.77777099609375, "incorrect_loss_raw": 120.06997172037761, "correct_loss_per_char": 0.5162834956728178, "incorrect_loss_per_char": 0.6705517072334904, "correct_loss_per_token": 2.548463212682846, "incorrect_loss_per_token": 3.2771899944412244, "correct_loss_uncond": -41.555877685546875, "incorrect_loss_uncond": -22.065938313802082}, "model_output": [{"sum_logits": -64.59806823730469, "num_tokens": 22, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -98.26881408691406, "logits_per_token": -2.936275828968395, "logits_per_char": -0.6459806823730468, "num_chars": 100}, {"sum_logits": -190.28402709960938, "num_tokens": 47, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -189.86770629882812, "logits_per_token": -4.048596321268285, "logits_per_char": -0.7073755654260572, "num_chars": 269}, {"sum_logits": -105.32781982421875, "num_tokens": 37, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -138.27120971679688, "logits_per_token": -2.8466978330869934, "logits_per_char": -0.6582988739013672, "num_chars": 160}, {"sum_logits": -119.77777099609375, "num_tokens": 47, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -161.33364868164062, "logits_per_token": -2.548463212682846, "logits_per_char": -0.5162834956728178, "num_chars": 232}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 313, "native_id": 9373, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 59.1209831237793, "incorrect_loss_raw": 64.71907170613606, "correct_loss_per_char": 0.5326214695835972, "incorrect_loss_per_char": 0.6105286012215726, "correct_loss_per_token": 2.570477527120839, "incorrect_loss_per_token": 2.9547654871355022, "correct_loss_uncond": -43.49110794067383, "incorrect_loss_uncond": -38.40768559773763}, "model_output": [{"sum_logits": -59.1209831237793, "num_tokens": 23, "num_tokens_all": 424, "is_greedy": false, "sum_logits_uncond": -102.61209106445312, "logits_per_token": -2.570477527120839, "logits_per_char": -0.5326214695835972, "num_chars": 111}, {"sum_logits": -40.9356689453125, "num_tokens": 16, "num_tokens_all": 417, "is_greedy": false, "sum_logits_uncond": -71.76065063476562, "logits_per_token": -2.5584793090820312, "logits_per_char": -0.5248162685296475, "num_chars": 78}, {"sum_logits": -103.94539642333984, "num_tokens": 28, "num_tokens_all": 429, "is_greedy": false, "sum_logits_uncond": -148.9466094970703, "logits_per_token": -3.7123355865478516, "logits_per_char": -0.7934763085751133, "num_chars": 131}, {"sum_logits": -49.27614974975586, "num_tokens": 19, "num_tokens_all": 420, "is_greedy": false, "sum_logits_uncond": -88.67301177978516, "logits_per_token": -2.5934815657766244, "logits_per_char": -0.5132932265599569, "num_chars": 96}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 314, "native_id": 21115, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 86.4891357421875, "incorrect_loss_raw": 150.22748311360678, "correct_loss_per_char": 0.46250874728442515, "incorrect_loss_per_char": 0.6793734930365165, "correct_loss_per_token": 2.217670147235577, "incorrect_loss_per_token": 3.32492907271426, "correct_loss_uncond": -19.51728057861328, "incorrect_loss_uncond": -22.419957478841145}, "model_output": [{"sum_logits": -107.21694946289062, "num_tokens": 39, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -121.65814208984375, "logits_per_token": -2.7491525503305287, "logits_per_char": -0.6126682826450893, "num_chars": 175}, {"sum_logits": -176.18785095214844, "num_tokens": 60, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -206.64227294921875, "logits_per_token": -2.9364641825358073, "logits_per_char": -0.5932250873809711, "num_chars": 297}, {"sum_logits": -167.27764892578125, "num_tokens": 39, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -189.64190673828125, "logits_per_token": -4.2891704852764425, "logits_per_char": -0.8322271090834888, "num_chars": 201}, {"sum_logits": -86.4891357421875, "num_tokens": 39, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -106.00641632080078, "logits_per_token": -2.217670147235577, "logits_per_char": -0.46250874728442515, "num_chars": 187}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 315, "native_id": 16087, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 29.88248634338379, "incorrect_loss_raw": 25.690627733866375, "correct_loss_per_char": 0.5976497268676758, "incorrect_loss_per_char": 0.39849869797869425, "correct_loss_per_token": 2.7165896675803443, "incorrect_loss_per_token": 1.7397790099635266, "correct_loss_uncond": -31.45485496520996, "incorrect_loss_uncond": -32.5968287785848}, "model_output": [{"sum_logits": -40.01923370361328, "num_tokens": 17, "num_tokens_all": 421, "is_greedy": false, "sum_logits_uncond": -71.34649658203125, "logits_per_token": -2.3540725708007812, "logits_per_char": -0.47641944885253906, "num_chars": 84}, {"sum_logits": -11.223860740661621, "num_tokens": 11, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -47.679805755615234, "logits_per_token": -1.0203509764237837, "logits_per_char": -0.27375270099174687, "num_chars": 41}, {"sum_logits": -25.82878875732422, "num_tokens": 14, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -55.83606719970703, "logits_per_token": -1.8449134826660156, "logits_per_char": -0.4453239440917969, "num_chars": 58}, {"sum_logits": -29.88248634338379, "num_tokens": 11, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -61.33734130859375, "logits_per_token": -2.7165896675803443, "logits_per_char": -0.5976497268676758, "num_chars": 50}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 316, "native_id": 20767, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 95.79975128173828, "incorrect_loss_raw": 90.14481608072917, "correct_loss_per_char": 0.5569752981496412, "incorrect_loss_per_char": 0.5840582689847608, "correct_loss_per_token": 2.661104202270508, "incorrect_loss_per_token": 2.5746405162508528, "correct_loss_uncond": -12.203697204589844, "incorrect_loss_uncond": -7.3490651448567705}, "model_output": [{"sum_logits": -107.62118530273438, "num_tokens": 42, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -112.4189682006836, "logits_per_token": -2.562409173874628, "logits_per_char": -0.5755143599076704, "num_chars": 187}, {"sum_logits": -95.79975128173828, "num_tokens": 36, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -108.00344848632812, "logits_per_token": -2.661104202270508, "logits_per_char": -0.5569752981496412, "num_chars": 172}, {"sum_logits": -89.80413818359375, "num_tokens": 32, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -104.94917297363281, "logits_per_token": -2.8063793182373047, "logits_per_char": -0.6193388840247844, "num_chars": 145}, {"sum_logits": -73.00912475585938, "num_tokens": 31, "num_tokens_all": 448, "is_greedy": false, "sum_logits_uncond": -75.1135025024414, "logits_per_token": -2.355133056640625, "logits_per_char": -0.5573215630218273, "num_chars": 131}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 317, "native_id": 25456, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 69.14921569824219, "incorrect_loss_raw": 91.70445760091145, "correct_loss_per_char": 0.32772140141347006, "incorrect_loss_per_char": 0.566680249573865, "correct_loss_per_token": 1.646409897577195, "incorrect_loss_per_token": 2.572990473669053, "correct_loss_uncond": -24.922691345214844, "incorrect_loss_uncond": -21.130780537923176}, "model_output": [{"sum_logits": -94.92252349853516, "num_tokens": 38, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -114.02376556396484, "logits_per_token": -2.4979611446982934, "logits_per_char": -0.5823467699296635, "num_chars": 163}, {"sum_logits": -69.14921569824219, "num_tokens": 42, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -94.07190704345703, "logits_per_token": -1.646409897577195, "logits_per_char": -0.32772140141347006, "num_chars": 211}, {"sum_logits": -67.09707641601562, "num_tokens": 31, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -87.4661865234375, "logits_per_token": -2.164421819871472, "logits_per_char": -0.516131357046274, "num_chars": 130}, {"sum_logits": -113.0937728881836, "num_tokens": 37, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -137.01576232910156, "logits_per_token": -3.0565884564373946, "logits_per_char": -0.6015626217456574, "num_chars": 188}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 318, "native_id": 10242, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 17.937694549560547, "incorrect_loss_raw": 40.31897226969401, "correct_loss_per_char": 0.3899498815121858, "incorrect_loss_per_char": 0.5439838458404895, "correct_loss_per_token": 2.2422118186950684, "incorrect_loss_per_token": 2.6159484090368736, "correct_loss_uncond": -19.818897247314453, "incorrect_loss_uncond": -23.675430297851562}, "model_output": [{"sum_logits": -17.937694549560547, "num_tokens": 8, "num_tokens_all": 397, "is_greedy": false, "sum_logits_uncond": -37.756591796875, "logits_per_token": -2.2422118186950684, "logits_per_char": -0.3899498815121858, "num_chars": 46}, {"sum_logits": -39.95311737060547, "num_tokens": 21, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -66.35977935791016, "logits_per_token": -1.9025293986002605, "logits_per_char": -0.4813628598868129, "num_chars": 83}, {"sum_logits": -48.159767150878906, "num_tokens": 12, "num_tokens_all": 401, "is_greedy": false, "sum_logits_uncond": -67.44435119628906, "logits_per_token": -4.013313929239909, "logits_per_char": -0.6879966735839844, "num_chars": 70}, {"sum_logits": -32.844032287597656, "num_tokens": 17, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -58.1790771484375, "logits_per_token": -1.9320018992704504, "logits_per_char": -0.4625920040506712, "num_chars": 71}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 319, "native_id": 49492, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 69.35494995117188, "incorrect_loss_raw": 131.9349365234375, "correct_loss_per_char": 0.49539249965122767, "incorrect_loss_per_char": 0.6348751310934749, "correct_loss_per_token": 2.3915499983162714, "incorrect_loss_per_token": 3.057071918478899, "correct_loss_uncond": -21.64348602294922, "incorrect_loss_uncond": -19.905868530273438}, "model_output": [{"sum_logits": -151.02328491210938, "num_tokens": 48, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -180.16258239746094, "logits_per_token": -3.1463184356689453, "logits_per_char": -0.671214599609375, "num_chars": 225}, {"sum_logits": -79.6396484375, "num_tokens": 30, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -95.06527709960938, "logits_per_token": -2.654654947916667, "logits_per_char": -0.5648202016843972, "num_chars": 141}, {"sum_logits": -69.35494995117188, "num_tokens": 29, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -90.9984359741211, "logits_per_token": -2.3915499983162714, "logits_per_char": -0.49539249965122767, "num_chars": 140}, {"sum_logits": -165.14187622070312, "num_tokens": 49, "num_tokens_all": 493, "is_greedy": false, "sum_logits_uncond": -180.2945556640625, "logits_per_token": -3.3702423718510843, "logits_per_char": -0.6685905919866524, "num_chars": 247}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 320, "native_id": 50544, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 79.63739013671875, "incorrect_loss_raw": 127.25386555989583, "correct_loss_per_char": 0.7239762739701705, "incorrect_loss_per_char": 0.7011994126523159, "correct_loss_per_token": 3.18549560546875, "incorrect_loss_per_token": 3.3819092956513614, "correct_loss_uncond": -15.592361450195312, "incorrect_loss_uncond": -10.355242411295572}, "model_output": [{"sum_logits": -190.627685546875, "num_tokens": 56, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -199.45333862304688, "logits_per_token": -3.4040658133370534, "logits_per_char": -0.7446393966674805, "num_chars": 256}, {"sum_logits": -81.03216552734375, "num_tokens": 22, "num_tokens_all": 446, "is_greedy": false, "sum_logits_uncond": -97.01982879638672, "logits_per_token": -3.683280251242898, "logits_per_char": -0.7573099581994743, "num_chars": 107}, {"sum_logits": -110.10174560546875, "num_tokens": 36, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -116.35415649414062, "logits_per_token": -3.058381822374132, "logits_per_char": -0.6016488830899932, "num_chars": 183}, {"sum_logits": -79.63739013671875, "num_tokens": 25, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -95.22975158691406, "logits_per_token": -3.18549560546875, "logits_per_char": -0.7239762739701705, "num_chars": 110}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 321, "native_id": 16502, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 18.353296279907227, "incorrect_loss_raw": 30.904608408610027, "correct_loss_per_char": 0.5098137855529785, "incorrect_loss_per_char": 0.5584475293055456, "correct_loss_per_token": 2.2941620349884033, "incorrect_loss_per_token": 2.321887434223593, "correct_loss_uncond": -28.961088180541992, "incorrect_loss_uncond": -18.958126068115234}, "model_output": [{"sum_logits": -18.353296279907227, "num_tokens": 8, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -47.31438446044922, "logits_per_token": -2.2941620349884033, "logits_per_char": -0.5098137855529785, "num_chars": 36}, {"sum_logits": -27.348125457763672, "num_tokens": 11, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -43.106971740722656, "logits_per_token": -2.486193223433061, "logits_per_char": -0.6077361212836372, "num_chars": 45}, {"sum_logits": -27.325157165527344, "num_tokens": 13, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -47.49702835083008, "logits_per_token": -2.1019351665790262, "logits_per_char": -0.5465031433105468, "num_chars": 50}, {"sum_logits": -38.04054260253906, "num_tokens": 16, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -58.98420333862305, "logits_per_token": -2.3775339126586914, "logits_per_char": -0.5211033233224529, "num_chars": 73}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 322, "native_id": 14220, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 87.81234741210938, "incorrect_loss_raw": 82.81981658935547, "correct_loss_per_char": 0.45264096604180093, "incorrect_loss_per_char": 0.5268887349575014, "correct_loss_per_token": 2.042147614235102, "incorrect_loss_per_token": 2.150639790636211, "correct_loss_uncond": -32.428428649902344, "incorrect_loss_uncond": -19.724945068359375}, "model_output": [{"sum_logits": -80.49008178710938, "num_tokens": 32, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -103.55072021484375, "logits_per_token": -2.515315055847168, "logits_per_char": -0.6597547687467982, "num_chars": 122}, {"sum_logits": -87.81234741210938, "num_tokens": 43, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -120.24077606201172, "logits_per_token": -2.042147614235102, "logits_per_char": -0.45264096604180093, "num_chars": 194}, {"sum_logits": -67.1065902709961, "num_tokens": 33, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -81.85881042480469, "logits_per_token": -2.033533038515033, "logits_per_char": -0.4473772684733073, "num_chars": 150}, {"sum_logits": -100.86277770996094, "num_tokens": 53, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -122.2247543334961, "logits_per_token": -1.9030712775464327, "logits_per_char": -0.47353416765239875, "num_chars": 213}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 323, "native_id": 3715, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 43.766876220703125, "incorrect_loss_raw": 95.93151601155598, "correct_loss_per_char": 0.5337423929354039, "incorrect_loss_per_char": 0.7828142282237177, "correct_loss_per_token": 1.9894034645774148, "incorrect_loss_per_token": 3.366170111953366, "correct_loss_uncond": -17.246665954589844, "incorrect_loss_uncond": -14.598925272623697}, "model_output": [{"sum_logits": -149.0841064453125, "num_tokens": 38, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -155.524169921875, "logits_per_token": -3.9232659590871712, "logits_per_char": -0.9035400390625, "num_chars": 165}, {"sum_logits": -66.46796417236328, "num_tokens": 21, "num_tokens_all": 436, "is_greedy": false, "sum_logits_uncond": -86.96493530273438, "logits_per_token": -3.165141151064918, "logits_per_char": -0.7224778714387313, "num_chars": 92}, {"sum_logits": -72.24247741699219, "num_tokens": 24, "num_tokens_all": 439, "is_greedy": false, "sum_logits_uncond": -89.10221862792969, "logits_per_token": -3.010103225708008, "logits_per_char": -0.7224247741699219, "num_chars": 100}, {"sum_logits": -43.766876220703125, "num_tokens": 22, "num_tokens_all": 437, "is_greedy": false, "sum_logits_uncond": -61.01354217529297, "logits_per_token": -1.9894034645774148, "logits_per_char": -0.5337423929354039, "num_chars": 82}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 324, "native_id": 48, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 87.5992431640625, "incorrect_loss_raw": 80.91374969482422, "correct_loss_per_char": 0.5245463662518712, "incorrect_loss_per_char": 0.5067328306107112, "correct_loss_per_token": 2.5764483283547794, "incorrect_loss_per_token": 2.278409006152155, "correct_loss_uncond": -22.611541748046875, "incorrect_loss_uncond": -16.114420572916668}, "model_output": [{"sum_logits": -77.9133529663086, "num_tokens": 36, "num_tokens_all": 444, "is_greedy": false, "sum_logits_uncond": -90.39598846435547, "logits_per_token": -2.1642598046196833, "logits_per_char": -0.49626339468986364, "num_chars": 157}, {"sum_logits": -87.5992431640625, "num_tokens": 34, "num_tokens_all": 442, "is_greedy": false, "sum_logits_uncond": -110.21078491210938, "logits_per_token": -2.5764483283547794, "logits_per_char": -0.5245463662518712, "num_chars": 167}, {"sum_logits": -113.87971496582031, "num_tokens": 42, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -138.31759643554688, "logits_per_token": -2.711421784900484, "logits_per_char": -0.5993669208727385, "num_chars": 190}, {"sum_logits": -50.94818115234375, "num_tokens": 26, "num_tokens_all": 434, "is_greedy": false, "sum_logits_uncond": -62.37092590332031, "logits_per_token": -1.9595454289362981, "logits_per_char": -0.42456817626953125, "num_chars": 120}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 325, "native_id": 18098, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 95.54191589355469, "incorrect_loss_raw": 118.86138916015625, "correct_loss_per_char": 0.4402853266984087, "incorrect_loss_per_char": 0.670044935742756, "correct_loss_per_token": 1.94983501823581, "incorrect_loss_per_token": 2.954967180231735, "correct_loss_uncond": -30.450523376464844, "incorrect_loss_uncond": -19.193028767903645}, "model_output": [{"sum_logits": -95.54191589355469, "num_tokens": 49, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -125.99243927001953, "logits_per_token": -1.94983501823581, "logits_per_char": -0.4402853266984087, "num_chars": 217}, {"sum_logits": -99.9298095703125, "num_tokens": 38, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -118.03427124023438, "logits_per_token": -2.6297318307976973, "logits_per_char": -0.5983820932354041, "num_chars": 167}, {"sum_logits": -153.1151580810547, "num_tokens": 48, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -183.1252899169922, "logits_per_token": -3.189899126688639, "logits_per_char": -0.7121635259583939, "num_chars": 215}, {"sum_logits": -103.53919982910156, "num_tokens": 34, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -113.00369262695312, "logits_per_token": -3.0452705832088696, "logits_per_char": -0.69958918803447, "num_chars": 148}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 326, "native_id": 39149, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 133.469970703125, "incorrect_loss_raw": 79.48839314778645, "correct_loss_per_char": 0.8089089133522728, "incorrect_loss_per_char": 0.561194603386544, "correct_loss_per_token": 3.336749267578125, "incorrect_loss_per_token": 2.3021405680953806, "correct_loss_uncond": -21.575958251953125, "incorrect_loss_uncond": -26.05860646565755}, "model_output": [{"sum_logits": -80.20431518554688, "num_tokens": 44, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -107.29924011230469, "logits_per_token": -1.8228253451260654, "logits_per_char": -0.42890008120613304, "num_chars": 187}, {"sum_logits": -84.57218933105469, "num_tokens": 29, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -111.01209259033203, "logits_per_token": -2.916282390726024, "logits_per_char": -0.7047682444254557, "num_chars": 120}, {"sum_logits": -73.68867492675781, "num_tokens": 34, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -98.32966613769531, "logits_per_token": -2.1673139684340534, "logits_per_char": -0.5499154845280434, "num_chars": 134}, {"sum_logits": -133.469970703125, "num_tokens": 40, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -155.04592895507812, "logits_per_token": -3.336749267578125, "logits_per_char": -0.8089089133522728, "num_chars": 165}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 327, "native_id": 22070, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 156.15858459472656, "incorrect_loss_raw": 107.8081563313802, "correct_loss_per_char": 0.7692541113040717, "incorrect_loss_per_char": 0.5750088523736985, "correct_loss_per_token": 3.25330384572347, "incorrect_loss_per_token": 2.7128224083506765, "correct_loss_uncond": -27.8572998046875, "incorrect_loss_uncond": -20.238479614257812}, "model_output": [{"sum_logits": -151.86587524414062, "num_tokens": 46, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -167.61907958984375, "logits_per_token": -3.3014320705247964, "logits_per_char": -0.6489994668553019, "num_chars": 234}, {"sum_logits": -96.04668426513672, "num_tokens": 40, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -126.35296630859375, "logits_per_token": -2.401167106628418, "logits_per_char": -0.5248452692083974, "num_chars": 183}, {"sum_logits": -156.15858459472656, "num_tokens": 48, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -184.01588439941406, "logits_per_token": -3.25330384572347, "logits_per_char": -0.7692541113040717, "num_chars": 203}, {"sum_logits": -75.51190948486328, "num_tokens": 31, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -90.16786193847656, "logits_per_token": -2.4358680478988157, "logits_per_char": -0.5511818210573962, "num_chars": 137}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 328, "native_id": 47542, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 115.00961303710938, "incorrect_loss_raw": 120.3456319173177, "correct_loss_per_char": 0.5157381750543021, "incorrect_loss_per_char": 0.7236118678888553, "correct_loss_per_token": 2.396033604939779, "incorrect_loss_per_token": 3.056165949010726, "correct_loss_uncond": -25.838180541992188, "incorrect_loss_uncond": -12.460027058919271}, "model_output": [{"sum_logits": -115.00961303710938, "num_tokens": 48, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -140.84779357910156, "logits_per_token": -2.396033604939779, "logits_per_char": -0.5157381750543021, "num_chars": 223}, {"sum_logits": -138.44580078125, "num_tokens": 43, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -155.3616943359375, "logits_per_token": -3.2196697856104652, "logits_per_char": -0.8390654592803031, "num_chars": 165}, {"sum_logits": -87.23098754882812, "num_tokens": 32, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -98.09169006347656, "logits_per_token": -2.725968360900879, "logits_per_char": -0.6230784824916294, "num_chars": 140}, {"sum_logits": -135.360107421875, "num_tokens": 42, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -144.96359252929688, "logits_per_token": -3.2228597005208335, "logits_per_char": -0.7086916618946335, "num_chars": 191}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 329, "native_id": 35734, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 83.726806640625, "incorrect_loss_raw": 86.18579228719075, "correct_loss_per_char": 0.41655127681902987, "incorrect_loss_per_char": 0.582656160978898, "correct_loss_per_token": 1.9934953962053572, "incorrect_loss_per_token": 2.6909949868822856, "correct_loss_uncond": -43.76073455810547, "incorrect_loss_uncond": -38.63242975870768}, "model_output": [{"sum_logits": -130.0269775390625, "num_tokens": 42, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -162.6197509765625, "logits_per_token": -3.0958804175967263, "logits_per_char": -0.6702421522632087, "num_chars": 194}, {"sum_logits": -55.38760757446289, "num_tokens": 27, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -92.93601989746094, "logits_per_token": -2.0513928731282554, "logits_per_char": -0.4361228942871094, "num_chars": 127}, {"sum_logits": -73.14279174804688, "num_tokens": 25, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -118.89889526367188, "logits_per_token": -2.925711669921875, "logits_per_char": -0.6416034363863761, "num_chars": 114}, {"sum_logits": -83.726806640625, "num_tokens": 42, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -127.48754119873047, "logits_per_token": -1.9934953962053572, "logits_per_char": -0.41655127681902987, "num_chars": 201}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 330, "native_id": 11904, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 47.479393005371094, "incorrect_loss_raw": 45.11078135172526, "correct_loss_per_char": 0.6416134189915013, "incorrect_loss_per_char": 0.651082868844909, "correct_loss_per_token": 3.1652928670247396, "incorrect_loss_per_token": 3.1093610354832237, "correct_loss_uncond": -27.59906005859375, "incorrect_loss_uncond": -22.380849202473957}, "model_output": [{"sum_logits": -47.479393005371094, "num_tokens": 15, "num_tokens_all": 420, "is_greedy": false, "sum_logits_uncond": -75.07845306396484, "logits_per_token": -3.1652928670247396, "logits_per_char": -0.6416134189915013, "num_chars": 74}, {"sum_logits": -59.531837463378906, "num_tokens": 14, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -82.74101257324219, "logits_per_token": -4.252274104527065, "logits_per_char": -0.9158744225135216, "num_chars": 65}, {"sum_logits": -52.34661102294922, "num_tokens": 16, "num_tokens_all": 421, "is_greedy": false, "sum_logits_uncond": -77.7613296508789, "logits_per_token": -3.271663188934326, "logits_per_char": -0.5948478525335138, "num_chars": 88}, {"sum_logits": -23.453895568847656, "num_tokens": 13, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -41.97254943847656, "logits_per_token": -1.8041458129882812, "logits_per_char": -0.4425263314876916, "num_chars": 53}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 331, "native_id": 6841, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.7304229736328125, "incorrect_loss_raw": 36.952993392944336, "correct_loss_per_char": 0.2122378879123264, "incorrect_loss_per_char": 0.6741706261527999, "correct_loss_per_token": 1.1460845947265625, "incorrect_loss_per_token": 3.2232209683152675, "correct_loss_uncond": -24.871898651123047, "incorrect_loss_uncond": -23.095314661661785}, "model_output": [{"sum_logits": -30.560291290283203, "num_tokens": 11, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -55.17241287231445, "logits_per_token": -2.778208299116655, "logits_per_char": -0.6643541584844175, "num_chars": 46}, {"sum_logits": -51.17167282104492, "num_tokens": 14, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -72.9822998046875, "logits_per_token": -3.6551194872174944, "logits_per_char": -0.6477426939372775, "num_chars": 79}, {"sum_logits": -29.127016067504883, "num_tokens": 9, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -51.990211486816406, "logits_per_token": -3.236335118611654, "logits_per_char": -0.7104150260367045, "num_chars": 41}, {"sum_logits": -5.7304229736328125, "num_tokens": 5, "num_tokens_all": 402, "is_greedy": false, "sum_logits_uncond": -30.60232162475586, "logits_per_token": -1.1460845947265625, "logits_per_char": -0.2122378879123264, "num_chars": 27}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 332, "native_id": 30395, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 70.116943359375, "incorrect_loss_raw": 101.79889678955078, "correct_loss_per_char": 0.5008353097098214, "incorrect_loss_per_char": 0.6629412129389155, "correct_loss_per_token": 2.1911544799804688, "incorrect_loss_per_token": 2.960420066866524, "correct_loss_uncond": -25.359222412109375, "incorrect_loss_uncond": -25.0782953898112}, "model_output": [{"sum_logits": -74.22759246826172, "num_tokens": 28, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -88.45130920410156, "logits_per_token": -2.650985445295061, "logits_per_char": -0.6454573258109715, "num_chars": 115}, {"sum_logits": -70.116943359375, "num_tokens": 32, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -95.47616577148438, "logits_per_token": -2.1911544799804688, "logits_per_char": -0.5008353097098214, "num_chars": 140}, {"sum_logits": -139.1055450439453, "num_tokens": 44, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -175.13429260253906, "logits_per_token": -3.161489660089666, "logits_per_char": -0.6351851371869649, "num_chars": 219}, {"sum_logits": -92.06355285644531, "num_tokens": 30, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -117.04597473144531, "logits_per_token": -3.0687850952148437, "logits_per_char": -0.7081811758188101, "num_chars": 130}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 333, "native_id": 19022, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 8.42997932434082, "incorrect_loss_raw": 9.628815650939941, "correct_loss_per_char": 0.2554539189194188, "incorrect_loss_per_char": 0.31459746778153425, "correct_loss_per_token": 1.2042827606201172, "incorrect_loss_per_token": 1.446169260569981, "correct_loss_uncond": -25.966970443725586, "incorrect_loss_uncond": -25.430118878682453}, "model_output": [{"sum_logits": -12.15086555480957, "num_tokens": 8, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -36.18963623046875, "logits_per_token": -1.5188581943511963, "logits_per_char": -0.35737839867086973, "num_chars": 34}, {"sum_logits": -9.230667114257812, "num_tokens": 7, "num_tokens_all": 413, "is_greedy": false, "sum_logits_uncond": -35.47917556762695, "logits_per_token": -1.318666730608259, "logits_per_char": -0.2977634552986391, "num_chars": 31}, {"sum_logits": -8.42997932434082, "num_tokens": 7, "num_tokens_all": 413, "is_greedy": false, "sum_logits_uncond": -34.396949768066406, "logits_per_token": -1.2042827606201172, "logits_per_char": -0.2554539189194188, "num_chars": 33}, {"sum_logits": -7.504914283752441, "num_tokens": 5, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -33.507991790771484, "logits_per_token": -1.5009828567504884, "logits_per_char": -0.2886505493750939, "num_chars": 26}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 334, "native_id": 11944, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 70.99061584472656, "incorrect_loss_raw": 109.71944427490234, "correct_loss_per_char": 0.35495307922363284, "incorrect_loss_per_char": 0.6996408158472304, "correct_loss_per_token": 1.8681741011770148, "incorrect_loss_per_token": 3.2832219920661765, "correct_loss_uncond": -37.70380401611328, "incorrect_loss_uncond": -18.667246500651043}, "model_output": [{"sum_logits": -70.99061584472656, "num_tokens": 38, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -108.69441986083984, "logits_per_token": -1.8681741011770148, "logits_per_char": -0.35495307922363284, "num_chars": 200}, {"sum_logits": -108.30577087402344, "num_tokens": 37, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -124.28849029541016, "logits_per_token": -2.927182996595228, "logits_per_char": -0.6685541411976755, "num_chars": 162}, {"sum_logits": -142.25372314453125, "num_tokens": 39, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -162.55474853515625, "logits_per_token": -3.6475313626802883, "logits_per_char": -0.7409048080444336, "num_chars": 192}, {"sum_logits": -78.59883880615234, "num_tokens": 24, "num_tokens_all": 447, "is_greedy": false, "sum_logits_uncond": -98.31683349609375, "logits_per_token": -3.274951616923014, "logits_per_char": -0.689463498299582, "num_chars": 114}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 335, "native_id": 27761, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 93.25202941894531, "incorrect_loss_raw": 88.13752746582031, "correct_loss_per_char": 0.4002233022272331, "incorrect_loss_per_char": 0.6289497729939622, "correct_loss_per_token": 1.695491443980824, "incorrect_loss_per_token": 2.848952955669827, "correct_loss_uncond": -21.055099487304688, "incorrect_loss_uncond": -19.178489685058594}, "model_output": [{"sum_logits": -109.93232727050781, "num_tokens": 30, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -125.35638427734375, "logits_per_token": -3.6644109090169272, "logits_per_char": -0.774171318806393, "num_chars": 142}, {"sum_logits": -93.25202941894531, "num_tokens": 55, "num_tokens_all": 498, "is_greedy": false, "sum_logits_uncond": -114.30712890625, "logits_per_token": -1.695491443980824, "logits_per_char": -0.4002233022272331, "num_chars": 233}, {"sum_logits": -54.500465393066406, "num_tokens": 31, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -71.59400939941406, "logits_per_token": -1.7580795288085938, "logits_per_char": -0.4325433761354477, "num_chars": 126}, {"sum_logits": -99.97978973388672, "num_tokens": 32, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -124.9976577758789, "logits_per_token": -3.12436842918396, "logits_per_char": -0.6801346240400457, "num_chars": 147}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 336, "native_id": 17368, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 97.84467315673828, "incorrect_loss_raw": 126.33288319905598, "correct_loss_per_char": 0.4726795804673347, "incorrect_loss_per_char": 0.5751027090507641, "correct_loss_per_token": 2.2237425717440518, "incorrect_loss_per_token": 2.6041095471255993, "correct_loss_uncond": -25.54161834716797, "incorrect_loss_uncond": -12.553278605143229}, "model_output": [{"sum_logits": -188.48373413085938, "num_tokens": 63, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -204.96243286132812, "logits_per_token": -2.9918053036644343, "logits_per_char": -0.6282791137695313, "num_chars": 300}, {"sum_logits": -71.68238830566406, "num_tokens": 36, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -76.4968032836914, "logits_per_token": -1.991177452935113, "logits_per_char": -0.4715946599056846, "num_chars": 152}, {"sum_logits": -97.84467315673828, "num_tokens": 44, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -123.38629150390625, "logits_per_token": -2.2237425717440518, "logits_per_char": -0.4726795804673347, "num_chars": 207}, {"sum_logits": -118.83252716064453, "num_tokens": 42, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -135.19924926757812, "logits_per_token": -2.8293458847772506, "logits_per_char": -0.6254343534770764, "num_chars": 190}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 337, "native_id": 37365, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 16.70504379272461, "incorrect_loss_raw": 46.63345464070638, "correct_loss_per_char": 0.618705325656467, "incorrect_loss_per_char": 0.846447231589604, "correct_loss_per_token": 2.7841739654541016, "incorrect_loss_per_token": 3.7570508032134087, "correct_loss_uncond": -19.777008056640625, "incorrect_loss_uncond": -18.892114003499348}, "model_output": [{"sum_logits": -39.05869674682617, "num_tokens": 11, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -59.8505859375, "logits_per_token": -3.550790613347834, "logits_per_char": -0.7369565423929466, "num_chars": 53}, {"sum_logits": -50.93659973144531, "num_tokens": 16, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -74.18402099609375, "logits_per_token": -3.183537483215332, "logits_per_char": -0.8782172367490572, "num_chars": 58}, {"sum_logits": -49.905067443847656, "num_tokens": 11, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -62.54209899902344, "logits_per_token": -4.53682431307706, "logits_per_char": -0.9241679156268084, "num_chars": 54}, {"sum_logits": -16.70504379272461, "num_tokens": 6, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -36.482051849365234, "logits_per_token": -2.7841739654541016, "logits_per_char": -0.618705325656467, "num_chars": 27}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 338, "native_id": 34936, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 33.27033996582031, "incorrect_loss_raw": 82.83770370483398, "correct_loss_per_char": 0.3656081314925309, "incorrect_loss_per_char": 0.5277484832051677, "correct_loss_per_token": 1.6635169982910156, "incorrect_loss_per_token": 2.5189561576538533, "correct_loss_uncond": -31.708175659179688, "incorrect_loss_uncond": -28.44467290242513}, "model_output": [{"sum_logits": -62.0338249206543, "num_tokens": 34, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -93.29741668701172, "logits_per_token": -1.8245242623721851, "logits_per_char": -0.38771140575408936, "num_chars": 160}, {"sum_logits": -88.9223861694336, "num_tokens": 27, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -114.88763427734375, "logits_per_token": -3.293421709979022, "logits_per_char": -0.6443651171698086, "num_chars": 138}, {"sum_logits": -33.27033996582031, "num_tokens": 20, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -64.978515625, "logits_per_token": -1.6635169982910156, "logits_per_char": -0.3656081314925309, "num_chars": 91}, {"sum_logits": -97.55690002441406, "num_tokens": 40, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -125.66207885742188, "logits_per_token": -2.4389225006103517, "logits_per_char": -0.5511689266916049, "num_chars": 177}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 339, "native_id": 5550, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 76.30982971191406, "incorrect_loss_raw": 79.64574178059895, "correct_loss_per_char": 0.5529697805211164, "incorrect_loss_per_char": 0.6136669708040041, "correct_loss_per_token": 2.312419082179214, "incorrect_loss_per_token": 2.8024601229682227, "correct_loss_uncond": -18.006072998046875, "incorrect_loss_uncond": -17.40880584716797}, "model_output": [{"sum_logits": -88.81382751464844, "num_tokens": 22, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -96.65802001953125, "logits_per_token": -4.036992159756747, "logits_per_char": -0.944827952283494, "num_chars": 94}, {"sum_logits": -77.77127075195312, "num_tokens": 46, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -101.04776763916016, "logits_per_token": -1.6906797989555027, "logits_per_char": -0.3793720524485518, "num_chars": 205}, {"sum_logits": -72.35212707519531, "num_tokens": 27, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -93.45785522460938, "logits_per_token": -2.679708410192419, "logits_per_char": -0.5168009076799666, "num_chars": 140}, {"sum_logits": -76.30982971191406, "num_tokens": 33, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -94.31590270996094, "logits_per_token": -2.312419082179214, "logits_per_char": -0.5529697805211164, "num_chars": 138}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 340, "native_id": 19355, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 64.92259979248047, "incorrect_loss_raw": 104.30883534749348, "correct_loss_per_char": 0.42995099200318193, "incorrect_loss_per_char": 0.5750261481370318, "correct_loss_per_token": 1.9094882291906021, "incorrect_loss_per_token": 2.481370885699804, "correct_loss_uncond": -20.96947479248047, "incorrect_loss_uncond": -22.236470540364582}, "model_output": [{"sum_logits": -154.90972900390625, "num_tokens": 45, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -172.853759765625, "logits_per_token": -3.442438422309028, "logits_per_char": -0.7341693317720676, "num_chars": 211}, {"sum_logits": -64.92259979248047, "num_tokens": 34, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -85.89207458496094, "logits_per_token": -1.9094882291906021, "logits_per_char": -0.42995099200318193, "num_chars": 151}, {"sum_logits": -83.62057495117188, "num_tokens": 42, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -102.67887878417969, "logits_per_token": -1.990966070265997, "logits_per_char": -0.4778318568638393, "num_chars": 175}, {"sum_logits": -74.39620208740234, "num_tokens": 37, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -104.10327911376953, "logits_per_token": -2.0107081645243876, "logits_per_char": -0.5130772557751886, "num_chars": 145}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 341, "native_id": 47797, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 166.49844360351562, "incorrect_loss_raw": 121.5967508951823, "correct_loss_per_char": 0.6259339985094572, "incorrect_loss_per_char": 0.7530476094529078, "correct_loss_per_token": 2.7294826820248463, "incorrect_loss_per_token": 3.7281761909076505, "correct_loss_uncond": -46.053558349609375, "incorrect_loss_uncond": -18.65789540608724}, "model_output": [{"sum_logits": -159.64996337890625, "num_tokens": 33, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -174.5014190673828, "logits_per_token": -4.837877678148674, "logits_per_char": -0.9559878046641093, "num_chars": 167}, {"sum_logits": -166.49844360351562, "num_tokens": 61, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -212.552001953125, "logits_per_token": -2.7294826820248463, "logits_per_char": -0.6259339985094572, "num_chars": 266}, {"sum_logits": -108.44955444335938, "num_tokens": 36, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -129.22003173828125, "logits_per_token": -3.012487623426649, "logits_per_char": -0.6024975246853299, "num_chars": 180}, {"sum_logits": -96.69073486328125, "num_tokens": 29, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -117.04248809814453, "logits_per_token": -3.3341632711476294, "logits_per_char": -0.7006574990092844, "num_chars": 138}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 342, "native_id": 21081, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 82.56513214111328, "incorrect_loss_raw": 108.88609313964844, "correct_loss_per_char": 0.5361372216955408, "incorrect_loss_per_char": 0.6470533304028373, "correct_loss_per_token": 2.501973701245857, "incorrect_loss_per_token": 3.286780877830156, "correct_loss_uncond": -23.840553283691406, "incorrect_loss_uncond": -18.437759399414062}, "model_output": [{"sum_logits": -94.65464782714844, "num_tokens": 32, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -113.84951782226562, "logits_per_token": -2.9579577445983887, "logits_per_char": -0.5807033609027511, "num_chars": 163}, {"sum_logits": -177.5428466796875, "num_tokens": 48, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -192.873291015625, "logits_per_token": -3.6988093058268228, "logits_per_char": -0.7619864664364271, "num_chars": 233}, {"sum_logits": -82.56513214111328, "num_tokens": 33, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -106.40568542480469, "logits_per_token": -2.501973701245857, "logits_per_char": -0.5361372216955408, "num_chars": 154}, {"sum_logits": -54.460784912109375, "num_tokens": 17, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -75.24874877929688, "logits_per_token": -3.2035755830652572, "logits_per_char": -0.5984701638693338, "num_chars": 91}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 343, "native_id": 26280, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 92.791015625, "incorrect_loss_raw": 116.40069071451823, "correct_loss_per_char": 0.57994384765625, "incorrect_loss_per_char": 0.6578390402904429, "correct_loss_per_token": 3.1996901939655173, "incorrect_loss_per_token": 2.9593730546301025, "correct_loss_uncond": -35.941864013671875, "incorrect_loss_uncond": -23.30536142985026}, "model_output": [{"sum_logits": -143.6308135986328, "num_tokens": 46, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -162.36276245117188, "logits_per_token": -3.1224089912746265, "logits_per_char": -0.7599513947017609, "num_chars": 189}, {"sum_logits": -94.27986145019531, "num_tokens": 31, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -117.08724212646484, "logits_per_token": -3.041285853232107, "logits_per_char": -0.6457524756862693, "num_chars": 146}, {"sum_logits": -111.29139709472656, "num_tokens": 41, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -139.66815185546875, "logits_per_token": -2.714424319383575, "logits_per_char": -0.5678132504832988, "num_chars": 196}, {"sum_logits": -92.791015625, "num_tokens": 29, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -128.73287963867188, "logits_per_token": -3.1996901939655173, "logits_per_char": -0.57994384765625, "num_chars": 160}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 344, "native_id": 43816, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 25.522964477539062, "incorrect_loss_raw": 72.90115610758464, "correct_loss_per_char": 0.5671769883897569, "incorrect_loss_per_char": 0.8875869991926894, "correct_loss_per_token": 2.3202694979580967, "incorrect_loss_per_token": 3.8503299967447915, "correct_loss_uncond": -28.536075592041016, "incorrect_loss_uncond": -24.03107452392578}, "model_output": [{"sum_logits": -25.522964477539062, "num_tokens": 11, "num_tokens_all": 422, "is_greedy": false, "sum_logits_uncond": -54.05904006958008, "logits_per_token": -2.3202694979580967, "logits_per_char": -0.5671769883897569, "num_chars": 45}, {"sum_logits": -110.70989227294922, "num_tokens": 25, "num_tokens_all": 436, "is_greedy": false, "sum_logits_uncond": -137.49978637695312, "logits_per_token": -4.428395690917969, "logits_per_char": -1.096137547256923, "num_chars": 101}, {"sum_logits": -46.71418762207031, "num_tokens": 10, "num_tokens_all": 421, "is_greedy": false, "sum_logits_uncond": -65.38307189941406, "logits_per_token": -4.671418762207031, "logits_per_char": -0.9939188855759641, "num_chars": 47}, {"sum_logits": -61.279388427734375, "num_tokens": 25, "num_tokens_all": 436, "is_greedy": false, "sum_logits_uncond": -87.91383361816406, "logits_per_token": -2.451175537109375, "logits_per_char": -0.572704564745181, "num_chars": 107}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 345, "native_id": 17795, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 20.32268524169922, "incorrect_loss_raw": 48.64331309000651, "correct_loss_per_char": 0.5806481497628349, "incorrect_loss_per_char": 0.8228430282765036, "correct_loss_per_token": 2.903240748814174, "incorrect_loss_per_token": 3.9538452816330625, "correct_loss_uncond": -13.036834716796875, "incorrect_loss_uncond": -16.404006958007812}, "model_output": [{"sum_logits": -51.094932556152344, "num_tokens": 18, "num_tokens_all": 423, "is_greedy": false, "sum_logits_uncond": -75.25770568847656, "logits_per_token": -2.8386073642306857, "logits_per_char": -0.6812657674153646, "num_chars": 75}, {"sum_logits": -50.662940979003906, "num_tokens": 11, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -66.16754913330078, "logits_per_token": -4.605721907182173, "logits_per_char": -0.9211443814364346, "num_chars": 55}, {"sum_logits": -44.17206573486328, "num_tokens": 10, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -53.716705322265625, "logits_per_token": -4.417206573486328, "logits_per_char": -0.8661189359777114, "num_chars": 51}, {"sum_logits": -20.32268524169922, "num_tokens": 7, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -33.359519958496094, "logits_per_token": -2.903240748814174, "logits_per_char": -0.5806481497628349, "num_chars": 35}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 346, "native_id": 4089, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 35.575958251953125, "incorrect_loss_raw": 45.53131866455078, "correct_loss_per_char": 0.3784676409782247, "incorrect_loss_per_char": 0.5057665992007195, "correct_loss_per_token": 1.694093250093006, "incorrect_loss_per_token": 2.487483346273029, "correct_loss_uncond": -26.720680236816406, "incorrect_loss_uncond": -23.648967742919922}, "model_output": [{"sum_logits": -24.508716583251953, "num_tokens": 16, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -51.81644058227539, "logits_per_token": -1.531794786453247, "logits_per_char": -0.3403988414340549, "num_chars": 72}, {"sum_logits": -42.35068130493164, "num_tokens": 14, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -61.82201385498047, "logits_per_token": -3.0250486646379744, "logits_per_char": -0.6320997209691289, "num_chars": 67}, {"sum_logits": -35.575958251953125, "num_tokens": 21, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -62.29663848876953, "logits_per_token": -1.694093250093006, "logits_per_char": -0.3784676409782247, "num_chars": 94}, {"sum_logits": -69.73455810546875, "num_tokens": 24, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -93.90240478515625, "logits_per_token": -2.9056065877278647, "logits_per_char": -0.5448012351989746, "num_chars": 128}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 347, "native_id": 1831, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 59.771121978759766, "incorrect_loss_raw": 88.1909408569336, "correct_loss_per_char": 0.44605314909522215, "incorrect_loss_per_char": 0.5165877203389432, "correct_loss_per_token": 1.9923707326253255, "incorrect_loss_per_token": 2.4549024496856187, "correct_loss_uncond": -34.145999908447266, "incorrect_loss_uncond": -22.71422831217448}, "model_output": [{"sum_logits": -67.2099609375, "num_tokens": 33, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -97.49835205078125, "logits_per_token": -2.0366654829545454, "logits_per_char": -0.4603421982020548, "num_chars": 146}, {"sum_logits": -59.771121978759766, "num_tokens": 30, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -93.91712188720703, "logits_per_token": -1.9923707326253255, "logits_per_char": -0.44605314909522215, "num_chars": 134}, {"sum_logits": -86.9832763671875, "num_tokens": 33, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -108.74298095703125, "logits_per_token": -2.635856859611742, "logits_per_char": -0.5402687973117236, "num_chars": 161}, {"sum_logits": -110.37958526611328, "num_tokens": 41, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -126.47417449951172, "logits_per_token": -2.692185006490568, "logits_per_char": -0.5491521655030511, "num_chars": 201}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 348, "native_id": 49798, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 16.651643753051758, "incorrect_loss_raw": 24.95553970336914, "correct_loss_per_char": 0.33982946434799505, "incorrect_loss_per_char": 0.5078081638934961, "correct_loss_per_token": 1.6651643753051757, "incorrect_loss_per_token": 2.5928593452530677, "correct_loss_uncond": -29.459505081176758, "incorrect_loss_uncond": -23.86685307820638}, "model_output": [{"sum_logits": -21.151775360107422, "num_tokens": 11, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -44.72563171386719, "logits_per_token": -1.9228886691006748, "logits_per_char": -0.391699543705693, "num_chars": 54}, {"sum_logits": -33.10685729980469, "num_tokens": 8, "num_tokens_all": 405, "is_greedy": false, "sum_logits_uncond": -50.35509490966797, "logits_per_token": -4.138357162475586, "logits_per_char": -0.7882585071382069, "num_chars": 42}, {"sum_logits": -20.607986450195312, "num_tokens": 12, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -51.386451721191406, "logits_per_token": -1.7173322041829426, "logits_per_char": -0.34346644083658856, "num_chars": 60}, {"sum_logits": -16.651643753051758, "num_tokens": 10, "num_tokens_all": 407, "is_greedy": false, "sum_logits_uncond": -46.111148834228516, "logits_per_token": -1.6651643753051757, "logits_per_char": -0.33982946434799505, "num_chars": 49}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 349, "native_id": 49624, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 74.78887176513672, "incorrect_loss_raw": 123.23092905680339, "correct_loss_per_char": 0.5419483461241791, "incorrect_loss_per_char": 0.6713813676357328, "correct_loss_per_token": 2.1996726989746094, "incorrect_loss_per_token": 3.1489688356394367, "correct_loss_uncond": -23.617332458496094, "incorrect_loss_uncond": -11.34906005859375}, "model_output": [{"sum_logits": -74.78887176513672, "num_tokens": 34, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -98.40620422363281, "logits_per_token": -2.1996726989746094, "logits_per_char": -0.5419483461241791, "num_chars": 138}, {"sum_logits": -98.12557983398438, "num_tokens": 36, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -113.00526428222656, "logits_per_token": -2.7257105509440103, "logits_per_char": -0.6057134557653356, "num_chars": 162}, {"sum_logits": -111.49413299560547, "num_tokens": 41, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -126.00386810302734, "logits_per_token": -2.719369097453792, "logits_per_char": -0.6159896850585938, "num_chars": 181}, {"sum_logits": -160.0730743408203, "num_tokens": 40, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -164.7308349609375, "logits_per_token": -4.0018268585205075, "logits_per_char": -0.7924409620832689, "num_chars": 202}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 350, "native_id": 42365, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 95.69175720214844, "incorrect_loss_raw": 77.0174077351888, "correct_loss_per_char": 0.5036408273797286, "incorrect_loss_per_char": 0.5238461776460274, "correct_loss_per_token": 2.453634800055088, "incorrect_loss_per_token": 2.781801785305398, "correct_loss_uncond": -37.68708801269531, "incorrect_loss_uncond": -28.098988850911457}, "model_output": [{"sum_logits": -81.03050994873047, "num_tokens": 36, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -118.16136932373047, "logits_per_token": -2.250847498575846, "logits_per_char": -0.46838445057069633, "num_chars": 173}, {"sum_logits": -95.69175720214844, "num_tokens": 39, "num_tokens_all": 495, "is_greedy": false, "sum_logits_uncond": -133.37884521484375, "logits_per_token": -2.453634800055088, "logits_per_char": -0.5036408273797286, "num_chars": 190}, {"sum_logits": -99.2528305053711, "num_tokens": 29, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -120.81185913085938, "logits_per_token": -3.422511396736934, "logits_per_char": -0.661685536702474, "num_chars": 150}, {"sum_logits": -50.768882751464844, "num_tokens": 19, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -76.37596130371094, "logits_per_token": -2.6720464606034127, "logits_per_char": -0.44146854566491167, "num_chars": 115}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 351, "native_id": 35815, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 23.174894332885742, "incorrect_loss_raw": 32.44450314839681, "correct_loss_per_char": 0.6621398380824498, "incorrect_loss_per_char": 0.8560542686541686, "correct_loss_per_token": 3.3106991904122487, "incorrect_loss_per_token": 3.4989411195119224, "correct_loss_uncond": -12.032136917114258, "incorrect_loss_uncond": -9.512627283732096}, "model_output": [{"sum_logits": -28.690753936767578, "num_tokens": 10, "num_tokens_all": 399, "is_greedy": false, "sum_logits_uncond": -40.30669021606445, "logits_per_token": -2.869075393676758, "logits_per_char": -0.6672268357387808, "num_chars": 43}, {"sum_logits": -23.174894332885742, "num_tokens": 7, "num_tokens_all": 396, "is_greedy": false, "sum_logits_uncond": -35.20703125, "logits_per_token": -3.3106991904122487, "logits_per_char": -0.6621398380824498, "num_chars": 35}, {"sum_logits": -38.103858947753906, "num_tokens": 10, "num_tokens_all": 399, "is_greedy": false, "sum_logits_uncond": -43.7667121887207, "logits_per_token": -3.810385894775391, "logits_per_char": -1.00273313020405, "num_chars": 38}, {"sum_logits": -30.538896560668945, "num_tokens": 8, "num_tokens_all": 397, "is_greedy": false, "sum_logits_uncond": -41.79798889160156, "logits_per_token": -3.817362070083618, "logits_per_char": -0.8982028400196749, "num_chars": 34}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 352, "native_id": 36671, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 153.97409057617188, "incorrect_loss_raw": 91.80771128336589, "correct_loss_per_char": 0.6753249586674205, "incorrect_loss_per_char": 0.6552563751096746, "correct_loss_per_token": 3.0190998152190565, "incorrect_loss_per_token": 2.824827244247908, "correct_loss_uncond": -19.05352783203125, "incorrect_loss_uncond": -21.68859354654948}, "model_output": [{"sum_logits": -81.58277130126953, "num_tokens": 30, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -103.73831176757812, "logits_per_token": -2.7194257100423176, "logits_per_char": -0.6687112401743405, "num_chars": 122}, {"sum_logits": -153.97409057617188, "num_tokens": 51, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -173.02761840820312, "logits_per_token": -3.0190998152190565, "logits_per_char": -0.6753249586674205, "num_chars": 228}, {"sum_logits": -124.25541687011719, "num_tokens": 38, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -152.4771270751953, "logits_per_token": -3.2698793913188733, "logits_per_char": -0.6644674698936748, "num_chars": 187}, {"sum_logits": -69.58494567871094, "num_tokens": 28, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -84.27347564697266, "logits_per_token": -2.4851766313825334, "logits_per_char": -0.6325904152610086, "num_chars": 110}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 353, "native_id": 33120, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 19.307706832885742, "incorrect_loss_raw": 49.86470031738281, "correct_loss_per_char": 0.5850820252389619, "incorrect_loss_per_char": 0.7947650643441787, "correct_loss_per_token": 2.758243833269392, "incorrect_loss_per_token": 3.6078515563348326, "correct_loss_uncond": -10.918163299560547, "incorrect_loss_uncond": -20.807947794596355}, "model_output": [{"sum_logits": -57.81727600097656, "num_tokens": 15, "num_tokens_all": 427, "is_greedy": false, "sum_logits_uncond": -80.91448974609375, "logits_per_token": -3.8544850667317707, "logits_per_char": -0.9636212666829427, "num_chars": 60}, {"sum_logits": -19.307706832885742, "num_tokens": 7, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -30.22587013244629, "logits_per_token": -2.758243833269392, "logits_per_char": -0.5850820252389619, "num_chars": 33}, {"sum_logits": -56.68897247314453, "num_tokens": 15, "num_tokens_all": 427, "is_greedy": false, "sum_logits_uncond": -73.5802001953125, "logits_per_token": -3.7792648315429687, "logits_per_char": -0.7459075325413754, "num_chars": 76}, {"sum_logits": -35.087852478027344, "num_tokens": 11, "num_tokens_all": 423, "is_greedy": false, "sum_logits_uncond": -57.52325439453125, "logits_per_token": -3.1898047707297583, "logits_per_char": -0.6747663938082181, "num_chars": 52}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 354, "native_id": 6991, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 55.60963821411133, "incorrect_loss_raw": 105.5353012084961, "correct_loss_per_char": 0.4212851379856919, "incorrect_loss_per_char": 0.5795650213269216, "correct_loss_per_token": 2.138832239004282, "incorrect_loss_per_token": 2.8118204934027062, "correct_loss_uncond": -36.691585540771484, "incorrect_loss_uncond": -20.430007934570312}, "model_output": [{"sum_logits": -137.00274658203125, "num_tokens": 44, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -156.61647033691406, "logits_per_token": -3.113698785955256, "logits_per_char": -0.7098587905804727, "num_chars": 193}, {"sum_logits": -55.60963821411133, "num_tokens": 26, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -92.30122375488281, "logits_per_token": -2.138832239004282, "logits_per_char": -0.4212851379856919, "num_chars": 132}, {"sum_logits": -83.76075744628906, "num_tokens": 36, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -102.6309814453125, "logits_per_token": -2.326687706841363, "logits_per_char": -0.440846091822574, "num_chars": 190}, {"sum_logits": -95.84239959716797, "num_tokens": 32, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -118.64847564697266, "logits_per_token": -2.995074987411499, "logits_per_char": -0.5879901815777177, "num_chars": 163}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 355, "native_id": 11198, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 84.82318878173828, "incorrect_loss_raw": 61.57605234781901, "correct_loss_per_char": 0.6236999175127815, "incorrect_loss_per_char": 0.7956172939279439, "correct_loss_per_token": 2.9249375441978716, "incorrect_loss_per_token": 3.9198712559647633, "correct_loss_uncond": -31.08123016357422, "incorrect_loss_uncond": -12.156224568684896}, "model_output": [{"sum_logits": -28.156539916992188, "num_tokens": 9, "num_tokens_all": 407, "is_greedy": false, "sum_logits_uncond": -43.980506896972656, "logits_per_token": -3.128504435221354, "logits_per_char": -0.541471921480619, "num_chars": 52}, {"sum_logits": -114.39286804199219, "num_tokens": 29, "num_tokens_all": 427, "is_greedy": false, "sum_logits_uncond": -126.492431640625, "logits_per_token": -3.94458165662042, "logits_per_char": -0.8411240297205308, "num_chars": 136}, {"sum_logits": -42.178749084472656, "num_tokens": 9, "num_tokens_all": 407, "is_greedy": false, "sum_logits_uncond": -50.72389221191406, "logits_per_token": -4.686527676052517, "logits_per_char": -1.0042559305826824, "num_chars": 42}, {"sum_logits": -84.82318878173828, "num_tokens": 29, "num_tokens_all": 427, "is_greedy": false, "sum_logits_uncond": -115.9044189453125, "logits_per_token": -2.9249375441978716, "logits_per_char": -0.6236999175127815, "num_chars": 136}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 356, "native_id": 48936, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 141.04336547851562, "incorrect_loss_raw": 123.32555643717448, "correct_loss_per_char": 0.7159561699417036, "incorrect_loss_per_char": 0.7780832359782881, "correct_loss_per_token": 2.8208673095703123, "incorrect_loss_per_token": 3.223831839972887, "correct_loss_uncond": -11.712722778320312, "incorrect_loss_uncond": -17.705586751302082}, "model_output": [{"sum_logits": -100.01985931396484, "num_tokens": 34, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -125.74153900146484, "logits_per_token": -2.9417605680577896, "logits_per_char": -0.7464168605519764, "num_chars": 134}, {"sum_logits": -119.7677230834961, "num_tokens": 37, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -123.07556915283203, "logits_per_token": -3.2369654887431376, "logits_per_char": -0.7393069326141735, "num_chars": 162}, {"sum_logits": -141.04336547851562, "num_tokens": 50, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -152.75608825683594, "logits_per_token": -2.8208673095703123, "logits_per_char": -0.7159561699417036, "num_chars": 197}, {"sum_logits": -150.1890869140625, "num_tokens": 43, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -174.2763214111328, "logits_per_token": -3.4927694631177326, "logits_per_char": -0.8485259147687146, "num_chars": 177}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 357, "native_id": 32726, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 98.25444030761719, "incorrect_loss_raw": 95.21237182617188, "correct_loss_per_char": 0.5519912376832427, "incorrect_loss_per_char": 0.580800553920627, "correct_loss_per_token": 2.45636100769043, "incorrect_loss_per_token": 2.5794696572386187, "correct_loss_uncond": -21.10344696044922, "incorrect_loss_uncond": -19.02454884847005}, "model_output": [{"sum_logits": -138.46510314941406, "num_tokens": 54, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -151.66891479492188, "logits_per_token": -2.564168576841001, "logits_per_char": -0.6073030839886582, "num_chars": 228}, {"sum_logits": -98.25444030761719, "num_tokens": 40, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -119.3578872680664, "logits_per_token": -2.45636100769043, "logits_per_char": -0.5519912376832427, "num_chars": 178}, {"sum_logits": -74.67521667480469, "num_tokens": 30, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -94.75724029541016, "logits_per_token": -2.4891738891601562, "logits_per_char": -0.5258818075690471, "num_chars": 142}, {"sum_logits": -72.49679565429688, "num_tokens": 27, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -96.28460693359375, "logits_per_token": -2.685066505714699, "logits_per_char": -0.6092167702041754, "num_chars": 119}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 358, "native_id": 19954, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 70.81576538085938, "incorrect_loss_raw": 81.29935201009114, "correct_loss_per_char": 0.5447366567758414, "incorrect_loss_per_char": 0.6440343110018937, "correct_loss_per_token": 2.0828166288488053, "incorrect_loss_per_token": 3.0623254242891114, "correct_loss_uncond": -37.57160949707031, "incorrect_loss_uncond": -18.247802734375}, "model_output": [{"sum_logits": -61.35148620605469, "num_tokens": 23, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -83.67393493652344, "logits_per_token": -2.667455922002378, "logits_per_char": -0.547781126839774, "num_chars": 112}, {"sum_logits": -70.81576538085938, "num_tokens": 34, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -108.38737487792969, "logits_per_token": -2.0828166288488053, "logits_per_char": -0.5447366567758414, "num_chars": 130}, {"sum_logits": -83.97885131835938, "num_tokens": 28, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -115.60362243652344, "logits_per_token": -2.9992446899414062, "logits_per_char": -0.6998237609863281, "num_chars": 120}, {"sum_logits": -98.56771850585938, "num_tokens": 28, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -99.36390686035156, "logits_per_token": -3.520275660923549, "logits_per_char": -0.684498045179579, "num_chars": 144}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 359, "native_id": 48650, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 206.89987182617188, "incorrect_loss_raw": 152.56249237060547, "correct_loss_per_char": 0.7957687377929688, "incorrect_loss_per_char": 0.8819713454627358, "correct_loss_per_token": 3.33709470687374, "incorrect_loss_per_token": 4.1619224512267055, "correct_loss_uncond": -30.6961669921875, "incorrect_loss_uncond": -17.182301839192707}, "model_output": [{"sum_logits": -158.02194213867188, "num_tokens": 33, "num_tokens_all": 448, "is_greedy": false, "sum_logits_uncond": -171.6610107421875, "logits_per_token": -4.788543701171875, "logits_per_char": -0.9295408361098345, "num_chars": 170}, {"sum_logits": -206.89987182617188, "num_tokens": 62, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -237.59603881835938, "logits_per_token": -3.33709470687374, "logits_per_char": -0.7957687377929688, "num_chars": 260}, {"sum_logits": -190.07028198242188, "num_tokens": 47, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -214.30245971679688, "logits_per_token": -4.044048552817487, "logits_per_char": -0.8923487417015111, "num_chars": 213}, {"sum_logits": -109.59525299072266, "num_tokens": 30, "num_tokens_all": 445, "is_greedy": false, "sum_logits_uncond": -123.27091217041016, "logits_per_token": -3.6531750996907553, "logits_per_char": -0.8240244585768621, "num_chars": 133}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 360, "native_id": 21125, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 79.26898193359375, "incorrect_loss_raw": 81.39506530761719, "correct_loss_per_char": 0.45296561104910715, "incorrect_loss_per_char": 0.5829634046485758, "correct_loss_per_token": 1.8434646961300871, "incorrect_loss_per_token": 2.782476064295885, "correct_loss_uncond": -29.690902709960938, "incorrect_loss_uncond": -22.09235382080078}, "model_output": [{"sum_logits": -56.37310791015625, "num_tokens": 23, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -78.98541259765625, "logits_per_token": -2.451004691745924, "logits_per_char": -0.5033313206263951, "num_chars": 112}, {"sum_logits": -68.86431884765625, "num_tokens": 22, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -77.94994354248047, "logits_per_token": -3.130196311257102, "logits_per_char": -0.6260392622514205, "num_chars": 110}, {"sum_logits": -118.94776916503906, "num_tokens": 43, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -153.5269012451172, "logits_per_token": -2.7662271898846296, "logits_per_char": -0.6195196310679117, "num_chars": 192}, {"sum_logits": -79.26898193359375, "num_tokens": 43, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -108.95988464355469, "logits_per_token": -1.8434646961300871, "logits_per_char": -0.45296561104910715, "num_chars": 175}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 361, "native_id": 11394, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 77.24365234375, "incorrect_loss_raw": 140.7526092529297, "correct_loss_per_char": 0.39011945628156564, "incorrect_loss_per_char": 0.5802598220577002, "correct_loss_per_token": 1.980606470352564, "incorrect_loss_per_token": 2.673286249476435, "correct_loss_uncond": -30.602439880371094, "incorrect_loss_uncond": -21.784085591634113}, "model_output": [{"sum_logits": -227.68740844726562, "num_tokens": 65, "num_tokens_all": 490, "is_greedy": false, "sum_logits_uncond": -255.74383544921875, "logits_per_token": -3.5028832068810094, "logits_per_char": -0.7770901312193366, "num_chars": 293}, {"sum_logits": -60.71681213378906, "num_tokens": 33, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -83.4068374633789, "logits_per_token": -1.839903397993608, "logits_per_char": -0.3892103341909555, "num_chars": 156}, {"sum_logits": -133.85360717773438, "num_tokens": 50, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -148.45941162109375, "logits_per_token": -2.6770721435546876, "logits_per_char": -0.5744790007628084, "num_chars": 233}, {"sum_logits": -77.24365234375, "num_tokens": 39, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -107.8460922241211, "logits_per_token": -1.980606470352564, "logits_per_char": -0.39011945628156564, "num_chars": 198}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 362, "native_id": 25430, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 95.22068786621094, "incorrect_loss_raw": 102.22064208984375, "correct_loss_per_char": 0.4784959189256831, "incorrect_loss_per_char": 0.7189622463599203, "correct_loss_per_token": 2.0259720822598073, "incorrect_loss_per_token": 3.2487903573080263, "correct_loss_uncond": -27.63909912109375, "incorrect_loss_uncond": -18.11003875732422}, "model_output": [{"sum_logits": -95.22068786621094, "num_tokens": 47, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -122.85978698730469, "logits_per_token": -2.0259720822598073, "logits_per_char": -0.4784959189256831, "num_chars": 199}, {"sum_logits": -77.0305404663086, "num_tokens": 21, "num_tokens_all": 444, "is_greedy": false, "sum_logits_uncond": -98.80609130859375, "logits_per_token": -3.6681209745861234, "logits_per_char": -0.8108477943821957, "num_chars": 95}, {"sum_logits": -104.7201156616211, "num_tokens": 33, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -115.25739288330078, "logits_per_token": -3.1733368382309424, "logits_per_char": -0.7933342095577356, "num_chars": 132}, {"sum_logits": -124.91127014160156, "num_tokens": 43, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -146.92855834960938, "logits_per_token": -2.9049132591070133, "logits_per_char": -0.5527047351398299, "num_chars": 226}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 363, "native_id": 20600, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 73.07768249511719, "incorrect_loss_raw": 93.95457712809245, "correct_loss_per_char": 0.7612258593241373, "incorrect_loss_per_char": 0.6371727033434526, "correct_loss_per_token": 2.7065808331524885, "incorrect_loss_per_token": 2.7207946604865856, "correct_loss_uncond": -28.48681640625, "incorrect_loss_uncond": -39.516743977864586}, "model_output": [{"sum_logits": -88.67698669433594, "num_tokens": 36, "num_tokens_all": 496, "is_greedy": false, "sum_logits_uncond": -132.25294494628906, "logits_per_token": -2.4632496303982205, "logits_per_char": -0.6201187880722793, "num_chars": 143}, {"sum_logits": -73.07768249511719, "num_tokens": 27, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -101.56449890136719, "logits_per_token": -2.7065808331524885, "logits_per_char": -0.7612258593241373, "num_chars": 96}, {"sum_logits": -103.6688003540039, "num_tokens": 33, "num_tokens_all": 493, "is_greedy": false, "sum_logits_uncond": -143.85299682617188, "logits_per_token": -3.141478798606179, "logits_per_char": -0.6865483467152577, "num_chars": 151}, {"sum_logits": -89.5179443359375, "num_tokens": 35, "num_tokens_all": 495, "is_greedy": false, "sum_logits_uncond": -124.30802154541016, "logits_per_token": -2.557655552455357, "logits_per_char": -0.6048509752428209, "num_chars": 148}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 364, "native_id": 4590, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 69.37091064453125, "incorrect_loss_raw": 118.3008524576823, "correct_loss_per_char": 0.45638757002981084, "incorrect_loss_per_char": 0.6078212304441948, "correct_loss_per_token": 1.9820260184151786, "incorrect_loss_per_token": 2.7000919438168953, "correct_loss_uncond": -35.47698974609375, "incorrect_loss_uncond": -23.02819315592448}, "model_output": [{"sum_logits": -69.37091064453125, "num_tokens": 35, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -104.847900390625, "logits_per_token": -1.9820260184151786, "logits_per_char": -0.45638757002981084, "num_chars": 152}, {"sum_logits": -153.31410217285156, "num_tokens": 44, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -174.55335998535156, "logits_per_token": -3.4844114130193535, "logits_per_char": -0.8026916344128354, "num_chars": 191}, {"sum_logits": -92.48175811767578, "num_tokens": 38, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -103.33221435546875, "logits_per_token": -2.4337304767809416, "logits_per_char": -0.5315043569981367, "num_chars": 174}, {"sum_logits": -109.10669708251953, "num_tokens": 50, "num_tokens_all": 490, "is_greedy": false, "sum_logits_uncond": -146.1015625, "logits_per_token": -2.1821339416503904, "logits_per_char": -0.48926769992161223, "num_chars": 223}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 365, "native_id": 10969, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 101.58964538574219, "incorrect_loss_raw": 91.03074391682942, "correct_loss_per_char": 0.6083212298547437, "incorrect_loss_per_char": 0.5841758519533647, "correct_loss_per_token": 2.987930746639476, "incorrect_loss_per_token": 2.502738572003549, "correct_loss_uncond": -15.771247863769531, "incorrect_loss_uncond": -20.5868657430013}, "model_output": [{"sum_logits": -101.58964538574219, "num_tokens": 34, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -117.36089324951172, "logits_per_token": -2.987930746639476, "logits_per_char": -0.6083212298547437, "num_chars": 167}, {"sum_logits": -65.17314910888672, "num_tokens": 32, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -83.00223541259766, "logits_per_token": -2.03666090965271, "logits_per_char": -0.4557562874747323, "num_chars": 143}, {"sum_logits": -92.65240478515625, "num_tokens": 38, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -111.6034927368164, "logits_per_token": -2.4382211785567436, "logits_per_char": -0.5939256716997195, "num_chars": 156}, {"sum_logits": -115.26667785644531, "num_tokens": 38, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -140.24710083007812, "logits_per_token": -3.0333336278011926, "logits_per_char": -0.7028455966856422, "num_chars": 164}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 366, "native_id": 2030, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.467965126037598, "incorrect_loss_raw": 19.9042911529541, "correct_loss_per_char": 0.32765614645821706, "incorrect_loss_per_char": 0.7439152226190896, "correct_loss_per_token": 1.9113275210062664, "incorrect_loss_per_token": 2.8960810048239574, "correct_loss_uncond": -20.19406032562256, "incorrect_loss_uncond": -14.101391474405924}, "model_output": [{"sum_logits": -20.092529296875, "num_tokens": 6, "num_tokens_all": 391, "is_greedy": false, "sum_logits_uncond": -31.601165771484375, "logits_per_token": -3.3487548828125, "logits_per_char": -0.956787109375, "num_chars": 21}, {"sum_logits": -17.951417922973633, "num_tokens": 8, "num_tokens_all": 393, "is_greedy": false, "sum_logits_uncond": -32.032596588134766, "logits_per_token": -2.243927240371704, "logits_per_char": -0.4724057348150956, "num_chars": 38}, {"sum_logits": -21.668926239013672, "num_tokens": 7, "num_tokens_all": 392, "is_greedy": false, "sum_logits_uncond": -38.38328552246094, "logits_per_token": -3.0955608912876675, "logits_per_char": -0.8025528236671731, "num_chars": 27}, {"sum_logits": -11.467965126037598, "num_tokens": 6, "num_tokens_all": 391, "is_greedy": false, "sum_logits_uncond": -31.662025451660156, "logits_per_token": -1.9113275210062664, "logits_per_char": -0.32765614645821706, "num_chars": 35}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 367, "native_id": 14032, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 113.18399047851562, "incorrect_loss_raw": 123.62740071614583, "correct_loss_per_char": 0.7545599365234374, "incorrect_loss_per_char": 0.6923982112311334, "correct_loss_per_token": 3.5369997024536133, "incorrect_loss_per_token": 3.30577700283365, "correct_loss_uncond": -34.67201232910156, "incorrect_loss_uncond": -17.340103149414062}, "model_output": [{"sum_logits": -101.30782318115234, "num_tokens": 31, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -121.08457946777344, "logits_per_token": -3.2679942961662047, "logits_per_char": -0.6891688651779071, "num_chars": 147}, {"sum_logits": -90.43572235107422, "num_tokens": 31, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -107.5023193359375, "logits_per_token": -2.9172813661636843, "logits_per_char": -0.679967837226122, "num_chars": 133}, {"sum_logits": -179.13865661621094, "num_tokens": 48, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -194.31561279296875, "logits_per_token": -3.732055346171061, "logits_per_char": -0.7080579312893713, "num_chars": 253}, {"sum_logits": -113.18399047851562, "num_tokens": 32, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -147.8560028076172, "logits_per_token": -3.5369997024536133, "logits_per_char": -0.7545599365234374, "num_chars": 150}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 368, "native_id": 8446, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 49.343719482421875, "incorrect_loss_raw": 79.74750264485677, "correct_loss_per_char": 0.4011684510766006, "incorrect_loss_per_char": 0.6220629967347172, "correct_loss_per_token": 1.6447906494140625, "incorrect_loss_per_token": 2.524654126634785, "correct_loss_uncond": -28.84845733642578, "incorrect_loss_uncond": -22.05224863688151}, "model_output": [{"sum_logits": -82.35430908203125, "num_tokens": 32, "num_tokens_all": 442, "is_greedy": false, "sum_logits_uncond": -97.51203918457031, "logits_per_token": -2.5735721588134766, "logits_per_char": -0.722406220017818, "num_chars": 114}, {"sum_logits": -95.63787841796875, "num_tokens": 34, "num_tokens_all": 444, "is_greedy": false, "sum_logits_uncond": -129.52969360351562, "logits_per_token": -2.812878776999081, "logits_per_char": -0.6333634332315812, "num_chars": 151}, {"sum_logits": -61.25032043457031, "num_tokens": 28, "num_tokens_all": 438, "is_greedy": false, "sum_logits_uncond": -78.3575210571289, "logits_per_token": -2.187511444091797, "logits_per_char": -0.5104193369547526, "num_chars": 120}, {"sum_logits": -49.343719482421875, "num_tokens": 30, "num_tokens_all": 440, "is_greedy": false, "sum_logits_uncond": -78.19217681884766, "logits_per_token": -1.6447906494140625, "logits_per_char": -0.4011684510766006, "num_chars": 123}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 369, "native_id": 30344, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 23.823345184326172, "incorrect_loss_raw": 39.01045354207357, "correct_loss_per_char": 0.5414396632801403, "incorrect_loss_per_char": 0.7474147819792614, "correct_loss_per_token": 2.165758653120561, "incorrect_loss_per_token": 3.372171548696665, "correct_loss_uncond": -23.566059112548828, "incorrect_loss_uncond": -17.95744450887044}, "model_output": [{"sum_logits": -46.16400909423828, "num_tokens": 11, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -58.333534240722656, "logits_per_token": -4.196728099476207, "logits_per_char": -0.7959311912799704, "num_chars": 58}, {"sum_logits": -37.373046875, "num_tokens": 13, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -62.35542297363281, "logits_per_token": -2.8748497596153846, "logits_per_char": -0.6673758370535714, "num_chars": 56}, {"sum_logits": -33.49430465698242, "num_tokens": 11, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -50.21473693847656, "logits_per_token": -3.044936786998402, "logits_per_char": -0.7789373176042423, "num_chars": 43}, {"sum_logits": -23.823345184326172, "num_tokens": 11, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -47.389404296875, "logits_per_token": -2.165758653120561, "logits_per_char": -0.5414396632801403, "num_chars": 44}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 370, "native_id": 41981, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 127.94940948486328, "incorrect_loss_raw": 112.84662628173828, "correct_loss_per_char": 0.6397470474243164, "incorrect_loss_per_char": 0.6895063025861153, "correct_loss_per_token": 3.0464145115443637, "incorrect_loss_per_token": 3.2256557379828554, "correct_loss_uncond": -17.30278778076172, "incorrect_loss_uncond": -14.545069376627604}, "model_output": [{"sum_logits": -127.94940948486328, "num_tokens": 42, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -145.252197265625, "logits_per_token": -3.0464145115443637, "logits_per_char": -0.6397470474243164, "num_chars": 200}, {"sum_logits": -89.33277893066406, "num_tokens": 25, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -105.26138305664062, "logits_per_token": -3.5733111572265623, "logits_per_char": -0.8195667791804042, "num_chars": 109}, {"sum_logits": -83.96355438232422, "num_tokens": 30, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -93.60071563720703, "logits_per_token": -2.7987851460774738, "logits_per_char": -0.6458734952486478, "num_chars": 130}, {"sum_logits": -165.24354553222656, "num_tokens": 50, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -183.31298828125, "logits_per_token": -3.304870910644531, "logits_per_char": -0.603078633329294, "num_chars": 274}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 371, "native_id": 8230, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 31.11069679260254, "incorrect_loss_raw": 107.88017018636067, "correct_loss_per_char": 0.5185116132100424, "incorrect_loss_per_char": 0.6896312941744478, "correct_loss_per_token": 2.828245162963867, "incorrect_loss_per_token": 3.046087282433357, "correct_loss_uncond": -23.361047744750977, "incorrect_loss_uncond": -14.232608795166016}, "model_output": [{"sum_logits": -16.200504302978516, "num_tokens": 7, "num_tokens_all": 395, "is_greedy": false, "sum_logits_uncond": -30.67700958251953, "logits_per_token": -2.3143577575683594, "logits_per_char": -0.5400168100992838, "num_chars": 30}, {"sum_logits": -30.270633697509766, "num_tokens": 10, "num_tokens_all": 398, "is_greedy": false, "sum_logits_uncond": -41.05582809448242, "logits_per_token": -3.0270633697509766, "logits_per_char": -0.7039682255234829, "num_chars": 43}, {"sum_logits": -31.11069679260254, "num_tokens": 11, "num_tokens_all": 399, "is_greedy": false, "sum_logits_uncond": -54.471744537353516, "logits_per_token": -2.828245162963867, "logits_per_char": -0.5185116132100424, "num_chars": 60}, {"sum_logits": -277.16937255859375, "num_tokens": 73, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -294.6054992675781, "logits_per_token": -3.7968407199807364, "logits_per_char": -0.8249088469005766, "num_chars": 336}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 372, "native_id": 2634, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 129.92538452148438, "incorrect_loss_raw": 97.66905212402344, "correct_loss_per_char": 0.5075210332870483, "incorrect_loss_per_char": 0.5251315295204595, "correct_loss_per_token": 2.0300841331481934, "incorrect_loss_per_token": 2.291361077510102, "correct_loss_uncond": -15.567138671875, "incorrect_loss_uncond": -24.2466303507487}, "model_output": [{"sum_logits": -123.77893829345703, "num_tokens": 56, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -148.69537353515625, "logits_per_token": -2.2103381838117326, "logits_per_char": -0.5222740012382153, "num_chars": 237}, {"sum_logits": -104.66366577148438, "num_tokens": 48, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -133.6155242919922, "logits_per_token": -2.1804930369059243, "logits_per_char": -0.4714579539256053, "num_chars": 222}, {"sum_logits": -64.5645523071289, "num_tokens": 26, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -83.43614959716797, "logits_per_token": -2.48325201181265, "logits_per_char": -0.5816626333975577, "num_chars": 111}, {"sum_logits": -129.92538452148438, "num_tokens": 64, "num_tokens_all": 495, "is_greedy": false, "sum_logits_uncond": -145.49252319335938, "logits_per_token": -2.0300841331481934, "logits_per_char": -0.5075210332870483, "num_chars": 256}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 373, "native_id": 40810, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 75.36388397216797, "incorrect_loss_raw": 118.39251708984375, "correct_loss_per_char": 0.4800247386762291, "incorrect_loss_per_char": 0.693270167019168, "correct_loss_per_token": 2.791254961932147, "incorrect_loss_per_token": 3.299196051072705, "correct_loss_uncond": -28.812179565429688, "incorrect_loss_uncond": -18.449350992838543}, "model_output": [{"sum_logits": -146.5821533203125, "num_tokens": 43, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -170.05117797851562, "logits_per_token": -3.4088872865188953, "logits_per_char": -0.6914252515109081, "num_chars": 212}, {"sum_logits": -74.99349975585938, "num_tokens": 27, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -96.80435180664062, "logits_per_token": -2.7775370279947915, "logits_per_char": -0.6249458312988281, "num_chars": 120}, {"sum_logits": -75.36388397216797, "num_tokens": 27, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -104.17606353759766, "logits_per_token": -2.791254961932147, "logits_per_char": -0.4800247386762291, "num_chars": 157}, {"sum_logits": -133.60189819335938, "num_tokens": 36, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -143.67007446289062, "logits_per_token": -3.7111638387044272, "logits_per_char": -0.7634394182477678, "num_chars": 175}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 374, "native_id": 45178, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 102.87480163574219, "incorrect_loss_raw": 151.45012410481772, "correct_loss_per_char": 0.4762722297951027, "incorrect_loss_per_char": 0.5970936463475351, "correct_loss_per_token": 2.188825566717919, "incorrect_loss_per_token": 2.6421023977759908, "correct_loss_uncond": -41.60765075683594, "incorrect_loss_uncond": -22.995829264322918}, "model_output": [{"sum_logits": -149.36380004882812, "num_tokens": 60, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -174.7529296875, "logits_per_token": -2.489396667480469, "logits_per_char": -0.5511579337595134, "num_chars": 271}, {"sum_logits": -102.87480163574219, "num_tokens": 47, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -144.48245239257812, "logits_per_token": -2.188825566717919, "logits_per_char": -0.4762722297951027, "num_chars": 216}, {"sum_logits": -154.06427001953125, "num_tokens": 52, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -175.1485595703125, "logits_per_token": -2.962774423452524, "logits_per_char": -0.6340093416441616, "num_chars": 243}, {"sum_logits": -150.92230224609375, "num_tokens": 61, "num_tokens_all": 493, "is_greedy": false, "sum_logits_uncond": -173.43637084960938, "logits_per_token": -2.4741361023949797, "logits_per_char": -0.6061136636389307, "num_chars": 249}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 375, "native_id": 48051, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 57.66838073730469, "incorrect_loss_raw": 110.14553833007812, "correct_loss_per_char": 0.4271731906467014, "incorrect_loss_per_char": 0.7704604346679385, "correct_loss_per_token": 2.135865953233507, "incorrect_loss_per_token": 3.3290528658706378, "correct_loss_uncond": -35.64976501464844, "incorrect_loss_uncond": -18.66375986735026}, "model_output": [{"sum_logits": -141.01953125, "num_tokens": 38, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -162.26109313964844, "logits_per_token": -3.7110402960526314, "logits_per_char": -0.9098034274193548, "num_chars": 155}, {"sum_logits": -87.38038635253906, "num_tokens": 26, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -100.43937683105469, "logits_per_token": -3.3607840904822717, "logits_per_char": -0.688034538208969, "num_chars": 127}, {"sum_logits": -57.66838073730469, "num_tokens": 27, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -93.31814575195312, "logits_per_token": -2.135865953233507, "logits_per_char": -0.4271731906467014, "num_chars": 135}, {"sum_logits": -102.03669738769531, "num_tokens": 35, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -123.72742462158203, "logits_per_token": -2.915334211077009, "logits_per_char": -0.7135433383754917, "num_chars": 143}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 376, "native_id": 20788, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 53.91477584838867, "incorrect_loss_raw": 105.331787109375, "correct_loss_per_char": 0.3935385098422531, "incorrect_loss_per_char": 0.5328222054646631, "correct_loss_per_token": 1.9968435499403212, "incorrect_loss_per_token": 2.943951133445457, "correct_loss_uncond": -14.712970733642578, "incorrect_loss_uncond": -15.049303690592447}, "model_output": [{"sum_logits": -60.46551513671875, "num_tokens": 36, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -82.02039337158203, "logits_per_token": -1.679597642686632, "logits_per_char": -0.3515436926553416, "num_chars": 172}, {"sum_logits": -94.23040771484375, "num_tokens": 24, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -105.16831970214844, "logits_per_token": -3.9262669881184897, "logits_per_char": -0.6118857643821023, "num_chars": 154}, {"sum_logits": -161.2994384765625, "num_tokens": 50, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -173.95455932617188, "logits_per_token": -3.22598876953125, "logits_per_char": -0.6350371593565453, "num_chars": 254}, {"sum_logits": -53.91477584838867, "num_tokens": 27, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -68.62774658203125, "logits_per_token": -1.9968435499403212, "logits_per_char": -0.3935385098422531, "num_chars": 137}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 377, "native_id": 41124, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 160.22528076171875, "incorrect_loss_raw": 125.86100769042969, "correct_loss_per_char": 0.5544127361997189, "incorrect_loss_per_char": 0.5981601323591251, "correct_loss_per_token": 2.3562541288488053, "incorrect_loss_per_token": 2.640170068122691, "correct_loss_uncond": -28.158233642578125, "incorrect_loss_uncond": -18.995717366536457}, "model_output": [{"sum_logits": -104.32972717285156, "num_tokens": 46, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -126.73164367675781, "logits_per_token": -2.2680375472359033, "logits_per_char": -0.5764073324466937, "num_chars": 181}, {"sum_logits": -172.392578125, "num_tokens": 54, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -189.7415771484375, "logits_per_token": -3.192455150462963, "logits_per_char": -0.6787109375, "num_chars": 254}, {"sum_logits": -100.8607177734375, "num_tokens": 41, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -118.09695434570312, "logits_per_token": -2.460017506669207, "logits_per_char": -0.5393621271306818, "num_chars": 187}, {"sum_logits": -160.22528076171875, "num_tokens": 68, "num_tokens_all": 495, "is_greedy": false, "sum_logits_uncond": -188.38351440429688, "logits_per_token": -2.3562541288488053, "logits_per_char": -0.5544127361997189, "num_chars": 289}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 378, "native_id": 46126, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 16.169023513793945, "incorrect_loss_raw": 42.331320444742836, "correct_loss_per_char": 0.4255006187840512, "incorrect_loss_per_char": 0.6432049270588754, "correct_loss_per_token": 1.7965581681993272, "incorrect_loss_per_token": 2.7105085348882594, "correct_loss_uncond": -22.087419509887695, "incorrect_loss_uncond": -18.417744954427082}, "model_output": [{"sum_logits": -37.670406341552734, "num_tokens": 14, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -51.0660514831543, "logits_per_token": -2.6907433101109097, "logits_per_char": -0.7107623838028818, "num_chars": 53}, {"sum_logits": -38.60765075683594, "num_tokens": 17, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -72.33133697509766, "logits_per_token": -2.2710382798138786, "logits_per_char": -0.5515378679547991, "num_chars": 70}, {"sum_logits": -50.715904235839844, "num_tokens": 16, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -58.84980773925781, "logits_per_token": -3.1697440147399902, "logits_per_char": -0.6673145294189453, "num_chars": 76}, {"sum_logits": -16.169023513793945, "num_tokens": 9, "num_tokens_all": 407, "is_greedy": false, "sum_logits_uncond": -38.25644302368164, "logits_per_token": -1.7965581681993272, "logits_per_char": -0.4255006187840512, "num_chars": 38}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 379, "native_id": 15197, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 96.98991394042969, "incorrect_loss_raw": 58.560909271240234, "correct_loss_per_char": 0.40924014320856406, "incorrect_loss_per_char": 0.46911495522358937, "correct_loss_per_token": 1.9017630184397978, "incorrect_loss_per_token": 2.0233766768891135, "correct_loss_uncond": -23.373092651367188, "incorrect_loss_uncond": -21.987060546875}, "model_output": [{"sum_logits": -96.98991394042969, "num_tokens": 51, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -120.36300659179688, "logits_per_token": -1.9017630184397978, "logits_per_char": -0.40924014320856406, "num_chars": 237}, {"sum_logits": -92.0692367553711, "num_tokens": 32, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -113.68190002441406, "logits_per_token": -2.8771636486053467, "logits_per_char": -0.7192909121513367, "num_chars": 128}, {"sum_logits": -51.155975341796875, "num_tokens": 27, "num_tokens_all": 448, "is_greedy": false, "sum_logits_uncond": -62.68863296508789, "logits_per_token": -1.8946657533998843, "logits_per_char": -0.42629979451497396, "num_chars": 120}, {"sum_logits": -32.457515716552734, "num_tokens": 25, "num_tokens_all": 446, "is_greedy": false, "sum_logits_uncond": -65.27337646484375, "logits_per_token": -1.2983006286621093, "logits_per_char": -0.26175415900445753, "num_chars": 124}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 380, "native_id": 11909, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 76.06706237792969, "incorrect_loss_raw": 109.14313507080078, "correct_loss_per_char": 0.5246004301926185, "incorrect_loss_per_char": 0.6153155216062681, "correct_loss_per_token": 2.0017647994192025, "incorrect_loss_per_token": 2.646375318077522, "correct_loss_uncond": -41.48883056640625, "incorrect_loss_uncond": -36.769457499186196}, "model_output": [{"sum_logits": -102.51058959960938, "num_tokens": 38, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -140.3616943359375, "logits_per_token": -2.6976470947265625, "logits_per_char": -0.5925467606913837, "num_chars": 173}, {"sum_logits": -112.70455169677734, "num_tokens": 45, "num_tokens_all": 490, "is_greedy": false, "sum_logits_uncond": -145.94735717773438, "logits_per_token": -2.5045455932617187, "logits_per_char": -0.560719162670534, "num_chars": 201}, {"sum_logits": -76.06706237792969, "num_tokens": 38, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -117.55589294433594, "logits_per_token": -2.0017647994192025, "logits_per_char": -0.5246004301926185, "num_chars": 145}, {"sum_logits": -112.21426391601562, "num_tokens": 41, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -151.42872619628906, "logits_per_token": -2.7369332662442836, "logits_per_char": -0.6926806414568866, "num_chars": 162}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 381, "native_id": 20063, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 96.97930145263672, "incorrect_loss_raw": 89.85255177815755, "correct_loss_per_char": 0.6688227686388739, "incorrect_loss_per_char": 0.5713348546884436, "correct_loss_per_token": 3.0306031703948975, "incorrect_loss_per_token": 2.7556998575488705, "correct_loss_uncond": -21.259559631347656, "incorrect_loss_uncond": -16.358510335286457}, "model_output": [{"sum_logits": -96.97930145263672, "num_tokens": 32, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -118.23886108398438, "logits_per_token": -3.0306031703948975, "logits_per_char": -0.6688227686388739, "num_chars": 145}, {"sum_logits": -97.64765930175781, "num_tokens": 34, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -112.00784301757812, "logits_per_token": -2.871989979463465, "logits_per_char": -0.5335937666762722, "num_chars": 183}, {"sum_logits": -92.4435043334961, "num_tokens": 29, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -107.90738677978516, "logits_per_token": -3.187707045982624, "logits_per_char": -0.6898768980111649, "num_chars": 134}, {"sum_logits": -79.46649169921875, "num_tokens": 36, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -98.71795654296875, "logits_per_token": -2.207402547200521, "logits_per_char": -0.49053389937789355, "num_chars": 162}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 382, "native_id": 9433, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 105.98722839355469, "incorrect_loss_raw": 148.01092529296875, "correct_loss_per_char": 0.49993975657337114, "incorrect_loss_per_char": 0.7492971422306445, "correct_loss_per_token": 2.408800645308061, "incorrect_loss_per_token": 3.638321439472312, "correct_loss_uncond": -26.315460205078125, "incorrect_loss_uncond": -11.922584533691406}, "model_output": [{"sum_logits": -170.2230682373047, "num_tokens": 50, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -185.63189697265625, "logits_per_token": -3.4044613647460937, "logits_per_char": -0.6754883660210503, "num_chars": 252}, {"sum_logits": -105.98722839355469, "num_tokens": 44, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -132.3026885986328, "logits_per_token": -2.408800645308061, "logits_per_char": -0.49993975657337114, "num_chars": 212}, {"sum_logits": -101.79397583007812, "num_tokens": 29, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -113.14339447021484, "logits_per_token": -3.510137097588901, "logits_per_char": -0.8078886970641121, "num_chars": 126}, {"sum_logits": -172.01573181152344, "num_tokens": 43, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -181.02523803710938, "logits_per_token": -4.000365856081941, "logits_per_char": -0.7645143636067708, "num_chars": 225}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 383, "native_id": 44109, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 53.17416000366211, "incorrect_loss_raw": 66.60677337646484, "correct_loss_per_char": 0.5908240000406901, "incorrect_loss_per_char": 0.5905248144145814, "correct_loss_per_token": 2.9541200002034507, "incorrect_loss_per_token": 3.086040058135987, "correct_loss_uncond": -27.22378921508789, "incorrect_loss_uncond": -31.644546508789062}, "model_output": [{"sum_logits": -84.27021789550781, "num_tokens": 25, "num_tokens_all": 423, "is_greedy": false, "sum_logits_uncond": -126.33342742919922, "logits_per_token": -3.3708087158203126, "logits_per_char": -0.6741617431640625, "num_chars": 125}, {"sum_logits": -70.16024780273438, "num_tokens": 23, "num_tokens_all": 421, "is_greedy": false, "sum_logits_uncond": -97.99819946289062, "logits_per_token": -3.050445556640625, "logits_per_char": -0.5438778899436774, "num_chars": 129}, {"sum_logits": -53.17416000366211, "num_tokens": 18, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -80.39794921875, "logits_per_token": -2.9541200002034507, "logits_per_char": -0.5908240000406901, "num_chars": 90}, {"sum_logits": -45.389854431152344, "num_tokens": 16, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -70.42233276367188, "logits_per_token": -2.8368659019470215, "logits_per_char": -0.5535348101360041, "num_chars": 82}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 384, "native_id": 31546, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 104.89545440673828, "incorrect_loss_raw": 130.19231160481772, "correct_loss_per_char": 0.5116851434475038, "incorrect_loss_per_char": 0.7094311836923568, "correct_loss_per_token": 2.1407235593211893, "incorrect_loss_per_token": 3.3352087606463514, "correct_loss_uncond": -39.26042938232422, "incorrect_loss_uncond": -15.116500854492188}, "model_output": [{"sum_logits": -99.13485717773438, "num_tokens": 38, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -116.32926940917969, "logits_per_token": -2.60881203099301, "logits_per_char": -0.5477063932471512, "num_chars": 181}, {"sum_logits": -104.89545440673828, "num_tokens": 49, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -144.1558837890625, "logits_per_token": -2.1407235593211893, "logits_per_char": -0.5116851434475038, "num_chars": 205}, {"sum_logits": -139.37535095214844, "num_tokens": 32, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -154.3779296875, "logits_per_token": -4.355479717254639, "logits_per_char": -0.8765745342902418, "num_chars": 159}, {"sum_logits": -152.0667266845703, "num_tokens": 50, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -165.21923828125, "logits_per_token": -3.041334533691406, "logits_per_char": -0.7040126235396774, "num_chars": 216}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 385, "native_id": 42272, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 43.06293869018555, "incorrect_loss_raw": 66.07129287719727, "correct_loss_per_char": 0.49497630678374194, "incorrect_loss_per_char": 0.689351968940637, "correct_loss_per_token": 2.870862579345703, "incorrect_loss_per_token": 3.462256348304201, "correct_loss_uncond": -23.794239044189453, "incorrect_loss_uncond": -20.451910654703777}, "model_output": [{"sum_logits": -43.06293869018555, "num_tokens": 15, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -66.857177734375, "logits_per_token": -2.870862579345703, "logits_per_char": -0.49497630678374194, "num_chars": 87}, {"sum_logits": -65.54225158691406, "num_tokens": 19, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -84.2676010131836, "logits_per_token": -3.4495921887849508, "logits_per_char": -0.7202445229331216, "num_chars": 91}, {"sum_logits": -79.4815902709961, "num_tokens": 22, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -103.01659393310547, "logits_per_token": -3.61279955777255, "logits_per_char": -0.6911442632260529, "num_chars": 115}, {"sum_logits": -53.19003677368164, "num_tokens": 16, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -72.28541564941406, "logits_per_token": -3.3243772983551025, "logits_per_char": -0.6566671206627364, "num_chars": 81}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 386, "native_id": 801, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 32.90707778930664, "incorrect_loss_raw": 45.16709899902344, "correct_loss_per_char": 0.47691417085951654, "incorrect_loss_per_char": 0.7948839141661862, "correct_loss_per_token": 2.5313136761005106, "incorrect_loss_per_token": 3.5156562490001364, "correct_loss_uncond": -25.563941955566406, "incorrect_loss_uncond": -21.82321039835612}, "model_output": [{"sum_logits": -49.10770034790039, "num_tokens": 9, "num_tokens_all": 425, "is_greedy": false, "sum_logits_uncond": -67.05445861816406, "logits_per_token": -5.45641114976671, "logits_per_char": -1.116084098815918, "num_chars": 44}, {"sum_logits": -28.63260269165039, "num_tokens": 13, "num_tokens_all": 429, "is_greedy": false, "sum_logits_uncond": -51.070499420166016, "logits_per_token": -2.2025078993577223, "logits_per_char": -0.5726520538330078, "num_chars": 50}, {"sum_logits": -32.90707778930664, "num_tokens": 13, "num_tokens_all": 429, "is_greedy": false, "sum_logits_uncond": -58.47101974487305, "logits_per_token": -2.5313136761005106, "logits_per_char": -0.47691417085951654, "num_chars": 69}, {"sum_logits": -57.76099395751953, "num_tokens": 20, "num_tokens_all": 436, "is_greedy": false, "sum_logits_uncond": -82.8459701538086, "logits_per_token": -2.8880496978759767, "logits_per_char": -0.6959155898496329, "num_chars": 83}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 387, "native_id": 41902, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 87.48585510253906, "incorrect_loss_raw": 59.119513193766274, "correct_loss_per_char": 0.5056985844077403, "incorrect_loss_per_char": 0.5006267504874778, "correct_loss_per_token": 2.243227053911258, "incorrect_loss_per_token": 2.136788179438994, "correct_loss_uncond": -29.834732055664062, "incorrect_loss_uncond": -24.165808359781902}, "model_output": [{"sum_logits": -58.23282241821289, "num_tokens": 27, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -84.2564926147461, "logits_per_token": -2.1567712006745516, "logits_per_char": -0.5199359144483294, "num_chars": 112}, {"sum_logits": -87.48585510253906, "num_tokens": 39, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -117.32058715820312, "logits_per_token": -2.243227053911258, "logits_per_char": -0.5056985844077403, "num_chars": 173}, {"sum_logits": -62.041107177734375, "num_tokens": 29, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -83.27659606933594, "logits_per_token": -2.139348523370151, "logits_per_char": -0.4629933371472715, "num_chars": 134}, {"sum_logits": -57.08460998535156, "num_tokens": 27, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -82.3228759765625, "logits_per_token": -2.11424481427228, "logits_per_char": -0.5189509998668324, "num_chars": 110}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 388, "native_id": 25466, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 56.555694580078125, "incorrect_loss_raw": 100.36151377360027, "correct_loss_per_char": 0.4833820049579327, "incorrect_loss_per_char": 0.6747487110913856, "correct_loss_per_token": 2.0198462350027904, "incorrect_loss_per_token": 3.175685609698492, "correct_loss_uncond": -28.739822387695312, "incorrect_loss_uncond": -16.551437377929688}, "model_output": [{"sum_logits": -56.555694580078125, "num_tokens": 28, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -85.29551696777344, "logits_per_token": -2.0198462350027904, "logits_per_char": -0.4833820049579327, "num_chars": 117}, {"sum_logits": -106.76419830322266, "num_tokens": 32, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -117.2743911743164, "logits_per_token": -3.336381196975708, "logits_per_char": -0.6280246959013097, "num_chars": 170}, {"sum_logits": -108.04096984863281, "num_tokens": 29, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -125.48117065429688, "logits_per_token": -3.725550684435614, "logits_per_char": -0.7886202178732322, "num_chars": 137}, {"sum_logits": -86.27937316894531, "num_tokens": 35, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -107.98329162597656, "logits_per_token": -2.465124947684152, "logits_per_char": -0.6076012194996149, "num_chars": 142}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 389, "native_id": 5246, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 83.53311157226562, "incorrect_loss_raw": 131.00130971272787, "correct_loss_per_char": 0.4283749311398237, "incorrect_loss_per_char": 0.5756043342789691, "correct_loss_per_token": 2.1418746556991186, "incorrect_loss_per_token": 2.771786773135723, "correct_loss_uncond": -15.84735107421875, "incorrect_loss_uncond": -17.26494598388672}, "model_output": [{"sum_logits": -83.53311157226562, "num_tokens": 39, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -99.38046264648438, "logits_per_token": -2.1418746556991186, "logits_per_char": -0.4283749311398237, "num_chars": 195}, {"sum_logits": -178.44476318359375, "num_tokens": 54, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -193.06797790527344, "logits_per_token": -3.3045326515480324, "logits_per_char": -0.6584677608250692, "num_chars": 271}, {"sum_logits": -137.1306915283203, "num_tokens": 49, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -155.4274444580078, "logits_per_token": -2.798585541394292, "logits_per_char": -0.5962203979492188, "num_chars": 230}, {"sum_logits": -77.42847442626953, "num_tokens": 35, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -96.3033447265625, "logits_per_token": -2.2122421264648438, "logits_per_char": -0.4721248440626191, "num_chars": 164}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 390, "native_id": 48310, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 22.64434051513672, "incorrect_loss_raw": 29.328437487284344, "correct_loss_per_char": 0.4817944790454621, "incorrect_loss_per_char": 0.61972991198841, "correct_loss_per_token": 2.264434051513672, "incorrect_loss_per_token": 2.487719582117092, "correct_loss_uncond": -31.048385620117188, "incorrect_loss_uncond": -25.64468288421631}, "model_output": [{"sum_logits": -22.64434051513672, "num_tokens": 10, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -53.692726135253906, "logits_per_token": -2.264434051513672, "logits_per_char": -0.4817944790454621, "num_chars": 47}, {"sum_logits": -49.81389236450195, "num_tokens": 19, "num_tokens_all": 420, "is_greedy": false, "sum_logits_uncond": -72.97608947753906, "logits_per_token": -2.6217838086579976, "logits_per_char": -0.7016041178098866, "num_chars": 71}, {"sum_logits": -22.807926177978516, "num_tokens": 10, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -49.89144515991211, "logits_per_token": -2.2807926177978515, "logits_per_char": -0.5430458613804409, "num_chars": 42}, {"sum_logits": -15.363493919372559, "num_tokens": 6, "num_tokens_all": 407, "is_greedy": false, "sum_logits_uncond": -42.05182647705078, "logits_per_token": -2.5605823198954263, "logits_per_char": -0.6145397567749024, "num_chars": 25}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 391, "native_id": 29891, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 41.66109085083008, "incorrect_loss_raw": 75.73490397135417, "correct_loss_per_char": 0.37873718955300073, "incorrect_loss_per_char": 0.4987128796664801, "correct_loss_per_token": 1.6664436340332032, "incorrect_loss_per_token": 2.335701483698072, "correct_loss_uncond": -22.587963104248047, "incorrect_loss_uncond": -25.07232411702474}, "model_output": [{"sum_logits": -94.12809753417969, "num_tokens": 41, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -111.09803771972656, "logits_per_token": -2.295807256931212, "logits_per_char": -0.45916145138624237, "num_chars": 205}, {"sum_logits": -53.0546875, "num_tokens": 24, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -81.36317443847656, "logits_per_token": -2.2106119791666665, "logits_per_char": -0.4653919956140351, "num_chars": 114}, {"sum_logits": -41.66109085083008, "num_tokens": 25, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -64.24905395507812, "logits_per_token": -1.6664436340332032, "logits_per_char": -0.37873718955300073, "num_chars": 110}, {"sum_logits": -80.02192687988281, "num_tokens": 32, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -109.9604721069336, "logits_per_token": -2.500685214996338, "logits_per_char": -0.5715851919991629, "num_chars": 140}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 392, "native_id": 9502, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 79.51627349853516, "incorrect_loss_raw": 96.91843159993489, "correct_loss_per_char": 0.3614376068115234, "incorrect_loss_per_char": 0.6099860573073815, "correct_loss_per_token": 1.8932446071079798, "incorrect_loss_per_token": 2.7268909036157285, "correct_loss_uncond": -38.27899169921875, "incorrect_loss_uncond": -15.019485473632812}, "model_output": [{"sum_logits": -122.7874984741211, "num_tokens": 38, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -136.14610290527344, "logits_per_token": -3.2312499598452917, "logits_per_char": -0.6329252498666036, "num_chars": 194}, {"sum_logits": -79.51627349853516, "num_tokens": 42, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -117.7952651977539, "logits_per_token": -1.8932446071079798, "logits_per_char": -0.3614376068115234, "num_chars": 220}, {"sum_logits": -68.60540771484375, "num_tokens": 26, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -91.15499877929688, "logits_per_token": -2.6386695274939904, "logits_per_char": -0.6125482831682477, "num_chars": 112}, {"sum_logits": -99.36238861083984, "num_tokens": 43, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -108.51264953613281, "logits_per_token": -2.3107532235079034, "logits_per_char": -0.5844846388872932, "num_chars": 170}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 393, "native_id": 43834, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 13.861562728881836, "incorrect_loss_raw": 79.39844131469727, "correct_loss_per_char": 0.3223619239274846, "incorrect_loss_per_char": 0.6412224830403299, "correct_loss_per_token": 1.5401736365424261, "incorrect_loss_per_token": 3.2186870251917266, "correct_loss_uncond": -26.71323585510254, "incorrect_loss_uncond": -20.52333704630534}, "model_output": [{"sum_logits": -37.440982818603516, "num_tokens": 19, "num_tokens_all": 427, "is_greedy": false, "sum_logits_uncond": -63.80828094482422, "logits_per_token": -1.9705780430843955, "logits_per_char": -0.44572598593575613, "num_chars": 84}, {"sum_logits": -13.861562728881836, "num_tokens": 9, "num_tokens_all": 417, "is_greedy": false, "sum_logits_uncond": -40.574798583984375, "logits_per_token": -1.5401736365424261, "logits_per_char": -0.3223619239274846, "num_chars": 43}, {"sum_logits": -43.303077697753906, "num_tokens": 11, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -56.36809539794922, "logits_per_token": -3.936643427068537, "logits_per_char": -0.7098865196353099, "num_chars": 61}, {"sum_logits": -157.45126342773438, "num_tokens": 42, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -179.58895874023438, "logits_per_token": -3.748839605422247, "logits_per_char": -0.7680549435499238, "num_chars": 205}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 394, "native_id": 18984, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 243.4667205810547, "incorrect_loss_raw": 104.61980946858723, "correct_loss_per_char": 0.9152884232370477, "incorrect_loss_per_char": 0.6529012470317932, "correct_loss_per_token": 3.633831650463503, "incorrect_loss_per_token": 2.9440062734427666, "correct_loss_uncond": -30.092666625976562, "incorrect_loss_uncond": -12.171122233072916}, "model_output": [{"sum_logits": -121.95347595214844, "num_tokens": 45, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -132.6843719482422, "logits_per_token": -2.7100772433810763, "logits_per_char": -0.666412436896986, "num_chars": 183}, {"sum_logits": -64.6013412475586, "num_tokens": 26, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -82.8367691040039, "logits_per_token": -2.484666971059946, "logits_per_char": -0.5521482157910991, "num_chars": 117}, {"sum_logits": -127.30461120605469, "num_tokens": 35, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -134.85165405273438, "logits_per_token": -3.637274605887277, "logits_per_char": -0.7401430884072947, "num_chars": 172}, {"sum_logits": -243.4667205810547, "num_tokens": 67, "num_tokens_all": 495, "is_greedy": false, "sum_logits_uncond": -273.55938720703125, "logits_per_token": -3.633831650463503, "logits_per_char": -0.9152884232370477, "num_chars": 266}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 395, "native_id": 46983, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 60.049964904785156, "incorrect_loss_raw": 132.32707977294922, "correct_loss_per_char": 0.46914035081863403, "incorrect_loss_per_char": 0.6503682117061224, "correct_loss_per_token": 2.4019985961914063, "incorrect_loss_per_token": 3.192818839438246, "correct_loss_uncond": -28.796531677246094, "incorrect_loss_uncond": -11.496543884277344}, "model_output": [{"sum_logits": -151.83547973632812, "num_tokens": 48, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -153.2364501953125, "logits_per_token": -3.1632391611735025, "logits_per_char": -0.6406560326427347, "num_chars": 237}, {"sum_logits": -60.049964904785156, "num_tokens": 25, "num_tokens_all": 446, "is_greedy": false, "sum_logits_uncond": -88.84649658203125, "logits_per_token": -2.4019985961914063, "logits_per_char": -0.46914035081863403, "num_chars": 128}, {"sum_logits": -110.79569244384766, "num_tokens": 43, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -125.19105529785156, "logits_per_token": -2.5766440103220387, "logits_per_char": -0.5681830381735777, "num_chars": 195}, {"sum_logits": -134.35006713867188, "num_tokens": 35, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -153.04336547851562, "logits_per_token": -3.8385733468191963, "logits_per_char": -0.7422655643020546, "num_chars": 181}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 396, "native_id": 25571, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 63.27587890625, "incorrect_loss_raw": 123.10396575927734, "correct_loss_per_char": 0.3195751459911616, "incorrect_loss_per_char": 0.5786841759190973, "correct_loss_per_token": 1.4715320675872092, "incorrect_loss_per_token": 2.748783354431996, "correct_loss_uncond": -22.7667236328125, "incorrect_loss_uncond": -18.87550099690755}, "model_output": [{"sum_logits": -81.0740737915039, "num_tokens": 31, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -96.85009765625, "logits_per_token": -2.6152927029517388, "logits_per_char": -0.5669515649755518, "num_chars": 143}, {"sum_logits": -170.00244140625, "num_tokens": 59, "num_tokens_all": 493, "is_greedy": false, "sum_logits_uncond": -191.7254638671875, "logits_per_token": -2.881397311970339, "logits_per_char": -0.5923430014155052, "num_chars": 287}, {"sum_logits": -63.27587890625, "num_tokens": 43, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -86.0426025390625, "logits_per_token": -1.4715320675872092, "logits_per_char": -0.3195751459911616, "num_chars": 198}, {"sum_logits": -118.23538208007812, "num_tokens": 43, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -137.3628387451172, "logits_per_token": -2.7496600483739098, "logits_per_char": -0.5767579613662348, "num_chars": 205}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 397, "native_id": 40273, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 96.18299102783203, "incorrect_loss_raw": 142.56838989257812, "correct_loss_per_char": 0.5829272183504972, "incorrect_loss_per_char": 0.7681256006599287, "correct_loss_per_token": 2.828911500818589, "incorrect_loss_per_token": 3.572026950010434, "correct_loss_uncond": -24.347183227539062, "incorrect_loss_uncond": -12.491053263346354}, "model_output": [{"sum_logits": -124.10542297363281, "num_tokens": 31, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -142.77536010742188, "logits_per_token": -4.0034007410849295, "logits_per_char": -0.7805372514064957, "num_chars": 159}, {"sum_logits": -96.18299102783203, "num_tokens": 34, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -120.5301742553711, "logits_per_token": -2.828911500818589, "logits_per_char": -0.5829272183504972, "num_chars": 165}, {"sum_logits": -177.6721954345703, "num_tokens": 51, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -179.09461975097656, "logits_per_token": -3.483768537932751, "logits_per_char": -0.8973343203766178, "num_chars": 198}, {"sum_logits": -125.92755126953125, "num_tokens": 39, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -143.308349609375, "logits_per_token": -3.228911571013622, "logits_per_char": -0.6265052301966729, "num_chars": 201}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 398, "native_id": 48795, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 48.631595611572266, "incorrect_loss_raw": 73.07672119140625, "correct_loss_per_char": 0.4502925519590025, "incorrect_loss_per_char": 0.5671863861140999, "correct_loss_per_token": 2.026316483815511, "incorrect_loss_per_token": 2.748906212291498, "correct_loss_uncond": -22.91500473022461, "incorrect_loss_uncond": -26.695513407389324}, "model_output": [{"sum_logits": -84.33380126953125, "num_tokens": 24, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -109.6362075805664, "logits_per_token": -3.5139083862304688, "logits_per_char": -0.7397701865748355, "num_chars": 114}, {"sum_logits": -48.631595611572266, "num_tokens": 24, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -71.54660034179688, "logits_per_token": -2.026316483815511, "logits_per_char": -0.4502925519590025, "num_chars": 108}, {"sum_logits": -68.95258331298828, "num_tokens": 29, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -96.47120666503906, "logits_per_token": -2.3776752866547683, "logits_per_char": -0.5070042890660903, "num_chars": 136}, {"sum_logits": -65.94377899169922, "num_tokens": 28, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -93.20928955078125, "logits_per_token": -2.355134963989258, "logits_per_char": -0.4547846827013739, "num_chars": 145}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 399, "native_id": 47146, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 88.85496520996094, "incorrect_loss_raw": 123.15203857421875, "correct_loss_per_char": 0.4334388546827363, "incorrect_loss_per_char": 0.6226125028148779, "correct_loss_per_token": 2.1671942734136813, "incorrect_loss_per_token": 2.7927061106170186, "correct_loss_uncond": -15.826850891113281, "incorrect_loss_uncond": -15.675898234049479}, "model_output": [{"sum_logits": -88.85496520996094, "num_tokens": 41, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -104.68181610107422, "logits_per_token": -2.1671942734136813, "logits_per_char": -0.4334388546827363, "num_chars": 205}, {"sum_logits": -150.7657012939453, "num_tokens": 58, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -168.08135986328125, "logits_per_token": -2.599408642999057, "logits_per_char": -0.5522553160950378, "num_chars": 273}, {"sum_logits": -86.67375183105469, "num_tokens": 32, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -102.49388122558594, "logits_per_token": -2.708554744720459, "logits_per_char": -0.6420277913411458, "num_chars": 135}, {"sum_logits": -132.01666259765625, "num_tokens": 43, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -145.9085693359375, "logits_per_token": -3.0701549441315406, "logits_per_char": -0.6735544010084502, "num_chars": 196}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 400, "native_id": 34272, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 99.40286254882812, "incorrect_loss_raw": 148.5316645304362, "correct_loss_per_char": 0.6098335125694977, "incorrect_loss_per_char": 0.6586444506204185, "correct_loss_per_token": 2.6158648039165295, "incorrect_loss_per_token": 3.0244187372374216, "correct_loss_uncond": -37.90509033203125, "incorrect_loss_uncond": -24.462196350097656}, "model_output": [{"sum_logits": -173.48558044433594, "num_tokens": 57, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -195.36483764648438, "logits_per_token": -3.043606674462034, "logits_per_char": -0.6354783166459191, "num_chars": 273}, {"sum_logits": -99.40286254882812, "num_tokens": 38, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -137.30795288085938, "logits_per_token": -2.6158648039165295, "logits_per_char": -0.6098335125694977, "num_chars": 163}, {"sum_logits": -121.30423736572266, "num_tokens": 43, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -136.31874084472656, "logits_per_token": -2.8210287759470387, "logits_per_char": -0.6188991702332789, "num_chars": 196}, {"sum_logits": -150.80517578125, "num_tokens": 47, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -187.29800415039062, "logits_per_token": -3.2086207613031914, "logits_per_char": -0.7215558649820574, "num_chars": 209}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 401, "native_id": 38221, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 102.47262573242188, "incorrect_loss_raw": 100.82858276367188, "correct_loss_per_char": 0.4657846624200994, "incorrect_loss_per_char": 0.6854635046594745, "correct_loss_per_token": 2.439824422200521, "incorrect_loss_per_token": 3.0710966322157116, "correct_loss_uncond": -45.40254211425781, "incorrect_loss_uncond": -28.868914286295574}, "model_output": [{"sum_logits": -88.10763549804688, "num_tokens": 24, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -122.37149047851562, "logits_per_token": -3.6711514790852866, "logits_per_char": -0.8638003480200674, "num_chars": 102}, {"sum_logits": -102.47262573242188, "num_tokens": 42, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -147.8751678466797, "logits_per_token": -2.439824422200521, "logits_per_char": -0.4657846624200994, "num_chars": 220}, {"sum_logits": -91.95850372314453, "num_tokens": 35, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -111.1188735961914, "logits_per_token": -2.627385820661272, "logits_per_char": -0.5711708305785375, "num_chars": 161}, {"sum_logits": -122.41960906982422, "num_tokens": 42, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -155.6021270751953, "logits_per_token": -2.9147525969005765, "logits_per_char": -0.6214193353798184, "num_chars": 197}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 402, "native_id": 30058, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 154.2924346923828, "incorrect_loss_raw": 117.74200693766277, "correct_loss_per_char": 0.7382413143176211, "incorrect_loss_per_char": 0.6576523757844899, "correct_loss_per_token": 3.5066462430087, "incorrect_loss_per_token": 2.8229717875594083, "correct_loss_uncond": -9.99554443359375, "incorrect_loss_uncond": -18.99249521891276}, "model_output": [{"sum_logits": -138.4130096435547, "num_tokens": 49, "num_tokens_all": 490, "is_greedy": false, "sum_logits_uncond": -155.76918029785156, "logits_per_token": -2.824755298848055, "logits_per_char": -0.6751854128953887, "num_chars": 205}, {"sum_logits": -98.22077941894531, "num_tokens": 37, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -124.08953857421875, "logits_per_token": -2.654615659971495, "logits_per_char": -0.5644872380399156, "num_chars": 174}, {"sum_logits": -116.59223175048828, "num_tokens": 39, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -130.34478759765625, "logits_per_token": -2.989544403858674, "logits_per_char": -0.7332844764181653, "num_chars": 159}, {"sum_logits": -154.2924346923828, "num_tokens": 44, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -164.28797912597656, "logits_per_token": -3.5066462430087, "logits_per_char": -0.7382413143176211, "num_chars": 209}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 403, "native_id": 21295, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 62.62220764160156, "incorrect_loss_raw": 144.18155670166016, "correct_loss_per_char": 0.3913887977600098, "incorrect_loss_per_char": 0.7082457316500762, "correct_loss_per_token": 1.6924920984216638, "incorrect_loss_per_token": 3.152286884038135, "correct_loss_uncond": -29.819503784179688, "incorrect_loss_uncond": -14.07733408610026}, "model_output": [{"sum_logits": -127.93257904052734, "num_tokens": 44, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -143.63671875, "logits_per_token": -2.9075586145574395, "logits_per_char": -0.6527172400026905, "num_chars": 196}, {"sum_logits": -158.29598999023438, "num_tokens": 48, "num_tokens_all": 495, "is_greedy": false, "sum_logits_uncond": -171.16531372070312, "logits_per_token": -3.2978331247965493, "logits_per_char": -0.7914799499511719, "num_chars": 200}, {"sum_logits": -62.62220764160156, "num_tokens": 37, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -92.44171142578125, "logits_per_token": -1.6924920984216638, "logits_per_char": -0.3913887977600098, "num_chars": 160}, {"sum_logits": -146.31610107421875, "num_tokens": 45, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -159.97463989257812, "logits_per_token": -3.2514689127604166, "logits_per_char": -0.6805400049963662, "num_chars": 215}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 404, "native_id": 25717, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 13.22431468963623, "incorrect_loss_raw": 46.29627482096354, "correct_loss_per_char": 0.33908499204195464, "incorrect_loss_per_char": 0.7188477636046556, "correct_loss_per_token": 1.46936829884847, "incorrect_loss_per_token": 3.1703899011066063, "correct_loss_uncond": -25.20744800567627, "incorrect_loss_uncond": -13.360216776529947}, "model_output": [{"sum_logits": -83.38329315185547, "num_tokens": 22, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -97.93309020996094, "logits_per_token": -3.790149688720703, "logits_per_char": -0.9368909342905108, "num_chars": 89}, {"sum_logits": -13.22431468963623, "num_tokens": 9, "num_tokens_all": 395, "is_greedy": false, "sum_logits_uncond": -38.4317626953125, "logits_per_token": -1.46936829884847, "logits_per_char": -0.33908499204195464, "num_chars": 39}, {"sum_logits": -22.08993148803711, "num_tokens": 11, "num_tokens_all": 397, "is_greedy": false, "sum_logits_uncond": -34.6680908203125, "logits_per_token": -2.008175589821555, "logits_per_char": -0.46020690600077313, "num_chars": 48}, {"sum_logits": -33.41559982299805, "num_tokens": 9, "num_tokens_all": 395, "is_greedy": false, "sum_logits_uncond": -46.36829376220703, "logits_per_token": -3.7128444247775607, "logits_per_char": -0.7594454505226829, "num_chars": 44}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 405, "native_id": 40119, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 119.9836654663086, "incorrect_loss_raw": 86.66984049479167, "correct_loss_per_char": 0.6450734702489709, "incorrect_loss_per_char": 0.5337927828655705, "correct_loss_per_token": 2.66630367702908, "incorrect_loss_per_token": 2.4221585698488384, "correct_loss_uncond": -16.70124053955078, "incorrect_loss_uncond": -24.967076619466145}, "model_output": [{"sum_logits": -74.15078735351562, "num_tokens": 34, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -93.49182891845703, "logits_per_token": -2.1809055103975186, "logits_per_char": -0.5492650915075231, "num_chars": 135}, {"sum_logits": -119.52261352539062, "num_tokens": 44, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -148.599365234375, "logits_per_token": -2.7164230346679688, "logits_per_char": -0.5533454329879196, "num_chars": 216}, {"sum_logits": -66.33612060546875, "num_tokens": 28, "num_tokens_all": 448, "is_greedy": false, "sum_logits_uncond": -92.8195571899414, "logits_per_token": -2.369147164481027, "logits_per_char": -0.4987678241012688, "num_chars": 133}, {"sum_logits": -119.9836654663086, "num_tokens": 45, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -136.68490600585938, "logits_per_token": -2.66630367702908, "logits_per_char": -0.6450734702489709, "num_chars": 186}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 406, "native_id": 24651, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 85.30583190917969, "incorrect_loss_raw": 90.48890813191731, "correct_loss_per_char": 0.5575544569227431, "incorrect_loss_per_char": 0.542093349891703, "correct_loss_per_token": 2.4373094831194195, "incorrect_loss_per_token": 2.501895571695748, "correct_loss_uncond": -19.560653686523438, "incorrect_loss_uncond": -21.359722137451172}, "model_output": [{"sum_logits": -124.11843872070312, "num_tokens": 39, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -140.30474853515625, "logits_per_token": -3.1825240697616186, "logits_per_char": -0.7092482212611607, "num_chars": 175}, {"sum_logits": -85.30583190917969, "num_tokens": 35, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -104.86648559570312, "logits_per_token": -2.4373094831194195, "logits_per_char": -0.5575544569227431, "num_chars": 153}, {"sum_logits": -99.93898010253906, "num_tokens": 38, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -120.45838928222656, "logits_per_token": -2.629973160593133, "logits_per_char": -0.5315903196943567, "num_chars": 188}, {"sum_logits": -47.409305572509766, "num_tokens": 28, "num_tokens_all": 443, "is_greedy": false, "sum_logits_uncond": -74.78275299072266, "logits_per_token": -1.6931894847324915, "logits_per_char": -0.3854415087195916, "num_chars": 123}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 407, "native_id": 46289, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 102.40459442138672, "incorrect_loss_raw": 72.92723337809245, "correct_loss_per_char": 0.541823250906808, "incorrect_loss_per_char": 0.50936520939279, "correct_loss_per_token": 2.2756576538085938, "incorrect_loss_per_token": 2.2754276813697696, "correct_loss_uncond": -31.321861267089844, "incorrect_loss_uncond": -26.71155548095703}, "model_output": [{"sum_logits": -81.22515869140625, "num_tokens": 34, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -104.32038879394531, "logits_per_token": -2.3889752556295956, "logits_per_char": -0.5240332818800403, "num_chars": 155}, {"sum_logits": -68.84294128417969, "num_tokens": 31, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -95.25730895996094, "logits_per_token": -2.220740041425151, "logits_per_char": -0.5025032210524065, "num_chars": 137}, {"sum_logits": -68.7136001586914, "num_tokens": 31, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -99.33866882324219, "logits_per_token": -2.2165677470545613, "logits_per_char": -0.5015591252459227, "num_chars": 137}, {"sum_logits": -102.40459442138672, "num_tokens": 45, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -133.72645568847656, "logits_per_token": -2.2756576538085938, "logits_per_char": -0.541823250906808, "num_chars": 189}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 408, "native_id": 32044, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.664772033691406, "incorrect_loss_raw": 20.472591718037922, "correct_loss_per_char": 0.3430815304026884, "incorrect_loss_per_char": 0.4598672033061577, "correct_loss_per_token": 1.4580965042114258, "incorrect_loss_per_token": 2.2759188440110947, "correct_loss_uncond": -22.135635375976562, "incorrect_loss_uncond": -21.075570106506348}, "model_output": [{"sum_logits": -10.289685249328613, "num_tokens": 6, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -32.12042999267578, "logits_per_token": -1.7149475415547688, "logits_per_char": -0.3810994536788375, "num_chars": 27}, {"sum_logits": -11.664772033691406, "num_tokens": 8, "num_tokens_all": 405, "is_greedy": false, "sum_logits_uncond": -33.80040740966797, "logits_per_token": -1.4580965042114258, "logits_per_char": -0.3430815304026884, "num_chars": 34}, {"sum_logits": -29.87540626525879, "num_tokens": 10, "num_tokens_all": 407, "is_greedy": false, "sum_logits_uncond": -50.92654037475586, "logits_per_token": -2.987540626525879, "logits_per_char": -0.5975081253051758, "num_chars": 50}, {"sum_logits": -21.252683639526367, "num_tokens": 10, "num_tokens_all": 407, "is_greedy": false, "sum_logits_uncond": -41.59751510620117, "logits_per_token": -2.1252683639526366, "logits_per_char": -0.40099403093445973, "num_chars": 53}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 409, "native_id": 26807, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 22.926395416259766, "incorrect_loss_raw": 43.57226053873698, "correct_loss_per_char": 0.4325734984199956, "incorrect_loss_per_char": 0.6721407305889198, "correct_loss_per_token": 1.9105329513549805, "incorrect_loss_per_token": 2.668315718827643, "correct_loss_uncond": -29.267425537109375, "incorrect_loss_uncond": -22.67121124267578}, "model_output": [{"sum_logits": -28.48223114013672, "num_tokens": 12, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -52.76011657714844, "logits_per_token": -2.37351926167806, "logits_per_char": -0.6781483604794457, "num_chars": 42}, {"sum_logits": -40.48192596435547, "num_tokens": 17, "num_tokens_all": 423, "is_greedy": false, "sum_logits_uncond": -61.850257873535156, "logits_per_token": -2.3812897626091454, "logits_per_char": -0.5257392982383827, "num_chars": 77}, {"sum_logits": -22.926395416259766, "num_tokens": 12, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -52.19382095336914, "logits_per_token": -1.9105329513549805, "logits_per_char": -0.4325734984199956, "num_chars": 53}, {"sum_logits": -61.75262451171875, "num_tokens": 19, "num_tokens_all": 425, "is_greedy": false, "sum_logits_uncond": -84.12004089355469, "logits_per_token": -3.250138132195724, "logits_per_char": -0.812534533048931, "num_chars": 76}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 410, "native_id": 32887, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 84.37818908691406, "incorrect_loss_raw": 96.70220438639323, "correct_loss_per_char": 0.48773519703418533, "incorrect_loss_per_char": 0.6584193833247136, "correct_loss_per_token": 2.163543309920873, "incorrect_loss_per_token": 2.820754754936749, "correct_loss_uncond": -32.47932434082031, "incorrect_loss_uncond": -20.556716918945312}, "model_output": [{"sum_logits": -119.09225463867188, "num_tokens": 34, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -139.9190673828125, "logits_per_token": -3.5027133717256436, "logits_per_char": -0.8446259194232048, "num_chars": 141}, {"sum_logits": -84.37818908691406, "num_tokens": 39, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -116.85751342773438, "logits_per_token": -2.163543309920873, "logits_per_char": -0.48773519703418533, "num_chars": 173}, {"sum_logits": -47.9849853515625, "num_tokens": 21, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -79.55267333984375, "logits_per_token": -2.284999302455357, "logits_per_char": -0.521575927734375, "num_chars": 92}, {"sum_logits": -123.02937316894531, "num_tokens": 46, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -132.30502319335938, "logits_per_token": -2.674551590629246, "logits_per_char": -0.609056302816561, "num_chars": 202}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 411, "native_id": 35187, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 85.08407592773438, "incorrect_loss_raw": 104.87744140625, "correct_loss_per_char": 0.4275581704911275, "incorrect_loss_per_char": 0.6253563770755157, "correct_loss_per_token": 2.2995696196684965, "incorrect_loss_per_token": 2.827519348187378, "correct_loss_uncond": -23.046768188476562, "incorrect_loss_uncond": -23.091407775878906}, "model_output": [{"sum_logits": -85.08407592773438, "num_tokens": 37, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -108.13084411621094, "logits_per_token": -2.2995696196684965, "logits_per_char": -0.4275581704911275, "num_chars": 199}, {"sum_logits": -90.8470458984375, "num_tokens": 37, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -123.107421875, "logits_per_token": -2.455325564822635, "logits_per_char": -0.5749813031546677, "num_chars": 158}, {"sum_logits": -116.13749694824219, "num_tokens": 42, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -137.3212127685547, "logits_per_token": -2.7651784987676713, "logits_per_char": -0.5637742570303019, "num_chars": 206}, {"sum_logits": -107.64778137207031, "num_tokens": 33, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -123.47791290283203, "logits_per_token": -3.2620539809718276, "logits_per_char": -0.7373135710415775, "num_chars": 146}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 412, "native_id": 23897, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 48.537269592285156, "incorrect_loss_raw": 22.35810152689616, "correct_loss_per_char": 0.4536193419839734, "incorrect_loss_per_char": 0.5586799281361053, "correct_loss_per_token": 2.1103160692297895, "incorrect_loss_per_token": 2.8024951183434688, "correct_loss_uncond": -35.998939514160156, "incorrect_loss_uncond": -20.91109275817871}, "model_output": [{"sum_logits": -48.537269592285156, "num_tokens": 23, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -84.53620910644531, "logits_per_token": -2.1103160692297895, "logits_per_char": -0.4536193419839734, "num_chars": 107}, {"sum_logits": -36.58466339111328, "num_tokens": 11, "num_tokens_all": 400, "is_greedy": false, "sum_logits_uncond": -59.70191955566406, "logits_per_token": -3.3258784901012075, "logits_per_char": -0.6307700584674704, "num_chars": 58}, {"sum_logits": -7.8160552978515625, "num_tokens": 6, "num_tokens_all": 395, "is_greedy": false, "sum_logits_uncond": -32.175140380859375, "logits_per_token": -1.3026758829752605, "logits_per_char": -0.28948352955005785, "num_chars": 27}, {"sum_logits": -22.673585891723633, "num_tokens": 6, "num_tokens_all": 395, "is_greedy": false, "sum_logits_uncond": -37.93052291870117, "logits_per_token": -3.778930981953939, "logits_per_char": -0.7557861963907878, "num_chars": 30}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 413, "native_id": 42271, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 122.71694946289062, "incorrect_loss_raw": 111.60882822672527, "correct_loss_per_char": 0.6358391163880344, "incorrect_loss_per_char": 0.6844833777185957, "correct_loss_per_token": 3.146588447766426, "incorrect_loss_per_token": 3.3018330668425784, "correct_loss_uncond": -14.948394775390625, "incorrect_loss_uncond": -17.96947733561198}, "model_output": [{"sum_logits": -78.64141082763672, "num_tokens": 29, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -94.6214370727539, "logits_per_token": -2.711772787159887, "logits_per_char": -0.5538127523073009, "num_chars": 142}, {"sum_logits": -142.81997680664062, "num_tokens": 38, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -161.0068359375, "logits_per_token": -3.7584204422800163, "logits_per_char": -0.8208044644059806, "num_chars": 174}, {"sum_logits": -122.71694946289062, "num_tokens": 39, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -137.66534423828125, "logits_per_token": -3.146588447766426, "logits_per_char": -0.6358391163880344, "num_chars": 193}, {"sum_logits": -113.36509704589844, "num_tokens": 33, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -133.1066436767578, "logits_per_token": -3.4353059710878315, "logits_per_char": -0.6788329164425057, "num_chars": 167}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 414, "native_id": 43600, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 78.8727798461914, "incorrect_loss_raw": 102.60322570800781, "correct_loss_per_char": 0.4532918381965023, "incorrect_loss_per_char": 0.6147831596820371, "correct_loss_per_token": 2.0223789704151645, "incorrect_loss_per_token": 2.8811301014410753, "correct_loss_uncond": -26.311195373535156, "incorrect_loss_uncond": -16.96301523844401}, "model_output": [{"sum_logits": -78.8727798461914, "num_tokens": 39, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -105.18397521972656, "logits_per_token": -2.0223789704151645, "logits_per_char": -0.4532918381965023, "num_chars": 174}, {"sum_logits": -118.99333190917969, "num_tokens": 45, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -137.54681396484375, "logits_per_token": -2.6442962646484376, "logits_per_char": -0.6467028908107592, "num_chars": 184}, {"sum_logits": -93.78770446777344, "num_tokens": 36, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -113.74439239501953, "logits_per_token": -2.6052140129937067, "logits_per_char": -0.48847762743632, "num_chars": 192}, {"sum_logits": -95.02864074707031, "num_tokens": 28, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -107.40751647949219, "logits_per_token": -3.3938800266810825, "logits_per_char": -0.7091689607990321, "num_chars": 134}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 415, "native_id": 33068, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 13.641888618469238, "incorrect_loss_raw": 16.95683193206787, "correct_loss_per_char": 0.4012320181902717, "incorrect_loss_per_char": 0.5779467139581237, "correct_loss_per_token": 1.7052360773086548, "incorrect_loss_per_token": 2.330285110170879, "correct_loss_uncond": -21.845248222351074, "incorrect_loss_uncond": -16.99517027537028}, "model_output": [{"sum_logits": -13.641888618469238, "num_tokens": 8, "num_tokens_all": 404, "is_greedy": false, "sum_logits_uncond": -35.48713684082031, "logits_per_token": -1.7052360773086548, "logits_per_char": -0.4012320181902717, "num_chars": 34}, {"sum_logits": -20.133560180664062, "num_tokens": 7, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -34.59574890136719, "logits_per_token": -2.876222882952009, "logits_per_char": -0.6711186726888021, "num_chars": 30}, {"sum_logits": -18.147422790527344, "num_tokens": 9, "num_tokens_all": 405, "is_greedy": false, "sum_logits_uncond": -35.02049255371094, "logits_per_token": -2.0163803100585938, "logits_per_char": -0.49047088623046875, "num_chars": 37}, {"sum_logits": -12.589512825012207, "num_tokens": 6, "num_tokens_all": 402, "is_greedy": false, "sum_logits_uncond": -32.23976516723633, "logits_per_token": -2.0982521375020347, "logits_per_char": -0.5722505829551003, "num_chars": 22}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 416, "native_id": 7145, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 97.6832046508789, "incorrect_loss_raw": 92.95205942789714, "correct_loss_per_char": 0.5087666908899943, "incorrect_loss_per_char": 0.489720264013449, "correct_loss_per_token": 2.325790586925688, "incorrect_loss_per_token": 2.3359612024732037, "correct_loss_uncond": -15.65032958984375, "incorrect_loss_uncond": -19.002161661783855}, "model_output": [{"sum_logits": -97.6832046508789, "num_tokens": 42, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -113.33353424072266, "logits_per_token": -2.325790586925688, "logits_per_char": -0.5087666908899943, "num_chars": 192}, {"sum_logits": -77.38896942138672, "num_tokens": 40, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -98.50009155273438, "logits_per_token": -1.9347242355346679, "logits_per_char": -0.4094654466740038, "num_chars": 189}, {"sum_logits": -98.61557006835938, "num_tokens": 43, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -113.29912567138672, "logits_per_token": -2.2933853504269623, "logits_per_char": -0.4363520799484928, "num_chars": 226}, {"sum_logits": -102.85163879394531, "num_tokens": 37, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -124.06344604492188, "logits_per_token": -2.7797740214579814, "logits_per_char": -0.6233432654178503, "num_chars": 165}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 417, "native_id": 5244, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 102.26168060302734, "incorrect_loss_raw": 106.31772104899089, "correct_loss_per_char": 0.9296516418457031, "incorrect_loss_per_char": 0.6646822965374591, "correct_loss_per_token": 3.787469651963976, "incorrect_loss_per_token": 2.884491641719878, "correct_loss_uncond": -16.27735137939453, "incorrect_loss_uncond": -16.21240997314453}, "model_output": [{"sum_logits": -104.21436309814453, "num_tokens": 39, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -119.08840942382812, "logits_per_token": -2.6721631563626804, "logits_per_char": -0.5955106462751116, "num_chars": 175}, {"sum_logits": -102.79682922363281, "num_tokens": 31, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -115.47335815429688, "logits_per_token": -3.3160267491494455, "logits_per_char": -0.8031002283096313, "num_chars": 128}, {"sum_logits": -111.94197082519531, "num_tokens": 42, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -133.02862548828125, "logits_per_token": -2.6652850196475075, "logits_per_char": -0.5954360150276347, "num_chars": 188}, {"sum_logits": -102.26168060302734, "num_tokens": 27, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -118.53903198242188, "logits_per_token": -3.787469651963976, "logits_per_char": -0.9296516418457031, "num_chars": 110}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 418, "native_id": 16882, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 113.61631774902344, "incorrect_loss_raw": 68.37852223714192, "correct_loss_per_char": 0.6455472599376332, "incorrect_loss_per_char": 0.48328621287795487, "correct_loss_per_token": 2.7711297011956937, "incorrect_loss_per_token": 2.2573495087800204, "correct_loss_uncond": -18.295745849609375, "incorrect_loss_uncond": -26.608965555826824}, "model_output": [{"sum_logits": -53.919464111328125, "num_tokens": 27, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -78.58360290527344, "logits_per_token": -1.9970171893084492, "logits_per_char": -0.4054095045964521, "num_chars": 133}, {"sum_logits": -89.16102600097656, "num_tokens": 36, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -114.64073181152344, "logits_per_token": -2.4766951666937933, "logits_per_char": -0.5094915771484375, "num_chars": 175}, {"sum_logits": -62.055076599121094, "num_tokens": 27, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -91.73812866210938, "logits_per_token": -2.2983361703378185, "logits_per_char": -0.534957556888975, "num_chars": 116}, {"sum_logits": -113.61631774902344, "num_tokens": 41, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -131.9120635986328, "logits_per_token": -2.7711297011956937, "logits_per_char": -0.6455472599376332, "num_chars": 176}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 419, "native_id": 23601, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 49.23641586303711, "incorrect_loss_raw": 119.4513676961263, "correct_loss_per_char": 0.4208240672054454, "incorrect_loss_per_char": 0.8863707062809499, "correct_loss_per_token": 2.1407137331755264, "incorrect_loss_per_token": 3.9352937570534894, "correct_loss_uncond": -39.90497970581055, "incorrect_loss_uncond": -11.630223592122396}, "model_output": [{"sum_logits": -121.37939453125, "num_tokens": 28, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -132.08688354492188, "logits_per_token": -4.334978376116071, "logits_per_char": -1.1034490411931819, "num_chars": 110}, {"sum_logits": -128.48138427734375, "num_tokens": 27, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -143.19375610351562, "logits_per_token": -4.758569788049768, "logits_per_char": -0.9660254456943139, "num_chars": 133}, {"sum_logits": -108.49332427978516, "num_tokens": 40, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -117.9641342163086, "logits_per_token": -2.712333106994629, "logits_per_char": -0.5896376319553541, "num_chars": 184}, {"sum_logits": -49.23641586303711, "num_tokens": 23, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -89.14139556884766, "logits_per_token": -2.1407137331755264, "logits_per_char": -0.4208240672054454, "num_chars": 117}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 420, "native_id": 12020, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 98.03788757324219, "incorrect_loss_raw": 110.69563293457031, "correct_loss_per_char": 0.48057788026099113, "incorrect_loss_per_char": 0.6310259984062242, "correct_loss_per_token": 2.8010825020926338, "incorrect_loss_per_token": 2.75536142047466, "correct_loss_uncond": -34.32920837402344, "incorrect_loss_uncond": -39.968475341796875}, "model_output": [{"sum_logits": -98.03788757324219, "num_tokens": 35, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -132.36709594726562, "logits_per_token": -2.8010825020926338, "logits_per_char": -0.48057788026099113, "num_chars": 204}, {"sum_logits": -85.34567260742188, "num_tokens": 28, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -109.01713562011719, "logits_per_token": -3.048059735979353, "logits_per_char": -0.682765380859375, "num_chars": 125}, {"sum_logits": -99.52482604980469, "num_tokens": 35, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -138.266357421875, "logits_per_token": -2.843566458565848, "logits_per_char": -0.646265104219511, "num_chars": 154}, {"sum_logits": -147.21640014648438, "num_tokens": 62, "num_tokens_all": 495, "is_greedy": false, "sum_logits_uncond": -204.70883178710938, "logits_per_token": -2.37445806687878, "logits_per_char": -0.5640475101397868, "num_chars": 261}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 421, "native_id": 39250, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 46.98027038574219, "incorrect_loss_raw": 86.82342910766602, "correct_loss_per_char": 0.4121076349626508, "incorrect_loss_per_char": 0.5780762021626323, "correct_loss_per_token": 1.6778667994907923, "incorrect_loss_per_token": 2.2923959023464673, "correct_loss_uncond": -30.367706298828125, "incorrect_loss_uncond": -20.403130849202473}, "model_output": [{"sum_logits": -46.98027038574219, "num_tokens": 28, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -77.34797668457031, "logits_per_token": -1.6778667994907923, "logits_per_char": -0.4121076349626508, "num_chars": 114}, {"sum_logits": -112.08406066894531, "num_tokens": 44, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -132.4366455078125, "logits_per_token": -2.5473650152033027, "logits_per_char": -0.5899161087839226, "num_chars": 190}, {"sum_logits": -100.64372253417969, "num_tokens": 43, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -119.08719635009766, "logits_per_token": -2.3405516868413883, "logits_per_char": -0.5161216540214343, "num_chars": 195}, {"sum_logits": -47.74250411987305, "num_tokens": 24, "num_tokens_all": 447, "is_greedy": false, "sum_logits_uncond": -70.15583801269531, "logits_per_token": -1.9892710049947102, "logits_per_char": -0.6281908436825401, "num_chars": 76}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 422, "native_id": 19152, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 70.9283676147461, "incorrect_loss_raw": 99.59676615397136, "correct_loss_per_char": 0.4378294297206549, "incorrect_loss_per_char": 0.5861318684069318, "correct_loss_per_token": 2.2165114879608154, "incorrect_loss_per_token": 2.712007737777672, "correct_loss_uncond": -33.792884826660156, "incorrect_loss_uncond": -22.073226928710938}, "model_output": [{"sum_logits": -70.9283676147461, "num_tokens": 32, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -104.72125244140625, "logits_per_token": -2.2165114879608154, "logits_per_char": -0.4378294297206549, "num_chars": 162}, {"sum_logits": -139.6043701171875, "num_tokens": 51, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -164.1338348388672, "logits_per_token": -2.7373405905330883, "logits_per_char": -0.5792712452995332, "num_chars": 241}, {"sum_logits": -78.72396850585938, "num_tokens": 30, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -105.13700866699219, "logits_per_token": -2.624132283528646, "logits_per_char": -0.5874923022825327, "num_chars": 134}, {"sum_logits": -80.46195983886719, "num_tokens": 29, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -95.7391357421875, "logits_per_token": -2.7745503392712823, "logits_per_char": -0.5916320576387293, "num_chars": 136}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 423, "native_id": 25105, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 95.40587615966797, "incorrect_loss_raw": 94.14620208740234, "correct_loss_per_char": 0.5021361903140419, "incorrect_loss_per_char": 0.543326787993318, "correct_loss_per_token": 2.5106809515702095, "incorrect_loss_per_token": 2.701223559451783, "correct_loss_uncond": -29.91973114013672, "incorrect_loss_uncond": -19.565996805826824}, "model_output": [{"sum_logits": -127.28251647949219, "num_tokens": 37, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -145.36459350585938, "logits_per_token": -3.4400680129592485, "logits_per_char": -0.6734524681454613, "num_chars": 189}, {"sum_logits": -95.40587615966797, "num_tokens": 38, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -125.32560729980469, "logits_per_token": -2.5106809515702095, "logits_per_char": -0.5021361903140419, "num_chars": 190}, {"sum_logits": -77.65528869628906, "num_tokens": 39, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -87.414794921875, "logits_per_token": -1.9911612486227965, "logits_per_char": -0.42203961247983185, "num_chars": 184}, {"sum_logits": -77.50080108642578, "num_tokens": 29, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -108.35720825195312, "logits_per_token": -2.6724414167733026, "logits_per_char": -0.5344882833546606, "num_chars": 145}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 424, "native_id": 42405, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 106.0475082397461, "incorrect_loss_raw": 80.05064137776692, "correct_loss_per_char": 0.5147937293191558, "incorrect_loss_per_char": 0.5694640526120298, "correct_loss_per_token": 2.256329962547789, "incorrect_loss_per_token": 2.4798279782192885, "correct_loss_uncond": -14.149070739746094, "incorrect_loss_uncond": -15.05633544921875}, "model_output": [{"sum_logits": -69.95591735839844, "num_tokens": 27, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -88.73200988769531, "logits_per_token": -2.590959902162905, "logits_per_char": -0.6030682530896417, "num_chars": 116}, {"sum_logits": -86.38368225097656, "num_tokens": 32, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -98.6521224975586, "logits_per_token": -2.6994900703430176, "logits_per_char": -0.5645992303985397, "num_chars": 153}, {"sum_logits": -106.0475082397461, "num_tokens": 47, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -120.19657897949219, "logits_per_token": -2.256329962547789, "logits_per_char": -0.5147937293191558, "num_chars": 206}, {"sum_logits": -83.81232452392578, "num_tokens": 39, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -97.93679809570312, "logits_per_token": -2.149033962151943, "logits_per_char": -0.5407246743479083, "num_chars": 155}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 425, "native_id": 47429, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 33.0338249206543, "incorrect_loss_raw": 52.92966079711914, "correct_loss_per_char": 0.5161535143852234, "incorrect_loss_per_char": 0.697164028529402, "correct_loss_per_token": 2.752818743387858, "incorrect_loss_per_token": 3.455171884012501, "correct_loss_uncond": -25.756664276123047, "incorrect_loss_uncond": -16.788914998372395}, "model_output": [{"sum_logits": -43.78754425048828, "num_tokens": 12, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -58.52409744262695, "logits_per_token": -3.6489620208740234, "logits_per_char": -0.7178285942702997, "num_chars": 61}, {"sum_logits": -33.0338249206543, "num_tokens": 12, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -58.790489196777344, "logits_per_token": -2.752818743387858, "logits_per_char": -0.5161535143852234, "num_chars": 64}, {"sum_logits": -47.29905319213867, "num_tokens": 15, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -65.53155517578125, "logits_per_token": -3.153270212809245, "logits_per_char": -0.6757007598876953, "num_chars": 70}, {"sum_logits": -67.70238494873047, "num_tokens": 19, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -85.1000747680664, "logits_per_token": -3.563283418354235, "logits_per_char": -0.697962731430211, "num_chars": 97}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 426, "native_id": 49660, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 113.19474029541016, "incorrect_loss_raw": 122.40655008951823, "correct_loss_per_char": 0.615188805953316, "incorrect_loss_per_char": 0.6408221897492911, "correct_loss_per_token": 3.0593173052813554, "incorrect_loss_per_token": 3.1044196459202986, "correct_loss_uncond": -18.362327575683594, "incorrect_loss_uncond": -23.528798421223957}, "model_output": [{"sum_logits": -154.53060913085938, "num_tokens": 51, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -179.32786560058594, "logits_per_token": -3.0300119437423407, "logits_per_char": -0.6206048559472264, "num_chars": 249}, {"sum_logits": -68.27644348144531, "num_tokens": 24, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -91.31155395507812, "logits_per_token": -2.844851811726888, "logits_per_char": -0.6042163139950912, "num_chars": 113}, {"sum_logits": -113.19474029541016, "num_tokens": 37, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -131.55706787109375, "logits_per_token": -3.0593173052813554, "logits_per_char": -0.615188805953316, "num_chars": 184}, {"sum_logits": -144.41259765625, "num_tokens": 42, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -167.1666259765625, "logits_per_token": -3.4383951822916665, "logits_per_char": -0.6976453993055556, "num_chars": 207}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 427, "native_id": 33350, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 60.99751281738281, "incorrect_loss_raw": 99.32708231608073, "correct_loss_per_char": 0.44851112365722656, "incorrect_loss_per_char": 0.6037943788865267, "correct_loss_per_token": 2.346058185283954, "incorrect_loss_per_token": 2.9827721368698845, "correct_loss_uncond": -37.308692932128906, "incorrect_loss_uncond": -16.235689798990887}, "model_output": [{"sum_logits": -91.25896453857422, "num_tokens": 30, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -106.08303833007812, "logits_per_token": -3.0419654846191406, "logits_per_char": -0.6337428092956543, "num_chars": 144}, {"sum_logits": -100.2602767944336, "num_tokens": 35, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -118.92091369628906, "logits_per_token": -2.864579336983817, "logits_per_char": -0.5795391722221594, "num_chars": 173}, {"sum_logits": -60.99751281738281, "num_tokens": 26, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -98.30620574951172, "logits_per_token": -2.346058185283954, "logits_per_char": -0.44851112365722656, "num_chars": 136}, {"sum_logits": -106.46200561523438, "num_tokens": 35, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -121.68436431884766, "logits_per_token": -3.0417715890066965, "logits_per_char": -0.5981011551417662, "num_chars": 178}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 428, "native_id": 32452, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 135.84127807617188, "incorrect_loss_raw": 99.5974604288737, "correct_loss_per_char": 0.6499582683070425, "incorrect_loss_per_char": 0.7532579940131742, "correct_loss_per_token": 2.772270981146365, "incorrect_loss_per_token": 3.5849940899994337, "correct_loss_uncond": -32.68153381347656, "incorrect_loss_uncond": -18.949429829915363}, "model_output": [{"sum_logits": -110.15003204345703, "num_tokens": 29, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -114.82271575927734, "logits_per_token": -3.79827696701576, "logits_per_char": -0.8099267062018899, "num_chars": 136}, {"sum_logits": -79.89019012451172, "num_tokens": 26, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -107.2101821899414, "logits_per_token": -3.0726996201735277, "logits_per_char": -0.6442757268105784, "num_chars": 124}, {"sum_logits": -108.75215911865234, "num_tokens": 28, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -133.60777282714844, "logits_per_token": -3.884005682809012, "logits_per_char": -0.8055715490270544, "num_chars": 135}, {"sum_logits": -135.84127807617188, "num_tokens": 49, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -168.52281188964844, "logits_per_token": -2.772270981146365, "logits_per_char": -0.6499582683070425, "num_chars": 209}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 429, "native_id": 14897, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 143.8050537109375, "incorrect_loss_raw": 133.38934580485025, "correct_loss_per_char": 0.5045791358278509, "incorrect_loss_per_char": 0.5863858737658855, "correct_loss_per_token": 2.3194363501764115, "incorrect_loss_per_token": 2.8744688609289746, "correct_loss_uncond": -25.292144775390625, "incorrect_loss_uncond": -21.044085184733074}, "model_output": [{"sum_logits": -155.45933532714844, "num_tokens": 63, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -175.9097900390625, "logits_per_token": -2.4676084972563244, "logits_per_char": -0.4935216994512649, "num_chars": 315}, {"sum_logits": -143.8050537109375, "num_tokens": 62, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -169.09719848632812, "logits_per_token": -2.3194363501764115, "logits_per_char": -0.5045791358278509, "num_chars": 285}, {"sum_logits": -129.20884704589844, "num_tokens": 36, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -152.95468139648438, "logits_per_token": -3.5891346401638455, "logits_per_char": -0.7022219948146654, "num_chars": 184}, {"sum_logits": -115.4998550415039, "num_tokens": 45, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -134.43582153320312, "logits_per_token": -2.5666634453667534, "logits_per_char": -0.5634139270317263, "num_chars": 205}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 430, "native_id": 29587, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 73.32168579101562, "incorrect_loss_raw": 88.12432479858398, "correct_loss_per_char": 0.4920918509464136, "incorrect_loss_per_char": 0.5428180842694686, "correct_loss_per_token": 1.981667183540963, "incorrect_loss_per_token": 2.209974135545196, "correct_loss_uncond": -45.925926208496094, "incorrect_loss_uncond": -28.087116241455078}, "model_output": [{"sum_logits": -73.32168579101562, "num_tokens": 37, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -119.24761199951172, "logits_per_token": -1.981667183540963, "logits_per_char": -0.4920918509464136, "num_chars": 149}, {"sum_logits": -111.97476196289062, "num_tokens": 56, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -132.60154724121094, "logits_per_token": -1.999549320765904, "logits_per_char": -0.5570883679745803, "num_chars": 201}, {"sum_logits": -53.535160064697266, "num_tokens": 22, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -79.3814697265625, "logits_per_token": -2.4334163665771484, "logits_per_char": -0.5695229794116731, "num_chars": 94}, {"sum_logits": -98.86305236816406, "num_tokens": 45, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -136.65130615234375, "logits_per_token": -2.1969567192925346, "logits_per_char": -0.5018429054221526, "num_chars": 197}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 431, "native_id": 39586, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 99.96209716796875, "incorrect_loss_raw": 144.84477996826172, "correct_loss_per_char": 0.4271884494357639, "incorrect_loss_per_char": 0.6163410795152767, "correct_loss_per_token": 2.040042799346301, "incorrect_loss_per_token": 2.7658040463406963, "correct_loss_uncond": -28.406112670898438, "incorrect_loss_uncond": -16.4878667195638}, "model_output": [{"sum_logits": -125.30733489990234, "num_tokens": 41, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -138.4621124267578, "logits_per_token": -3.056276460973228, "logits_per_char": -0.681018124455991, "num_chars": 184}, {"sum_logits": -143.47293090820312, "num_tokens": 59, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -163.47152709960938, "logits_per_token": -2.4317445916644598, "logits_per_char": -0.5518189650315505, "num_chars": 260}, {"sum_logits": -165.7540740966797, "num_tokens": 59, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -182.06430053710938, "logits_per_token": -2.8093910863844016, "logits_per_char": -0.6161861490582888, "num_chars": 269}, {"sum_logits": -99.96209716796875, "num_tokens": 49, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -128.3682098388672, "logits_per_token": -2.040042799346301, "logits_per_char": -0.4271884494357639, "num_chars": 234}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 432, "native_id": 27332, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 30.108562469482422, "incorrect_loss_raw": 67.18450164794922, "correct_loss_per_char": 0.5903639699898514, "incorrect_loss_per_char": 0.8575198730053929, "correct_loss_per_token": 2.316043266883263, "incorrect_loss_per_token": 3.780673951712268, "correct_loss_uncond": -21.89120101928711, "incorrect_loss_uncond": -20.626986185709637}, "model_output": [{"sum_logits": -30.108562469482422, "num_tokens": 13, "num_tokens_all": 424, "is_greedy": false, "sum_logits_uncond": -51.99976348876953, "logits_per_token": -2.316043266883263, "logits_per_char": -0.5903639699898514, "num_chars": 51}, {"sum_logits": -69.80768585205078, "num_tokens": 20, "num_tokens_all": 431, "is_greedy": false, "sum_logits_uncond": -85.81378173828125, "logits_per_token": -3.4903842926025392, "logits_per_char": -0.767117426945613, "num_chars": 91}, {"sum_logits": -66.36346435546875, "num_tokens": 19, "num_tokens_all": 430, "is_greedy": false, "sum_logits_uncond": -87.41354370117188, "logits_per_token": -3.492813913445724, "logits_per_char": -0.7995598115116717, "num_chars": 83}, {"sum_logits": -65.38235473632812, "num_tokens": 15, "num_tokens_all": 426, "is_greedy": false, "sum_logits_uncond": -90.20713806152344, "logits_per_token": -4.358823649088541, "logits_per_char": -1.0058823805588943, "num_chars": 65}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 433, "native_id": 41420, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 75.36121368408203, "incorrect_loss_raw": 104.04213968912761, "correct_loss_per_char": 0.519732508166083, "incorrect_loss_per_char": 0.5456919490626045, "correct_loss_per_token": 2.4310068930349042, "incorrect_loss_per_token": 2.4783224600332754, "correct_loss_uncond": -17.441246032714844, "incorrect_loss_uncond": -26.177396138509113}, "model_output": [{"sum_logits": -75.36121368408203, "num_tokens": 31, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -92.80245971679688, "logits_per_token": -2.4310068930349042, "logits_per_char": -0.519732508166083, "num_chars": 145}, {"sum_logits": -131.731201171875, "num_tokens": 45, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -152.73348999023438, "logits_per_token": -2.927360026041667, "logits_per_char": -0.6896921527323299, "num_chars": 191}, {"sum_logits": -89.7884521484375, "num_tokens": 36, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -112.60344696044922, "logits_per_token": -2.4941236707899304, "logits_per_char": -0.4827336137012769, "num_chars": 186}, {"sum_logits": -90.60676574707031, "num_tokens": 45, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -125.32167053222656, "logits_per_token": -2.013483683268229, "logits_per_char": -0.4646500807542067, "num_chars": 195}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 434, "native_id": 32262, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 154.56454467773438, "incorrect_loss_raw": 137.47796122233072, "correct_loss_per_char": 0.5239476090770657, "incorrect_loss_per_char": 0.6699606428901431, "correct_loss_per_token": 2.533844994716957, "incorrect_loss_per_token": 3.1685519376386897, "correct_loss_uncond": -37.80548095703125, "incorrect_loss_uncond": -30.929239908854168}, "model_output": [{"sum_logits": -141.1907196044922, "num_tokens": 39, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -162.41903686523438, "logits_per_token": -3.620274861653646, "logits_per_char": -0.5957414329303468, "num_chars": 237}, {"sum_logits": -167.89089965820312, "num_tokens": 59, "num_tokens_all": 493, "is_greedy": false, "sum_logits_uncond": -198.23223876953125, "logits_per_token": -2.845608468783104, "logits_per_char": -0.6311687957075306, "num_chars": 266}, {"sum_logits": -154.56454467773438, "num_tokens": 61, "num_tokens_all": 495, "is_greedy": false, "sum_logits_uncond": -192.37002563476562, "logits_per_token": -2.533844994716957, "logits_per_char": -0.5239476090770657, "num_chars": 295}, {"sum_logits": -103.35226440429688, "num_tokens": 34, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -144.57032775878906, "logits_per_token": -3.0397724824793197, "logits_per_char": -0.7829717000325521, "num_chars": 132}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 435, "native_id": 20868, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 26.720821380615234, "incorrect_loss_raw": 51.97157669067383, "correct_loss_per_char": 0.5808874213177225, "incorrect_loss_per_char": 0.6557595710429059, "correct_loss_per_token": 2.968980153401693, "incorrect_loss_per_token": 3.1624969332825903, "correct_loss_uncond": -19.563793182373047, "incorrect_loss_uncond": -26.102375030517578}, "model_output": [{"sum_logits": -60.352237701416016, "num_tokens": 17, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -98.69810485839844, "logits_per_token": -3.5501316294950596, "logits_per_char": -0.7100263258990119, "num_chars": 85}, {"sum_logits": -26.720821380615234, "num_tokens": 9, "num_tokens_all": 398, "is_greedy": false, "sum_logits_uncond": -46.28461456298828, "logits_per_token": -2.968980153401693, "logits_per_char": -0.5808874213177225, "num_chars": 46}, {"sum_logits": -40.29460144042969, "num_tokens": 15, "num_tokens_all": 404, "is_greedy": false, "sum_logits_uncond": -56.11572265625, "logits_per_token": -2.6863067626953123, "logits_per_char": -0.6499129264585434, "num_chars": 62}, {"sum_logits": -55.26789093017578, "num_tokens": 17, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -79.40802764892578, "logits_per_token": -3.251052407657399, "logits_per_char": -0.6073394607711624, "num_chars": 91}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 436, "native_id": 18187, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 23.612627029418945, "incorrect_loss_raw": 38.79834874471029, "correct_loss_per_char": 0.3577670762033174, "incorrect_loss_per_char": 0.7714956669386296, "correct_loss_per_token": 1.8163559253399189, "incorrect_loss_per_token": 3.36216636940285, "correct_loss_uncond": -26.34467887878418, "incorrect_loss_uncond": -17.26786168416341}, "model_output": [{"sum_logits": -23.612627029418945, "num_tokens": 13, "num_tokens_all": 421, "is_greedy": false, "sum_logits_uncond": -49.957305908203125, "logits_per_token": -1.8163559253399189, "logits_per_char": -0.3577670762033174, "num_chars": 66}, {"sum_logits": -25.44607162475586, "num_tokens": 9, "num_tokens_all": 417, "is_greedy": false, "sum_logits_uncond": -43.36853790283203, "logits_per_token": -2.8273412916395397, "logits_per_char": -0.565468258327908, "num_chars": 45}, {"sum_logits": -49.908050537109375, "num_tokens": 13, "num_tokens_all": 421, "is_greedy": false, "sum_logits_uncond": -65.83303833007812, "logits_per_token": -3.839080810546875, "logits_per_char": -0.9597702026367188, "num_chars": 52}, {"sum_logits": -41.040924072265625, "num_tokens": 12, "num_tokens_all": 420, "is_greedy": false, "sum_logits_uncond": -58.99705505371094, "logits_per_token": -3.4200770060221353, "logits_per_char": -0.789248539851262, "num_chars": 52}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 437, "native_id": 50071, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 51.106178283691406, "incorrect_loss_raw": 30.975615819295246, "correct_loss_per_char": 0.6469136491606507, "incorrect_loss_per_char": 0.882914253696092, "correct_loss_per_token": 2.8392321268717446, "incorrect_loss_per_token": 3.887089905915437, "correct_loss_uncond": -35.26709747314453, "incorrect_loss_uncond": -18.643547693888348}, "model_output": [{"sum_logits": -51.106178283691406, "num_tokens": 18, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -86.37327575683594, "logits_per_token": -2.8392321268717446, "logits_per_char": -0.6469136491606507, "num_chars": 79}, {"sum_logits": -45.63185119628906, "num_tokens": 8, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -57.27352523803711, "logits_per_token": -5.703981399536133, "logits_per_char": -1.3037671770368304, "num_chars": 35}, {"sum_logits": -25.172901153564453, "num_tokens": 9, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -45.464839935302734, "logits_per_token": -2.796989017062717, "logits_per_char": -0.7628151864716501, "num_chars": 33}, {"sum_logits": -22.122095108032227, "num_tokens": 7, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -46.11912536621094, "logits_per_token": -3.160299301147461, "logits_per_char": -0.5821603975797954, "num_chars": 38}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 438, "native_id": 44090, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.3159284591674805, "incorrect_loss_raw": 43.69503657023112, "correct_loss_per_char": 0.21263713836669923, "incorrect_loss_per_char": 0.6418949744280646, "correct_loss_per_token": 0.8859880765279134, "incorrect_loss_per_token": 3.124382361659297, "correct_loss_uncond": -29.066487312316895, "incorrect_loss_uncond": -28.905521392822266}, "model_output": [{"sum_logits": -56.628273010253906, "num_tokens": 16, "num_tokens_all": 413, "is_greedy": false, "sum_logits_uncond": -82.59455108642578, "logits_per_token": -3.539267063140869, "logits_per_char": -0.6292030334472656, "num_chars": 90}, {"sum_logits": -54.879791259765625, "num_tokens": 15, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -82.15754699707031, "logits_per_token": -3.658652750651042, "logits_per_char": -0.8070557538200828, "num_chars": 68}, {"sum_logits": -19.577045440673828, "num_tokens": 9, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -53.04957580566406, "logits_per_token": -2.175227271185981, "logits_per_char": -0.4894261360168457, "num_chars": 40}, {"sum_logits": -5.3159284591674805, "num_tokens": 6, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -34.382415771484375, "logits_per_token": -0.8859880765279134, "logits_per_char": -0.21263713836669923, "num_chars": 25}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 439, "native_id": 49978, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 83.09054565429688, "incorrect_loss_raw": 76.53176625569661, "correct_loss_per_char": 0.41133933492226177, "incorrect_loss_per_char": 0.43194226762847915, "correct_loss_per_token": 1.846456570095486, "incorrect_loss_per_token": 2.1109140068882826, "correct_loss_uncond": -24.468002319335938, "incorrect_loss_uncond": -26.331993103027344}, "model_output": [{"sum_logits": -72.23175048828125, "num_tokens": 39, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -92.02361297607422, "logits_per_token": -1.852096166366186, "logits_per_char": -0.36852933922592473, "num_chars": 196}, {"sum_logits": -93.66375732421875, "num_tokens": 32, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -123.90437316894531, "logits_per_token": -2.926992416381836, "logits_per_char": -0.5291737701933262, "num_chars": 177}, {"sum_logits": -63.699790954589844, "num_tokens": 41, "num_tokens_all": 490, "is_greedy": false, "sum_logits_uncond": -92.66329193115234, "logits_per_token": -1.5536534379168254, "logits_per_char": -0.39812369346618653, "num_chars": 160}, {"sum_logits": -83.09054565429688, "num_tokens": 45, "num_tokens_all": 494, "is_greedy": false, "sum_logits_uncond": -107.55854797363281, "logits_per_token": -1.846456570095486, "logits_per_char": -0.41133933492226177, "num_chars": 202}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 440, "native_id": 16921, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 101.68949890136719, "incorrect_loss_raw": 88.67883809407552, "correct_loss_per_char": 0.5496729670344173, "incorrect_loss_per_char": 0.5282712776424843, "correct_loss_per_token": 2.3111249750310723, "incorrect_loss_per_token": 2.330309471113857, "correct_loss_uncond": -18.581436157226562, "incorrect_loss_uncond": -13.037623087565104}, "model_output": [{"sum_logits": -91.20294189453125, "num_tokens": 33, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -104.90438842773438, "logits_per_token": -2.763725511955492, "logits_per_char": -0.5700183868408203, "num_chars": 160}, {"sum_logits": -99.2546615600586, "num_tokens": 48, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -111.63812255859375, "logits_per_token": -2.067805449167887, "logits_per_char": -0.5142728578241378, "num_chars": 193}, {"sum_logits": -101.68949890136719, "num_tokens": 44, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -120.27093505859375, "logits_per_token": -2.3111249750310723, "logits_per_char": -0.5496729670344173, "num_chars": 185}, {"sum_logits": -75.57891082763672, "num_tokens": 35, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -88.60687255859375, "logits_per_token": -2.159397452218192, "logits_per_char": -0.5005225882624948, "num_chars": 151}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 441, "native_id": 43370, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 38.72970962524414, "incorrect_loss_raw": 65.47861735026042, "correct_loss_per_char": 0.4451690761522315, "incorrect_loss_per_char": 0.8311720176099783, "correct_loss_per_token": 2.0384057697496916, "incorrect_loss_per_token": 3.548809585037765, "correct_loss_uncond": -44.800655364990234, "incorrect_loss_uncond": -22.62530517578125}, "model_output": [{"sum_logits": -43.80039978027344, "num_tokens": 13, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -65.76777648925781, "logits_per_token": -3.369261521559495, "logits_per_char": -0.7064580609721522, "num_chars": 62}, {"sum_logits": -78.01318359375, "num_tokens": 22, "num_tokens_all": 423, "is_greedy": false, "sum_logits_uncond": -110.90453338623047, "logits_per_token": -3.546053799715909, "logits_per_char": -0.7648351332720589, "num_chars": 102}, {"sum_logits": -38.72970962524414, "num_tokens": 19, "num_tokens_all": 420, "is_greedy": false, "sum_logits_uncond": -83.53036499023438, "logits_per_token": -2.0384057697496916, "logits_per_char": -0.4451690761522315, "num_chars": 87}, {"sum_logits": -74.62226867675781, "num_tokens": 20, "num_tokens_all": 421, "is_greedy": false, "sum_logits_uncond": -87.63945770263672, "logits_per_token": -3.7311134338378906, "logits_per_char": -1.0222228585857234, "num_chars": 73}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 442, "native_id": 21875, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 149.36331176757812, "incorrect_loss_raw": 175.11009724934897, "correct_loss_per_char": 0.5880445345180241, "incorrect_loss_per_char": 0.8664173123200086, "correct_loss_per_token": 2.818175693727889, "incorrect_loss_per_token": 3.7529620315089374, "correct_loss_uncond": -29.971511840820312, "incorrect_loss_uncond": -19.695841471354168}, "model_output": [{"sum_logits": -199.09698486328125, "num_tokens": 45, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -214.1383056640625, "logits_per_token": -4.42437744140625, "logits_per_char": -0.9905322630013993, "num_chars": 201}, {"sum_logits": -132.43911743164062, "num_tokens": 40, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -150.6576385498047, "logits_per_token": -3.3109779357910156, "logits_per_char": -0.7357728746202257, "num_chars": 180}, {"sum_logits": -149.36331176757812, "num_tokens": 53, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -179.33482360839844, "logits_per_token": -2.818175693727889, "logits_per_char": -0.5880445345180241, "num_chars": 254}, {"sum_logits": -193.794189453125, "num_tokens": 55, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -219.6218719482422, "logits_per_token": -3.5235307173295456, "logits_per_char": -0.8729467993384009, "num_chars": 222}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 443, "native_id": 9171, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 55.9241828918457, "incorrect_loss_raw": 85.46473693847656, "correct_loss_per_char": 0.44739346313476563, "incorrect_loss_per_char": 0.5371783096253838, "correct_loss_per_token": 2.150930111224835, "incorrect_loss_per_token": 2.6653123798149436, "correct_loss_uncond": -11.176921844482422, "incorrect_loss_uncond": -21.540130615234375}, "model_output": [{"sum_logits": -55.9241828918457, "num_tokens": 26, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -67.10110473632812, "logits_per_token": -2.150930111224835, "logits_per_char": -0.44739346313476563, "num_chars": 125}, {"sum_logits": -68.8511962890625, "num_tokens": 25, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -88.12738800048828, "logits_per_token": -2.7540478515625, "logits_per_char": -0.5737599690755208, "num_chars": 120}, {"sum_logits": -112.12055969238281, "num_tokens": 35, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -139.35552978515625, "logits_per_token": -3.203444562639509, "logits_per_char": -0.5634198979516725, "num_chars": 199}, {"sum_logits": -75.42245483398438, "num_tokens": 37, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -93.53168487548828, "logits_per_token": -2.038444725242821, "logits_per_char": -0.4743550618489583, "num_chars": 159}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 444, "native_id": 258, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 82.05569458007812, "incorrect_loss_raw": 82.05737050374348, "correct_loss_per_char": 0.4743103732952493, "incorrect_loss_per_char": 0.60403356358988, "correct_loss_per_token": 2.1593603836862663, "incorrect_loss_per_token": 2.760534798615898, "correct_loss_uncond": -22.936355590820312, "incorrect_loss_uncond": -25.46881866455078}, "model_output": [{"sum_logits": -85.67144775390625, "num_tokens": 30, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -114.52277374267578, "logits_per_token": -2.8557149251302083, "logits_per_char": -0.6119389125279018, "num_chars": 140}, {"sum_logits": -93.8907241821289, "num_tokens": 34, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -116.43289184570312, "logits_per_token": -2.7614918877096737, "logits_per_char": -0.6259381612141927, "num_chars": 150}, {"sum_logits": -82.05569458007812, "num_tokens": 38, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -104.99205017089844, "logits_per_token": -2.1593603836862663, "logits_per_char": -0.4743103732952493, "num_chars": 173}, {"sum_logits": -66.60993957519531, "num_tokens": 25, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -91.6229019165039, "logits_per_token": -2.6643975830078124, "logits_per_char": -0.5742236170275458, "num_chars": 116}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 445, "native_id": 8231, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 99.95086669921875, "incorrect_loss_raw": 78.51280975341797, "correct_loss_per_char": 0.4326877346286526, "incorrect_loss_per_char": 0.5914036579334546, "correct_loss_per_token": 2.1728449282438858, "incorrect_loss_per_token": 2.7924149077279226, "correct_loss_uncond": -32.27177429199219, "incorrect_loss_uncond": -17.428003946940105}, "model_output": [{"sum_logits": -99.95086669921875, "num_tokens": 46, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -132.22264099121094, "logits_per_token": -2.1728449282438858, "logits_per_char": -0.4326877346286526, "num_chars": 231}, {"sum_logits": -85.34062194824219, "num_tokens": 25, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -103.85282897949219, "logits_per_token": -3.4136248779296876, "logits_per_char": -0.6514551293758946, "num_chars": 131}, {"sum_logits": -89.73161315917969, "num_tokens": 32, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -109.83364868164062, "logits_per_token": -2.8041129112243652, "logits_per_char": -0.6646786159939236, "num_chars": 135}, {"sum_logits": -60.46619415283203, "num_tokens": 28, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -74.1359634399414, "logits_per_token": -2.1595069340297153, "logits_per_char": -0.4580772284305457, "num_chars": 132}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 446, "native_id": 6724, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 17.39306640625, "incorrect_loss_raw": 28.976038614908855, "correct_loss_per_char": 0.3700652426861702, "incorrect_loss_per_char": 0.757555376646112, "correct_loss_per_token": 1.739306640625, "incorrect_loss_per_token": 3.201285064535797, "correct_loss_uncond": -24.24947738647461, "incorrect_loss_uncond": -13.436447143554688}, "model_output": [{"sum_logits": -17.39306640625, "num_tokens": 10, "num_tokens_all": 397, "is_greedy": false, "sum_logits_uncond": -41.64254379272461, "logits_per_token": -1.739306640625, "logits_per_char": -0.3700652426861702, "num_chars": 47}, {"sum_logits": -31.60809326171875, "num_tokens": 9, "num_tokens_all": 396, "is_greedy": false, "sum_logits_uncond": -47.076744079589844, "logits_per_token": -3.5120103624131946, "logits_per_char": -0.7902023315429687, "num_chars": 40}, {"sum_logits": -22.474353790283203, "num_tokens": 6, "num_tokens_all": 393, "is_greedy": false, "sum_logits_uncond": -32.766258239746094, "logits_per_token": -3.745725631713867, "logits_per_char": -0.977145816968835, "num_chars": 23}, {"sum_logits": -32.84566879272461, "num_tokens": 14, "num_tokens_all": 401, "is_greedy": false, "sum_logits_uncond": -47.39445495605469, "logits_per_token": -2.3461191994803294, "logits_per_char": -0.5053179814265325, "num_chars": 65}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 447, "native_id": 39680, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 39.285858154296875, "incorrect_loss_raw": 52.62624994913737, "correct_loss_per_char": 0.5952402750651041, "incorrect_loss_per_char": 0.8085240330743484, "correct_loss_per_token": 3.2738215128580728, "incorrect_loss_per_token": 4.3297356144464425, "correct_loss_uncond": -29.843063354492188, "incorrect_loss_uncond": -21.515405019124348}, "model_output": [{"sum_logits": -36.627723693847656, "num_tokens": 11, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -72.4224853515625, "logits_per_token": -3.3297930630770596, "logits_per_char": -0.6208088761669094, "num_chars": 59}, {"sum_logits": -72.9221420288086, "num_tokens": 17, "num_tokens_all": 417, "is_greedy": false, "sum_logits_uncond": -88.980224609375, "logits_per_token": -4.289537766400506, "logits_per_char": -0.8381855405610183, "num_chars": 87}, {"sum_logits": -39.285858154296875, "num_tokens": 12, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -69.12892150878906, "logits_per_token": -3.2738215128580728, "logits_per_char": -0.5952402750651041, "num_chars": 66}, {"sum_logits": -48.32888412475586, "num_tokens": 9, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -61.022254943847656, "logits_per_token": -5.369876013861762, "logits_per_char": -0.9665776824951172, "num_chars": 50}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 448, "native_id": 14440, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 122.78079223632812, "incorrect_loss_raw": 81.08148956298828, "correct_loss_per_char": 0.501146090760523, "incorrect_loss_per_char": 0.4797596009832818, "correct_loss_per_token": 2.4074665144378065, "incorrect_loss_per_token": 2.4570903788552694, "correct_loss_uncond": -16.737808227539062, "incorrect_loss_uncond": -16.930803934733074}, "model_output": [{"sum_logits": -84.93085479736328, "num_tokens": 35, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -107.28657531738281, "logits_per_token": -2.4265958513532366, "logits_per_char": -0.4744740491472809, "num_chars": 179}, {"sum_logits": -75.34039306640625, "num_tokens": 31, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -87.207763671875, "logits_per_token": -2.430335260206653, "logits_per_char": -0.47383895010318394, "num_chars": 159}, {"sum_logits": -122.78079223632812, "num_tokens": 51, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -139.5186004638672, "logits_per_token": -2.4074665144378065, "logits_per_char": -0.501146090760523, "num_chars": 245}, {"sum_logits": -82.97322082519531, "num_tokens": 33, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -99.54254150390625, "logits_per_token": -2.5143400250059185, "logits_per_char": -0.49096580369938053, "num_chars": 169}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 449, "native_id": 49197, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 66.16508483886719, "incorrect_loss_raw": 95.3914566040039, "correct_loss_per_char": 0.4760078045961668, "incorrect_loss_per_char": 0.6909387213452772, "correct_loss_per_token": 2.1343575754473285, "incorrect_loss_per_token": 2.9152182658974937, "correct_loss_uncond": -27.98199462890625, "incorrect_loss_uncond": -15.509073893229166}, "model_output": [{"sum_logits": -100.02159118652344, "num_tokens": 34, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -111.6240005493164, "logits_per_token": -2.9418115054859832, "logits_per_char": -0.6712858468894191, "num_chars": 149}, {"sum_logits": -66.16508483886719, "num_tokens": 31, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -94.14707946777344, "logits_per_token": -2.1343575754473285, "logits_per_char": -0.4760078045961668, "num_chars": 139}, {"sum_logits": -83.29777526855469, "num_tokens": 31, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -99.69656372070312, "logits_per_token": -2.6870250086630545, "logits_per_char": -0.6507638692855835, "num_chars": 128}, {"sum_logits": -102.8550033569336, "num_tokens": 33, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -121.38102722167969, "logits_per_token": -3.1168182835434424, "logits_per_char": -0.7507664478608291, "num_chars": 137}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 450, "native_id": 24906, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 53.60108184814453, "incorrect_loss_raw": 74.63063685099284, "correct_loss_per_char": 0.7342613951800621, "incorrect_loss_per_char": 0.8770878131011548, "correct_loss_per_token": 3.350067615509033, "incorrect_loss_per_token": 3.9037085639105906, "correct_loss_uncond": -21.97138214111328, "incorrect_loss_uncond": -27.126733144124348}, "model_output": [{"sum_logits": -34.44704055786133, "num_tokens": 10, "num_tokens_all": 430, "is_greedy": false, "sum_logits_uncond": -54.31605529785156, "logits_per_token": -3.444704055786133, "logits_per_char": -0.9310010961584143, "num_chars": 37}, {"sum_logits": -53.60108184814453, "num_tokens": 16, "num_tokens_all": 436, "is_greedy": false, "sum_logits_uncond": -75.57246398925781, "logits_per_token": -3.350067615509033, "logits_per_char": -0.7342613951800621, "num_chars": 73}, {"sum_logits": -98.44174194335938, "num_tokens": 22, "num_tokens_all": 442, "is_greedy": false, "sum_logits_uncond": -121.67782592773438, "logits_per_token": -4.4746246337890625, "logits_per_char": -0.8949249267578125, "num_chars": 110}, {"sum_logits": -91.00312805175781, "num_tokens": 24, "num_tokens_all": 444, "is_greedy": false, "sum_logits_uncond": -129.27822875976562, "logits_per_token": -3.7917970021565757, "logits_per_char": -0.8053374163872373, "num_chars": 113}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 451, "native_id": 10416, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.513935089111328, "incorrect_loss_raw": 17.713028271993, "correct_loss_per_char": 0.2808276851002763, "incorrect_loss_per_char": 0.6072674624723078, "correct_loss_per_token": 1.151393508911133, "incorrect_loss_per_token": 2.520885930742536, "correct_loss_uncond": -33.7895622253418, "incorrect_loss_uncond": -18.70628007253011}, "model_output": [{"sum_logits": -23.840747833251953, "num_tokens": 10, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -44.23089599609375, "logits_per_token": -2.3840747833251954, "logits_per_char": -0.5418351780284535, "num_chars": 44}, {"sum_logits": -11.918976783752441, "num_tokens": 7, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -34.94425964355469, "logits_per_token": -1.7027109691074915, "logits_per_char": -0.41099919943973934, "num_chars": 29}, {"sum_logits": -11.513935089111328, "num_tokens": 10, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -45.303497314453125, "logits_per_token": -1.151393508911133, "logits_per_char": -0.2808276851002763, "num_chars": 41}, {"sum_logits": -17.37936019897461, "num_tokens": 5, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -30.0827693939209, "logits_per_token": -3.475872039794922, "logits_per_char": -0.8689680099487305, "num_chars": 20}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 452, "native_id": 12598, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 33.91252136230469, "incorrect_loss_raw": 60.99687067667643, "correct_loss_per_char": 0.5559429731525358, "incorrect_loss_per_char": 0.7262423402678357, "correct_loss_per_token": 2.4223229544503346, "incorrect_loss_per_token": 3.8178075179851874, "correct_loss_uncond": -46.25475311279297, "incorrect_loss_uncond": -23.818917592366535}, "model_output": [{"sum_logits": -78.87686157226562, "num_tokens": 18, "num_tokens_all": 432, "is_greedy": false, "sum_logits_uncond": -108.3232421875, "logits_per_token": -4.382047865125868, "logits_per_char": -0.7303413108543113, "num_chars": 108}, {"sum_logits": -40.916038513183594, "num_tokens": 16, "num_tokens_all": 430, "is_greedy": false, "sum_logits_uncond": -67.80679321289062, "logits_per_token": -2.5572524070739746, "logits_per_char": -0.5051362779405382, "num_chars": 81}, {"sum_logits": -33.91252136230469, "num_tokens": 14, "num_tokens_all": 428, "is_greedy": false, "sum_logits_uncond": -80.16727447509766, "logits_per_token": -2.4223229544503346, "logits_per_char": -0.5559429731525358, "num_chars": 61}, {"sum_logits": -63.19771194458008, "num_tokens": 14, "num_tokens_all": 428, "is_greedy": false, "sum_logits_uncond": -78.31732940673828, "logits_per_token": -4.51412228175572, "logits_per_char": -0.9432494320086579, "num_chars": 67}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 453, "native_id": 27434, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 76.77970886230469, "incorrect_loss_raw": 78.08863321940105, "correct_loss_per_char": 0.6242252753032902, "incorrect_loss_per_char": 0.5505440435567653, "correct_loss_per_token": 2.5593236287434897, "incorrect_loss_per_token": 2.633419243655221, "correct_loss_uncond": -27.434043884277344, "incorrect_loss_uncond": -24.79791768391927}, "model_output": [{"sum_logits": -53.509254455566406, "num_tokens": 22, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -76.20559692382812, "logits_per_token": -2.4322388388893823, "logits_per_char": -0.5145120620727539, "num_chars": 104}, {"sum_logits": -100.29647064208984, "num_tokens": 36, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -128.1839599609375, "logits_per_token": -2.7860130733913846, "logits_per_char": -0.5934702404857387, "num_chars": 169}, {"sum_logits": -80.46017456054688, "num_tokens": 30, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -104.27009582519531, "logits_per_token": -2.6820058186848956, "logits_per_char": -0.5436498281118032, "num_chars": 148}, {"sum_logits": -76.77970886230469, "num_tokens": 30, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -104.21375274658203, "logits_per_token": -2.5593236287434897, "logits_per_char": -0.6242252753032902, "num_chars": 123}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 454, "native_id": 15339, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 59.99944305419922, "incorrect_loss_raw": 111.69803365071614, "correct_loss_per_char": 0.3947331779881528, "incorrect_loss_per_char": 0.5940550319821504, "correct_loss_per_token": 1.7646895015940947, "incorrect_loss_per_token": 2.368747643614127, "correct_loss_uncond": -37.389686584472656, "incorrect_loss_uncond": -20.617299397786457}, "model_output": [{"sum_logits": -142.2819061279297, "num_tokens": 51, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -157.9254150390625, "logits_per_token": -2.789841296626072, "logits_per_char": -0.7948709839549144, "num_chars": 179}, {"sum_logits": -92.03154754638672, "num_tokens": 40, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -109.73507690429688, "logits_per_token": -2.300788688659668, "logits_per_char": -0.54136204439051, "num_chars": 170}, {"sum_logits": -100.78064727783203, "num_tokens": 50, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -129.28550720214844, "logits_per_token": -2.0156129455566405, "logits_per_char": -0.44593206760102666, "num_chars": 226}, {"sum_logits": -59.99944305419922, "num_tokens": 34, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -97.38912963867188, "logits_per_token": -1.7646895015940947, "logits_per_char": -0.3947331779881528, "num_chars": 152}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 455, "native_id": 17314, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 79.41522979736328, "incorrect_loss_raw": 79.02912902832031, "correct_loss_per_char": 0.5476912399818157, "incorrect_loss_per_char": 0.5286914315968837, "correct_loss_per_token": 2.0898744683516655, "incorrect_loss_per_token": 2.84216168688155, "correct_loss_uncond": -28.59272003173828, "incorrect_loss_uncond": -19.8527348836263}, "model_output": [{"sum_logits": -68.2325439453125, "num_tokens": 23, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -82.85302734375, "logits_per_token": -2.9666323454483696, "logits_per_char": -0.520859114086355, "num_chars": 131}, {"sum_logits": -79.41522979736328, "num_tokens": 38, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -108.00794982910156, "logits_per_token": -2.0898744683516655, "logits_per_char": -0.5476912399818157, "num_chars": 145}, {"sum_logits": -81.27055358886719, "num_tokens": 32, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -112.9599838256836, "logits_per_token": -2.5397047996520996, "logits_per_char": -0.5143705923346025, "num_chars": 158}, {"sum_logits": -87.58428955078125, "num_tokens": 29, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -100.83258056640625, "logits_per_token": -3.020147915544181, "logits_per_char": -0.5508445883696934, "num_chars": 159}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 456, "native_id": 13487, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 84.89994812011719, "incorrect_loss_raw": 94.04817199707031, "correct_loss_per_char": 0.5339619378623722, "incorrect_loss_per_char": 0.622714393585294, "correct_loss_per_token": 2.12249870300293, "incorrect_loss_per_token": 2.7840397298585002, "correct_loss_uncond": -34.477073669433594, "incorrect_loss_uncond": -31.87169647216797}, "model_output": [{"sum_logits": -94.92666625976562, "num_tokens": 30, "num_tokens_all": 494, "is_greedy": false, "sum_logits_uncond": -125.49260711669922, "logits_per_token": -3.1642222086588543, "logits_per_char": -0.7474540650375247, "num_chars": 127}, {"sum_logits": -84.89994812011719, "num_tokens": 40, "num_tokens_all": 504, "is_greedy": false, "sum_logits_uncond": -119.37702178955078, "logits_per_token": -2.12249870300293, "logits_per_char": -0.5339619378623722, "num_chars": 159}, {"sum_logits": -71.45843505859375, "num_tokens": 38, "num_tokens_all": 502, "is_greedy": false, "sum_logits_uncond": -101.7154541015625, "logits_per_token": -1.880485133120888, "logits_per_char": -0.4357221649914253, "num_chars": 164}, {"sum_logits": -115.75941467285156, "num_tokens": 35, "num_tokens_all": 499, "is_greedy": false, "sum_logits_uncond": -150.55154418945312, "logits_per_token": -3.307411847795759, "logits_per_char": -0.6849669507269324, "num_chars": 169}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 457, "native_id": 47889, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 121.28269958496094, "incorrect_loss_raw": 71.37973022460938, "correct_loss_per_char": 0.5205266076607765, "incorrect_loss_per_char": 0.5421649693101874, "correct_loss_per_token": 2.5267229080200195, "incorrect_loss_per_token": 2.300899452633328, "correct_loss_uncond": -35.00376892089844, "incorrect_loss_uncond": -18.481717427571613}, "model_output": [{"sum_logits": -121.28269958496094, "num_tokens": 48, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -156.28646850585938, "logits_per_token": -2.5267229080200195, "logits_per_char": -0.5205266076607765, "num_chars": 233}, {"sum_logits": -46.1177978515625, "num_tokens": 24, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -70.9253921508789, "logits_per_token": -1.9215749104817708, "logits_per_char": -0.44774561020934467, "num_chars": 103}, {"sum_logits": -90.39241027832031, "num_tokens": 32, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -104.92509460449219, "logits_per_token": -2.8247628211975098, "logits_per_char": -0.6646500755758846, "num_chars": 136}, {"sum_logits": -77.62898254394531, "num_tokens": 36, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -93.73385620117188, "logits_per_token": -2.156360626220703, "logits_per_char": -0.5140992221453332, "num_chars": 151}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 458, "native_id": 28681, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 69.99798583984375, "incorrect_loss_raw": 125.70606486002605, "correct_loss_per_char": 0.4166546776181176, "incorrect_loss_per_char": 0.5663254938997103, "correct_loss_per_token": 2.121151086055871, "incorrect_loss_per_token": 2.910601115917391, "correct_loss_uncond": -22.652755737304688, "incorrect_loss_uncond": -24.547078450520832}, "model_output": [{"sum_logits": -100.96669006347656, "num_tokens": 38, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -131.61241149902344, "logits_per_token": -2.6570181595651725, "logits_per_char": -0.5487320112145465, "num_chars": 184}, {"sum_logits": -69.99798583984375, "num_tokens": 33, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -92.65074157714844, "logits_per_token": -2.121151086055871, "logits_per_char": -0.4166546776181176, "num_chars": 168}, {"sum_logits": -143.38311767578125, "num_tokens": 48, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -158.24456787109375, "logits_per_token": -2.9871482849121094, "logits_per_char": -0.5828582019340701, "num_chars": 246}, {"sum_logits": -132.7683868408203, "num_tokens": 43, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -160.90245056152344, "logits_per_token": -3.087636903274891, "logits_per_char": -0.5673862685505141, "num_chars": 234}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 459, "native_id": 45912, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 89.51616668701172, "incorrect_loss_raw": 135.4025904337565, "correct_loss_per_char": 0.45671513615822307, "incorrect_loss_per_char": 0.6580037414554655, "correct_loss_per_token": 2.0344583337957207, "incorrect_loss_per_token": 2.8210098613342027, "correct_loss_uncond": -42.675025939941406, "incorrect_loss_uncond": -14.679176330566406}, "model_output": [{"sum_logits": -147.8331298828125, "num_tokens": 48, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -155.27389526367188, "logits_per_token": -3.0798568725585938, "logits_per_char": -0.70396728515625, "num_chars": 210}, {"sum_logits": -126.91930389404297, "num_tokens": 47, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -145.06741333007812, "logits_per_token": -2.7004107211498503, "logits_per_char": -0.7130297971575448, "num_chars": 178}, {"sum_logits": -131.45533752441406, "num_tokens": 49, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -149.90399169921875, "logits_per_token": -2.6827619902941646, "logits_per_char": -0.5570141420526019, "num_chars": 236}, {"sum_logits": -89.51616668701172, "num_tokens": 44, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -132.19119262695312, "logits_per_token": -2.0344583337957207, "logits_per_char": -0.45671513615822307, "num_chars": 196}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 460, "native_id": 41666, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 24.132381439208984, "incorrect_loss_raw": 54.4515749613444, "correct_loss_per_char": 0.635062669452868, "incorrect_loss_per_char": 0.9126856249142317, "correct_loss_per_token": 3.016547679901123, "incorrect_loss_per_token": 4.060349177171462, "correct_loss_uncond": -9.836170196533203, "incorrect_loss_uncond": -23.23033905029297}, "model_output": [{"sum_logits": -59.60264205932617, "num_tokens": 16, "num_tokens_all": 425, "is_greedy": false, "sum_logits_uncond": -85.90191650390625, "logits_per_token": -3.7251651287078857, "logits_per_char": -0.8765094420489143, "num_chars": 68}, {"sum_logits": -24.132381439208984, "num_tokens": 8, "num_tokens_all": 417, "is_greedy": false, "sum_logits_uncond": -33.96855163574219, "logits_per_token": -3.016547679901123, "logits_per_char": -0.635062669452868, "num_chars": 38}, {"sum_logits": -58.754425048828125, "num_tokens": 17, "num_tokens_all": 426, "is_greedy": false, "sum_logits_uncond": -84.46981048583984, "logits_per_token": -3.456142649931066, "logits_per_char": -0.9792404174804688, "num_chars": 60}, {"sum_logits": -44.997657775878906, "num_tokens": 9, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -62.674015045166016, "logits_per_token": -4.999739752875434, "logits_per_char": -0.8823070152133119, "num_chars": 51}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 461, "native_id": 21610, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 51.026371002197266, "incorrect_loss_raw": 70.30346552530925, "correct_loss_per_char": 0.40178244883619896, "incorrect_loss_per_char": 0.5137794205205771, "correct_loss_per_token": 2.0410548400878907, "incorrect_loss_per_token": 2.2138479643187616, "correct_loss_uncond": -25.347515106201172, "incorrect_loss_uncond": -24.508037567138672}, "model_output": [{"sum_logits": -58.50075912475586, "num_tokens": 27, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -78.94677734375, "logits_per_token": -2.1666947823983653, "logits_per_char": -0.5571500869024367, "num_chars": 105}, {"sum_logits": -51.026371002197266, "num_tokens": 25, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -76.37388610839844, "logits_per_token": -2.0410548400878907, "logits_per_char": -0.40178244883619896, "num_chars": 127}, {"sum_logits": -47.378700256347656, "num_tokens": 24, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -70.68264770507812, "logits_per_token": -1.9741125106811523, "logits_per_char": -0.44279159118081923, "num_chars": 107}, {"sum_logits": -105.03093719482422, "num_tokens": 42, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -134.80508422851562, "logits_per_token": -2.500736599876767, "logits_per_char": -0.5413965834784753, "num_chars": 194}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 462, "native_id": 2048, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 47.3074836730957, "incorrect_loss_raw": 45.3359120686849, "correct_loss_per_char": 0.7060818458671001, "incorrect_loss_per_char": 0.645360195717844, "correct_loss_per_token": 2.9567177295684814, "incorrect_loss_per_token": 2.656044324239095, "correct_loss_uncond": -19.89187240600586, "incorrect_loss_uncond": -34.402740478515625}, "model_output": [{"sum_logits": -32.4971923828125, "num_tokens": 14, "num_tokens_all": 426, "is_greedy": false, "sum_logits_uncond": -79.51583862304688, "logits_per_token": -2.32122802734375, "logits_per_char": -0.5803070068359375, "num_chars": 56}, {"sum_logits": -55.25697326660156, "num_tokens": 16, "num_tokens_all": 428, "is_greedy": false, "sum_logits_uncond": -86.96885681152344, "logits_per_token": -3.4535608291625977, "logits_per_char": -0.8633902072906494, "num_chars": 64}, {"sum_logits": -48.253570556640625, "num_tokens": 22, "num_tokens_all": 434, "is_greedy": false, "sum_logits_uncond": -72.73126220703125, "logits_per_token": -2.1933441162109375, "logits_per_char": -0.4923833730269451, "num_chars": 98}, {"sum_logits": -47.3074836730957, "num_tokens": 16, "num_tokens_all": 428, "is_greedy": false, "sum_logits_uncond": -67.19935607910156, "logits_per_token": -2.9567177295684814, "logits_per_char": -0.7060818458671001, "num_chars": 67}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 463, "native_id": 11489, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 104.41777038574219, "incorrect_loss_raw": 124.83210754394531, "correct_loss_per_char": 0.5800987243652344, "incorrect_loss_per_char": 0.6054541461961215, "correct_loss_per_token": 3.164174860174006, "incorrect_loss_per_token": 2.593384126113387, "correct_loss_uncond": -14.624099731445312, "incorrect_loss_uncond": -9.071891784667969}, "model_output": [{"sum_logits": -89.22377014160156, "num_tokens": 38, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -91.17656707763672, "logits_per_token": -2.347993951094778, "logits_per_char": -0.5127802881701239, "num_chars": 174}, {"sum_logits": -104.41777038574219, "num_tokens": 33, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -119.0418701171875, "logits_per_token": -3.164174860174006, "logits_per_char": -0.5800987243652344, "num_chars": 180}, {"sum_logits": -136.85589599609375, "num_tokens": 52, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -151.36447143554688, "logits_per_token": -2.6318441537710338, "logits_per_char": -0.6164679999824043, "num_chars": 222}, {"sum_logits": -148.41665649414062, "num_tokens": 53, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -159.17095947265625, "logits_per_token": -2.8003142734743514, "logits_per_char": -0.6871141504358362, "num_chars": 216}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 464, "native_id": 48516, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 86.52818298339844, "incorrect_loss_raw": 102.41657129923503, "correct_loss_per_char": 0.5276108718499904, "incorrect_loss_per_char": 0.6933267093211896, "correct_loss_per_token": 2.47223379952567, "incorrect_loss_per_token": 2.9384622800459135, "correct_loss_uncond": -21.95543670654297, "incorrect_loss_uncond": -17.92423375447591}, "model_output": [{"sum_logits": -51.61616897583008, "num_tokens": 18, "num_tokens_all": 430, "is_greedy": false, "sum_logits_uncond": -68.8001480102539, "logits_per_token": -2.867564943101671, "logits_per_char": -0.6218815539256636, "num_chars": 83}, {"sum_logits": -74.14468383789062, "num_tokens": 28, "num_tokens_all": 440, "is_greedy": false, "sum_logits_uncond": -96.7630386352539, "logits_per_token": -2.648024422781808, "logits_per_char": -0.6178723653157552, "num_chars": 120}, {"sum_logits": -86.52818298339844, "num_tokens": 35, "num_tokens_all": 447, "is_greedy": false, "sum_logits_uncond": -108.4836196899414, "logits_per_token": -2.47223379952567, "logits_per_char": -0.5276108718499904, "num_chars": 164}, {"sum_logits": -181.48886108398438, "num_tokens": 55, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -195.459228515625, "logits_per_token": -3.2997974742542615, "logits_per_char": -0.8402262087221499, "num_chars": 216}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 465, "native_id": 48146, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 50.012489318847656, "incorrect_loss_raw": 90.50103251139323, "correct_loss_per_char": 0.35723206656319756, "incorrect_loss_per_char": 0.7586891345349288, "correct_loss_per_token": 1.4709555682014017, "incorrect_loss_per_token": 3.280512562504521, "correct_loss_uncond": -23.436485290527344, "incorrect_loss_uncond": -17.568125406901043}, "model_output": [{"sum_logits": -64.04739379882812, "num_tokens": 18, "num_tokens_all": 440, "is_greedy": false, "sum_logits_uncond": -81.40092468261719, "logits_per_token": -3.55818854437934, "logits_per_char": -0.7810657780344893, "num_chars": 82}, {"sum_logits": -121.40426635742188, "num_tokens": 45, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -134.6986846923828, "logits_per_token": -2.697872585720486, "logits_per_char": -0.6257951874093911, "num_chars": 194}, {"sum_logits": -50.012489318847656, "num_tokens": 34, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -73.448974609375, "logits_per_token": -1.4709555682014017, "logits_per_char": -0.35723206656319756, "num_chars": 140}, {"sum_logits": -86.05143737792969, "num_tokens": 24, "num_tokens_all": 446, "is_greedy": false, "sum_logits_uncond": -108.10786437988281, "logits_per_token": -3.585476557413737, "logits_per_char": -0.8692064381609059, "num_chars": 99}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 466, "native_id": 10703, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 77.9495849609375, "incorrect_loss_raw": 108.14000956217448, "correct_loss_per_char": 0.5567827497209822, "incorrect_loss_per_char": 0.5741415545254273, "correct_loss_per_token": 2.6879167227909484, "incorrect_loss_per_token": 2.4641862299034596, "correct_loss_uncond": -13.220085144042969, "incorrect_loss_uncond": -25.174967447916668}, "model_output": [{"sum_logits": -77.9495849609375, "num_tokens": 29, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -91.16967010498047, "logits_per_token": -2.6879167227909484, "logits_per_char": -0.5567827497209822, "num_chars": 140}, {"sum_logits": -139.23606872558594, "num_tokens": 59, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -167.32691955566406, "logits_per_token": -2.35993336823027, "logits_per_char": -0.5614357609902659, "num_chars": 248}, {"sum_logits": -139.61459350585938, "num_tokens": 53, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -160.58843994140625, "logits_per_token": -2.6342376133181014, "logits_per_char": -0.5841614791040142, "num_chars": 239}, {"sum_logits": -45.569366455078125, "num_tokens": 19, "num_tokens_all": 445, "is_greedy": false, "sum_logits_uncond": -72.02957153320312, "logits_per_token": -2.3983877081620064, "logits_per_char": -0.5768274234820016, "num_chars": 79}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 467, "native_id": 14481, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 130.665283203125, "incorrect_loss_raw": 173.9648895263672, "correct_loss_per_char": 0.5756179876789648, "incorrect_loss_per_char": 0.761434482639686, "correct_loss_per_token": 2.562064376531863, "incorrect_loss_per_token": 3.936492152118509, "correct_loss_uncond": -27.8311767578125, "incorrect_loss_uncond": -13.65618896484375}, "model_output": [{"sum_logits": -130.665283203125, "num_tokens": 51, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -158.4964599609375, "logits_per_token": -2.562064376531863, "logits_per_char": -0.5756179876789648, "num_chars": 227}, {"sum_logits": -204.84848022460938, "num_tokens": 48, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -219.67025756835938, "logits_per_token": -4.267676671346028, "logits_per_char": -0.8906455661939539, "num_chars": 230}, {"sum_logits": -178.65560913085938, "num_tokens": 61, "num_tokens_all": 495, "is_greedy": false, "sum_logits_uncond": -193.58721923828125, "logits_per_token": -2.9287804775550716, "logits_per_char": -0.6290690462354204, "num_chars": 284}, {"sum_logits": -138.3905792236328, "num_tokens": 30, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -149.6057586669922, "logits_per_token": -4.613019307454427, "logits_per_char": -0.764588835489684, "num_chars": 181}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 468, "native_id": 50209, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 84.6252212524414, "incorrect_loss_raw": 80.43583170572917, "correct_loss_per_char": 0.4754225913058506, "incorrect_loss_per_char": 0.623363510255206, "correct_loss_per_token": 2.3507005903455944, "incorrect_loss_per_token": 2.879720298690025, "correct_loss_uncond": -26.925880432128906, "incorrect_loss_uncond": -18.737208048502605}, "model_output": [{"sum_logits": -72.73799133300781, "num_tokens": 25, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -88.99644470214844, "logits_per_token": -2.9095196533203125, "logits_per_char": -0.5865967042984501, "num_chars": 124}, {"sum_logits": -84.6252212524414, "num_tokens": 36, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -111.55110168457031, "logits_per_token": -2.3507005903455944, "logits_per_char": -0.4754225913058506, "num_chars": 178}, {"sum_logits": -64.08955383300781, "num_tokens": 25, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -83.21317291259766, "logits_per_token": -2.5635821533203127, "logits_per_char": -0.6345500379505724, "num_chars": 101}, {"sum_logits": -104.47994995117188, "num_tokens": 33, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -125.30950164794922, "logits_per_token": -3.1660590894294507, "logits_per_char": -0.6489437885165955, "num_chars": 161}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 469, "native_id": 6369, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 126.23847198486328, "incorrect_loss_raw": 99.86452738444011, "correct_loss_per_char": 0.5712148053613724, "incorrect_loss_per_char": 0.47616479787429683, "correct_loss_per_token": 2.4276629227858324, "incorrect_loss_per_token": 2.3244310154815033, "correct_loss_uncond": -37.92560577392578, "incorrect_loss_uncond": -37.857208251953125}, "model_output": [{"sum_logits": -79.7906494140625, "num_tokens": 48, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -121.82785034179688, "logits_per_token": -1.662305196126302, "logits_per_char": -0.3763709878021816, "num_chars": 212}, {"sum_logits": -126.23847198486328, "num_tokens": 52, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -164.16407775878906, "logits_per_token": -2.4276629227858324, "logits_per_char": -0.5712148053613724, "num_chars": 221}, {"sum_logits": -131.59390258789062, "num_tokens": 46, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -159.85972595214844, "logits_per_token": -2.860737012780231, "logits_per_char": -0.5647806978021057, "num_chars": 233}, {"sum_logits": -88.20903015136719, "num_tokens": 36, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -131.47763061523438, "logits_per_token": -2.4502508375379772, "logits_per_char": -0.48734270801860324, "num_chars": 181}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 470, "native_id": 39166, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 30.865644454956055, "incorrect_loss_raw": 27.57004737854004, "correct_loss_per_char": 0.5321662837061388, "incorrect_loss_per_char": 0.6455794450650713, "correct_loss_per_token": 2.374280342688927, "incorrect_loss_per_token": 3.2993551890055337, "correct_loss_uncond": -27.388383865356445, "incorrect_loss_uncond": -18.89159329732259}, "model_output": [{"sum_logits": -36.67305374145508, "num_tokens": 9, "num_tokens_all": 413, "is_greedy": false, "sum_logits_uncond": -46.6458740234375, "logits_per_token": -4.074783749050564, "logits_per_char": -0.7334610748291016, "num_chars": 50}, {"sum_logits": -22.30356788635254, "num_tokens": 7, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -41.332027435302734, "logits_per_token": -3.1862239837646484, "logits_per_char": -0.6758656935258345, "num_chars": 33}, {"sum_logits": -23.7335205078125, "num_tokens": 9, "num_tokens_all": 413, "is_greedy": false, "sum_logits_uncond": -51.407020568847656, "logits_per_token": -2.637057834201389, "logits_per_char": -0.5274115668402778, "num_chars": 45}, {"sum_logits": -30.865644454956055, "num_tokens": 13, "num_tokens_all": 417, "is_greedy": false, "sum_logits_uncond": -58.2540283203125, "logits_per_token": -2.374280342688927, "logits_per_char": -0.5321662837061388, "num_chars": 58}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 471, "native_id": 49227, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 18.244457244873047, "incorrect_loss_raw": 44.555468241373696, "correct_loss_per_char": 0.5212702069963727, "incorrect_loss_per_char": 0.7132578195143977, "correct_loss_per_token": 2.0271619160970054, "incorrect_loss_per_token": 3.2237844018854642, "correct_loss_uncond": -20.65548324584961, "incorrect_loss_uncond": -18.94337336222331}, "model_output": [{"sum_logits": -48.37062454223633, "num_tokens": 13, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -64.1736068725586, "logits_per_token": -3.7208172724797177, "logits_per_char": -0.6910089220319475, "num_chars": 70}, {"sum_logits": -18.244457244873047, "num_tokens": 9, "num_tokens_all": 399, "is_greedy": false, "sum_logits_uncond": -38.899940490722656, "logits_per_token": -2.0271619160970054, "logits_per_char": -0.5212702069963727, "num_chars": 35}, {"sum_logits": -55.557395935058594, "num_tokens": 16, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -63.891319274902344, "logits_per_token": -3.472337245941162, "logits_per_char": -0.829214864702367, "num_chars": 67}, {"sum_logits": -29.738384246826172, "num_tokens": 12, "num_tokens_all": 402, "is_greedy": false, "sum_logits_uncond": -62.43159866333008, "logits_per_token": -2.478198687235514, "logits_per_char": -0.6195496718088785, "num_chars": 48}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 472, "native_id": 50313, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 78.25491333007812, "incorrect_loss_raw": 107.94302368164062, "correct_loss_per_char": 0.5670645893483922, "incorrect_loss_per_char": 0.6519311235299162, "correct_loss_per_token": 2.4454660415649414, "incorrect_loss_per_token": 2.9566845955786767, "correct_loss_uncond": -21.26641845703125, "incorrect_loss_uncond": -31.609100341796875}, "model_output": [{"sum_logits": -110.65397644042969, "num_tokens": 44, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -142.94528198242188, "logits_per_token": -2.5148631009188565, "logits_per_char": -0.5885849810661153, "num_chars": 188}, {"sum_logits": -98.73684692382812, "num_tokens": 32, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -132.1180419921875, "logits_per_token": -3.085526466369629, "logits_per_char": -0.6288971141645103, "num_chars": 157}, {"sum_logits": -114.43824768066406, "num_tokens": 35, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -143.59304809570312, "logits_per_token": -3.2696642194475447, "logits_per_char": -0.738311275359123, "num_chars": 155}, {"sum_logits": -78.25491333007812, "num_tokens": 32, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -99.52133178710938, "logits_per_token": -2.4454660415649414, "logits_per_char": -0.5670645893483922, "num_chars": 138}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 473, "native_id": 31099, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 60.286895751953125, "incorrect_loss_raw": 82.4027608235677, "correct_loss_per_char": 0.5152726132645566, "incorrect_loss_per_char": 0.4993640298081317, "correct_loss_per_token": 2.009563191731771, "incorrect_loss_per_token": 2.0983257636210464, "correct_loss_uncond": -19.73260498046875, "incorrect_loss_uncond": -20.772542317708332}, "model_output": [{"sum_logits": -95.8297119140625, "num_tokens": 44, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -115.30333709716797, "logits_per_token": -2.177947998046875, "logits_per_char": -0.45633196149553573, "num_chars": 210}, {"sum_logits": -104.09566497802734, "num_tokens": 44, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -129.56741333007812, "logits_per_token": -2.3658105676824395, "logits_per_char": -0.6233273351977685, "num_chars": 167}, {"sum_logits": -47.28290557861328, "num_tokens": 27, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -64.65515899658203, "logits_per_token": -1.7512187251338251, "logits_per_char": -0.41843279273109096, "num_chars": 113}, {"sum_logits": -60.286895751953125, "num_tokens": 30, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -80.01950073242188, "logits_per_token": -2.009563191731771, "logits_per_char": -0.5152726132645566, "num_chars": 117}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 474, "native_id": 49858, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 61.850921630859375, "incorrect_loss_raw": 162.28319295247397, "correct_loss_per_char": 0.4870151309516486, "incorrect_loss_per_char": 0.7605676552374071, "correct_loss_per_token": 2.474036865234375, "incorrect_loss_per_token": 3.2046671612711655, "correct_loss_uncond": -34.42877960205078, "incorrect_loss_uncond": -21.830739339192707}, "model_output": [{"sum_logits": -175.28024291992188, "num_tokens": 55, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -205.26177978515625, "logits_per_token": -3.186913507634943, "logits_per_char": -0.7687729952628153, "num_chars": 228}, {"sum_logits": -145.5379638671875, "num_tokens": 45, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -157.16796875, "logits_per_token": -3.234176974826389, "logits_per_char": -0.7782778816427139, "num_chars": 187}, {"sum_logits": -166.0313720703125, "num_tokens": 52, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -189.91204833984375, "logits_per_token": -3.1929110013521633, "logits_per_char": -0.7346520888066925, "num_chars": 226}, {"sum_logits": -61.850921630859375, "num_tokens": 25, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -96.27970123291016, "logits_per_token": -2.474036865234375, "logits_per_char": -0.4870151309516486, "num_chars": 127}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 475, "native_id": 30024, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 35.45415115356445, "incorrect_loss_raw": 76.01461791992188, "correct_loss_per_char": 0.460443521474863, "incorrect_loss_per_char": 0.5725333867479024, "correct_loss_per_token": 1.969675064086914, "incorrect_loss_per_token": 2.514187548029336, "correct_loss_uncond": -16.818252563476562, "incorrect_loss_uncond": -23.618133544921875}, "model_output": [{"sum_logits": -72.10932159423828, "num_tokens": 31, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -92.19392395019531, "logits_per_token": -2.3261071482012348, "logits_per_char": -0.5150665828159877, "num_chars": 140}, {"sum_logits": -71.62891387939453, "num_tokens": 22, "num_tokens_all": 441, "is_greedy": false, "sum_logits_uncond": -100.03875732421875, "logits_per_token": -3.2558597217906606, "logits_per_char": -0.7235243826201467, "num_chars": 99}, {"sum_logits": -35.45415115356445, "num_tokens": 18, "num_tokens_all": 437, "is_greedy": false, "sum_logits_uncond": -52.272403717041016, "logits_per_token": -1.969675064086914, "logits_per_char": -0.460443521474863, "num_chars": 77}, {"sum_logits": -84.30561828613281, "num_tokens": 43, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -106.66557312011719, "logits_per_token": -1.960595774096112, "logits_per_char": -0.4790091948075728, "num_chars": 176}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 476, "native_id": 26574, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 63.302223205566406, "incorrect_loss_raw": 105.23172251383464, "correct_loss_per_char": 0.3385145625966118, "incorrect_loss_per_char": 0.6382335902815214, "correct_loss_per_token": 1.5071957906087239, "incorrect_loss_per_token": 2.629203107363486, "correct_loss_uncond": -27.880638122558594, "incorrect_loss_uncond": -27.739662170410156}, "model_output": [{"sum_logits": -63.302223205566406, "num_tokens": 42, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -91.182861328125, "logits_per_token": -1.5071957906087239, "logits_per_char": -0.3385145625966118, "num_chars": 187}, {"sum_logits": -72.80203247070312, "num_tokens": 30, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -100.58787536621094, "logits_per_token": -2.4267344156901043, "logits_per_char": -0.5314016968664461, "num_chars": 137}, {"sum_logits": -130.26695251464844, "num_tokens": 48, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -160.48907470703125, "logits_per_token": -2.713894844055176, "logits_per_char": -0.674958303184707, "num_chars": 193}, {"sum_logits": -112.62618255615234, "num_tokens": 41, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -137.8372039794922, "logits_per_token": -2.746980062345179, "logits_per_char": -0.708340770793411, "num_chars": 159}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 477, "native_id": 16992, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 87.90760803222656, "incorrect_loss_raw": 123.40200297037761, "correct_loss_per_char": 0.47775873930557916, "incorrect_loss_per_char": 0.5477778372281951, "correct_loss_per_token": 1.8703746389835438, "incorrect_loss_per_token": 2.7409253101135964, "correct_loss_uncond": -32.459693908691406, "incorrect_loss_uncond": -23.11077372233073}, "model_output": [{"sum_logits": -149.4626007080078, "num_tokens": 43, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -169.12640380859375, "logits_per_token": -3.475874435069949, "logits_per_char": -0.644235347879344, "num_chars": 232}, {"sum_logits": -133.2142791748047, "num_tokens": 51, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -158.08119201660156, "logits_per_token": -2.612044689702053, "logits_per_char": -0.5741994792017443, "num_chars": 232}, {"sum_logits": -87.90760803222656, "num_tokens": 47, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -120.36730194091797, "logits_per_token": -1.8703746389835438, "logits_per_char": -0.47775873930557916, "num_chars": 184}, {"sum_logits": -87.52912902832031, "num_tokens": 41, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -112.33073425292969, "logits_per_token": -2.134856805568788, "logits_per_char": -0.42489868460349667, "num_chars": 206}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 478, "native_id": 39374, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 93.99403381347656, "incorrect_loss_raw": 102.20306396484375, "correct_loss_per_char": 0.5136286000736424, "incorrect_loss_per_char": 0.5846902956941181, "correct_loss_per_token": 2.3498508453369142, "incorrect_loss_per_token": 2.7990993751312225, "correct_loss_uncond": -29.12299346923828, "incorrect_loss_uncond": -30.089569091796875}, "model_output": [{"sum_logits": -119.36709594726562, "num_tokens": 41, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -146.35903930664062, "logits_per_token": -2.9113925840796493, "logits_per_char": -0.6452275456608952, "num_chars": 185}, {"sum_logits": -105.23806762695312, "num_tokens": 36, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -132.6348419189453, "logits_per_token": -2.9232796563042536, "logits_per_char": -0.6118492303892623, "num_chars": 172}, {"sum_logits": -82.0040283203125, "num_tokens": 32, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -117.88401794433594, "logits_per_token": -2.5626258850097656, "logits_per_char": -0.496994111032197, "num_chars": 165}, {"sum_logits": -93.99403381347656, "num_tokens": 40, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -123.11702728271484, "logits_per_token": -2.3498508453369142, "logits_per_char": -0.5136286000736424, "num_chars": 183}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 479, "native_id": 8795, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 91.86775207519531, "incorrect_loss_raw": 97.7114969889323, "correct_loss_per_char": 0.46397854583431974, "incorrect_loss_per_char": 0.5603078971991365, "correct_loss_per_token": 2.1364593505859375, "incorrect_loss_per_token": 2.50935436477009, "correct_loss_uncond": -17.254791259765625, "incorrect_loss_uncond": -18.21673329671224}, "model_output": [{"sum_logits": -80.06082916259766, "num_tokens": 30, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -105.37944030761719, "logits_per_token": -2.6686943054199217, "logits_per_char": -0.555977980295817, "num_chars": 144}, {"sum_logits": -89.13040924072266, "num_tokens": 36, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -107.39957427978516, "logits_per_token": -2.475844701131185, "logits_per_char": -0.564116514181789, "num_chars": 158}, {"sum_logits": -123.94325256347656, "num_tokens": 52, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -135.00567626953125, "logits_per_token": -2.383524087759165, "logits_per_char": -0.5608291971198035, "num_chars": 221}, {"sum_logits": -91.86775207519531, "num_tokens": 43, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -109.12254333496094, "logits_per_token": -2.1364593505859375, "logits_per_char": -0.46397854583431974, "num_chars": 198}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 480, "native_id": 28845, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 79.35488891601562, "incorrect_loss_raw": 119.78160858154297, "correct_loss_per_char": 0.6104222224308894, "incorrect_loss_per_char": 0.6655122949811806, "correct_loss_per_token": 2.5598351263230845, "incorrect_loss_per_token": 2.7325441472014487, "correct_loss_uncond": -29.416427612304688, "incorrect_loss_uncond": -26.931597391764324}, "model_output": [{"sum_logits": -79.35488891601562, "num_tokens": 31, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -108.77131652832031, "logits_per_token": -2.5598351263230845, "logits_per_char": -0.6104222224308894, "num_chars": 130}, {"sum_logits": -141.98143005371094, "num_tokens": 48, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -174.4034881591797, "logits_per_token": -2.957946459452311, "logits_per_char": -0.7931923466687761, "num_chars": 179}, {"sum_logits": -117.35568237304688, "num_tokens": 45, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -148.255126953125, "logits_per_token": -2.607904052734375, "logits_per_char": -0.6080605304302947, "num_chars": 193}, {"sum_logits": -100.0077133178711, "num_tokens": 38, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -117.48100280761719, "logits_per_token": -2.6317819294176603, "logits_per_char": -0.5952840078444708, "num_chars": 168}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 481, "native_id": 23540, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 65.84947204589844, "incorrect_loss_raw": 133.59872690836588, "correct_loss_per_char": 0.4988596367113518, "incorrect_loss_per_char": 0.6121446772575351, "correct_loss_per_token": 2.124176517609627, "incorrect_loss_per_token": 2.564566048597678, "correct_loss_uncond": -20.342666625976562, "incorrect_loss_uncond": -22.373380025227863}, "model_output": [{"sum_logits": -129.3175811767578, "num_tokens": 52, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -149.02566528320312, "logits_per_token": -2.4868765610914965, "logits_per_char": -0.5798994671603489, "num_chars": 223}, {"sum_logits": -168.5486297607422, "num_tokens": 64, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -188.12387084960938, "logits_per_token": -2.6335723400115967, "logits_per_char": -0.6289127976147096, "num_chars": 268}, {"sum_logits": -102.92996978759766, "num_tokens": 40, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -130.76678466796875, "logits_per_token": -2.5732492446899413, "logits_per_char": -0.6276217669975467, "num_chars": 164}, {"sum_logits": -65.84947204589844, "num_tokens": 31, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -86.192138671875, "logits_per_token": -2.124176517609627, "logits_per_char": -0.4988596367113518, "num_chars": 132}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 482, "native_id": 7669, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 101.49874877929688, "incorrect_loss_raw": 117.79955546061198, "correct_loss_per_char": 0.5178507590780452, "incorrect_loss_per_char": 0.6333725030407934, "correct_loss_per_token": 2.6025320199819713, "incorrect_loss_per_token": 2.6354498969184026, "correct_loss_uncond": -27.451507568359375, "incorrect_loss_uncond": -16.196461995442707}, "model_output": [{"sum_logits": -101.49874877929688, "num_tokens": 39, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -128.95025634765625, "logits_per_token": -2.6025320199819713, "logits_per_char": -0.5178507590780452, "num_chars": 196}, {"sum_logits": -111.53367614746094, "num_tokens": 45, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -130.53660583496094, "logits_per_token": -2.478526136610243, "logits_per_char": -0.5870193481445313, "num_chars": 190}, {"sum_logits": -136.83392333984375, "num_tokens": 45, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -151.92391967773438, "logits_per_token": -3.0407538519965276, "logits_per_char": -0.6841696166992187, "num_chars": 200}, {"sum_logits": -105.03106689453125, "num_tokens": 44, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -119.52752685546875, "logits_per_token": -2.3870697021484375, "logits_per_char": -0.6289285442786302, "num_chars": 167}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 483, "native_id": 45617, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 103.78482818603516, "incorrect_loss_raw": 74.46595637003581, "correct_loss_per_char": 0.9103932297020628, "incorrect_loss_per_char": 0.7499237820358542, "correct_loss_per_token": 3.8438825254087097, "incorrect_loss_per_token": 3.196143007185492, "correct_loss_uncond": -15.495887756347656, "incorrect_loss_uncond": -14.695476531982422}, "model_output": [{"sum_logits": -94.76663208007812, "num_tokens": 36, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -108.57389068603516, "logits_per_token": -2.6324064466688366, "logits_per_char": -0.6275935899342923, "num_chars": 151}, {"sum_logits": -65.03378295898438, "num_tokens": 19, "num_tokens_all": 441, "is_greedy": false, "sum_logits_uncond": -82.98079681396484, "logits_per_token": -3.422830682051809, "logits_per_char": -0.7742117018926711, "num_chars": 84}, {"sum_logits": -103.78482818603516, "num_tokens": 27, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -119.28071594238281, "logits_per_token": -3.8438825254087097, "logits_per_char": -0.9103932297020628, "num_chars": 114}, {"sum_logits": -63.59745407104492, "num_tokens": 18, "num_tokens_all": 440, "is_greedy": false, "sum_logits_uncond": -75.92961120605469, "logits_per_token": -3.533191892835829, "logits_per_char": -0.847966054280599, "num_chars": 75}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 484, "native_id": 35818, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 151.90484619140625, "incorrect_loss_raw": 175.90332539876303, "correct_loss_per_char": 0.46454081404099773, "incorrect_loss_per_char": 0.5702991577942841, "correct_loss_per_token": 2.4111880347842263, "incorrect_loss_per_token": 2.9902271309440565, "correct_loss_uncond": -30.432403564453125, "incorrect_loss_uncond": -18.780548095703125}, "model_output": [{"sum_logits": -151.90484619140625, "num_tokens": 63, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -182.33724975585938, "logits_per_token": -2.4111880347842263, "logits_per_char": -0.46454081404099773, "num_chars": 327}, {"sum_logits": -202.16778564453125, "num_tokens": 58, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -213.29620361328125, "logits_per_token": -3.485651476629849, "logits_per_char": -0.6650256106728002, "num_chars": 304}, {"sum_logits": -193.7648468017578, "num_tokens": 68, "num_tokens_all": 493, "is_greedy": false, "sum_logits_uncond": -212.5160369873047, "logits_per_token": -2.849483041202321, "logits_per_char": -0.5504683147777211, "num_chars": 352}, {"sum_logits": -131.77734375, "num_tokens": 50, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -158.2393798828125, "logits_per_token": -2.635546875, "logits_per_char": -0.4954035479323308, "num_chars": 266}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 485, "native_id": 14327, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 82.72598266601562, "incorrect_loss_raw": 99.53323109944661, "correct_loss_per_char": 0.4570496279890366, "incorrect_loss_per_char": 0.6245741067793801, "correct_loss_per_token": 2.2358373693517737, "incorrect_loss_per_token": 3.2468191377324183, "correct_loss_uncond": -35.760009765625, "incorrect_loss_uncond": -26.77045440673828}, "model_output": [{"sum_logits": -84.39669036865234, "num_tokens": 28, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -112.42210388183594, "logits_per_token": -3.014167513166155, "logits_per_char": -0.5444947765719506, "num_chars": 155}, {"sum_logits": -82.72598266601562, "num_tokens": 37, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -118.48599243164062, "logits_per_token": -2.2358373693517737, "logits_per_char": -0.4570496279890366, "num_chars": 181}, {"sum_logits": -130.1560516357422, "num_tokens": 34, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -154.91099548339844, "logits_per_token": -3.828119165757123, "logits_per_char": -0.7112352548401213, "num_chars": 183}, {"sum_logits": -84.04695129394531, "num_tokens": 29, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -111.57795715332031, "logits_per_token": -2.898170734273976, "logits_per_char": -0.6179922889260685, "num_chars": 136}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 486, "native_id": 6286, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 85.99217224121094, "incorrect_loss_raw": 100.65078989664714, "correct_loss_per_char": 0.6055786777550066, "incorrect_loss_per_char": 0.7581239572798658, "correct_loss_per_token": 2.456919206891741, "incorrect_loss_per_token": 3.2595709301176528, "correct_loss_uncond": -25.80316162109375, "incorrect_loss_uncond": -26.110939025878906}, "model_output": [{"sum_logits": -72.62116241455078, "num_tokens": 20, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -100.74075317382812, "logits_per_token": -3.631058120727539, "logits_per_char": -0.8856239318847656, "num_chars": 82}, {"sum_logits": -85.99217224121094, "num_tokens": 35, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -111.79533386230469, "logits_per_token": -2.456919206891741, "logits_per_char": -0.6055786777550066, "num_chars": 142}, {"sum_logits": -113.30635070800781, "num_tokens": 40, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -139.79495239257812, "logits_per_token": -2.8326587677001953, "logits_per_char": -0.7405643837124694, "num_chars": 153}, {"sum_logits": -116.02485656738281, "num_tokens": 35, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -139.74948120117188, "logits_per_token": -3.314995901925223, "logits_per_char": -0.6481835562423621, "num_chars": 179}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 487, "native_id": 11125, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 62.06634521484375, "incorrect_loss_raw": 89.52756881713867, "correct_loss_per_char": 0.4811344590297965, "incorrect_loss_per_char": 0.6466619634691824, "correct_loss_per_token": 2.002140168220766, "incorrect_loss_per_token": 2.7535574693069713, "correct_loss_uncond": -13.385345458984375, "incorrect_loss_uncond": -15.92172114054362}, "model_output": [{"sum_logits": -94.52330780029297, "num_tokens": 33, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -107.78609466552734, "logits_per_token": -2.8643426606149385, "logits_per_char": -0.7327388201573098, "num_chars": 129}, {"sum_logits": -49.40732955932617, "num_tokens": 16, "num_tokens_all": 436, "is_greedy": false, "sum_logits_uncond": -63.95635223388672, "logits_per_token": -3.0879580974578857, "logits_per_char": -0.6768127336893996, "num_chars": 73}, {"sum_logits": -62.06634521484375, "num_tokens": 31, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -75.45169067382812, "logits_per_token": -2.002140168220766, "logits_per_char": -0.4811344590297965, "num_chars": 129}, {"sum_logits": -124.65206909179688, "num_tokens": 54, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -144.6054229736328, "logits_per_token": -2.30837164984809, "logits_per_char": -0.5304343365608377, "num_chars": 235}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 488, "native_id": 39566, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 28.52057647705078, "incorrect_loss_raw": 30.942352930704754, "correct_loss_per_char": 0.6790613446916852, "incorrect_loss_per_char": 0.8104235055768658, "correct_loss_per_token": 3.1689529418945312, "incorrect_loss_per_token": 3.5834496944440333, "correct_loss_uncond": -20.700313568115234, "incorrect_loss_uncond": -16.886353810628254}, "model_output": [{"sum_logits": -22.455215454101562, "num_tokens": 6, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -29.240324020385742, "logits_per_token": -3.7425359090169272, "logits_per_char": -1.0692959740048362, "num_chars": 21}, {"sum_logits": -28.52057647705078, "num_tokens": 9, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -49.220890045166016, "logits_per_token": -3.1689529418945312, "logits_per_char": -0.6790613446916852, "num_chars": 42}, {"sum_logits": -40.15838623046875, "num_tokens": 11, "num_tokens_all": 417, "is_greedy": false, "sum_logits_uncond": -61.448448181152344, "logits_per_token": -3.6507623845880683, "logits_per_char": -0.7577054005748821, "num_chars": 53}, {"sum_logits": -30.213457107543945, "num_tokens": 9, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -52.79734802246094, "logits_per_token": -3.357050789727105, "logits_per_char": -0.6042691421508789, "num_chars": 50}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 489, "native_id": 40760, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 65.43832397460938, "incorrect_loss_raw": 60.129041035970054, "correct_loss_per_char": 0.6292146536020132, "incorrect_loss_per_char": 0.7126510230207508, "correct_loss_per_token": 3.1161106654575894, "incorrect_loss_per_token": 3.6402583221050357, "correct_loss_uncond": -38.51800537109375, "incorrect_loss_uncond": -15.053538004557291}, "model_output": [{"sum_logits": -62.95610809326172, "num_tokens": 19, "num_tokens_all": 430, "is_greedy": false, "sum_logits_uncond": -79.00666809082031, "logits_per_token": -3.313479373329564, "logits_per_char": -0.66974583077938, "num_chars": 94}, {"sum_logits": -78.77873229980469, "num_tokens": 17, "num_tokens_all": 428, "is_greedy": false, "sum_logits_uncond": -83.74446105957031, "logits_per_token": -4.6340430764591, "logits_per_char": -0.9160317709279615, "num_chars": 86}, {"sum_logits": -65.43832397460938, "num_tokens": 21, "num_tokens_all": 432, "is_greedy": false, "sum_logits_uncond": -103.95632934570312, "logits_per_token": -3.1161106654575894, "logits_per_char": -0.6292146536020132, "num_chars": 104}, {"sum_logits": -38.65228271484375, "num_tokens": 13, "num_tokens_all": 424, "is_greedy": false, "sum_logits_uncond": -62.796607971191406, "logits_per_token": -2.9732525165264425, "logits_per_char": -0.5521754673549107, "num_chars": 70}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 490, "native_id": 25727, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 73.84548950195312, "incorrect_loss_raw": 103.0328877766927, "correct_loss_per_char": 0.5724456550539002, "incorrect_loss_per_char": 0.7325786246166537, "correct_loss_per_token": 2.307671546936035, "incorrect_loss_per_token": 3.3058659685809455, "correct_loss_uncond": -22.721572875976562, "incorrect_loss_uncond": -20.905985514322918}, "model_output": [{"sum_logits": -124.24278259277344, "num_tokens": 34, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -149.64161682128906, "logits_per_token": -3.654199488022748, "logits_per_char": -0.7351643940400795, "num_chars": 169}, {"sum_logits": -84.09349060058594, "num_tokens": 27, "num_tokens_all": 448, "is_greedy": false, "sum_logits_uncond": -102.33158874511719, "logits_per_token": -3.1145737259476274, "logits_per_char": -0.7376621982507539, "num_chars": 114}, {"sum_logits": -100.76239013671875, "num_tokens": 32, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -119.84341430664062, "logits_per_token": -3.148824691772461, "logits_per_char": -0.7249092815591277, "num_chars": 139}, {"sum_logits": -73.84548950195312, "num_tokens": 32, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -96.56706237792969, "logits_per_token": -2.307671546936035, "logits_per_char": -0.5724456550539002, "num_chars": 129}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 491, "native_id": 39446, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 34.5882568359375, "incorrect_loss_raw": 42.024645487467446, "correct_loss_per_char": 0.6405232747395834, "incorrect_loss_per_char": 0.7360787194301426, "correct_loss_per_token": 3.144386985085227, "incorrect_loss_per_token": 3.2553307506482096, "correct_loss_uncond": -15.424125671386719, "incorrect_loss_uncond": -17.01441701253255}, "model_output": [{"sum_logits": -35.951332092285156, "num_tokens": 14, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -50.279930114746094, "logits_per_token": -2.5679522923060825, "logits_per_char": -0.6536605834960938, "num_chars": 55}, {"sum_logits": -34.5882568359375, "num_tokens": 11, "num_tokens_all": 400, "is_greedy": false, "sum_logits_uncond": -50.01238250732422, "logits_per_token": -3.144386985085227, "logits_per_char": -0.6405232747395834, "num_chars": 54}, {"sum_logits": -41.42298126220703, "num_tokens": 12, "num_tokens_all": 401, "is_greedy": false, "sum_logits_uncond": -55.45482635498047, "logits_per_token": -3.4519151051839194, "logits_per_char": -0.7815656841925855, "num_chars": 53}, {"sum_logits": -48.699623107910156, "num_tokens": 13, "num_tokens_all": 402, "is_greedy": false, "sum_logits_uncond": -71.38243103027344, "logits_per_token": -3.7461248544546275, "logits_per_char": -0.7730098906017485, "num_chars": 63}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 492, "native_id": 7871, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 20.440052032470703, "incorrect_loss_raw": 42.13971265157064, "correct_loss_per_char": 0.8176020812988282, "incorrect_loss_per_char": 0.7251558625762996, "correct_loss_per_token": 2.555006504058838, "incorrect_loss_per_token": 2.785115133013044, "correct_loss_uncond": -16.75385284423828, "incorrect_loss_uncond": -27.494084040323894}, "model_output": [{"sum_logits": -20.440052032470703, "num_tokens": 8, "num_tokens_all": 405, "is_greedy": false, "sum_logits_uncond": -37.193904876708984, "logits_per_token": -2.555006504058838, "logits_per_char": -0.8176020812988282, "num_chars": 25}, {"sum_logits": -45.626502990722656, "num_tokens": 14, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -75.82968139648438, "logits_per_token": -3.2590359279087613, "logits_per_char": -0.786663844667632, "num_chars": 58}, {"sum_logits": -59.659080505371094, "num_tokens": 20, "num_tokens_all": 417, "is_greedy": false, "sum_logits_uncond": -83.91876220703125, "logits_per_token": -2.9829540252685547, "logits_per_char": -0.7849879013864618, "num_chars": 76}, {"sum_logits": -21.133554458618164, "num_tokens": 10, "num_tokens_all": 407, "is_greedy": false, "sum_logits_uncond": -49.15294647216797, "logits_per_token": -2.1133554458618162, "logits_per_char": -0.6038158416748047, "num_chars": 35}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 493, "native_id": 4335, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 162.24166870117188, "incorrect_loss_raw": 76.44879659016927, "correct_loss_per_char": 0.7725793747674852, "incorrect_loss_per_char": 0.6434490238132893, "correct_loss_per_token": 2.897172655378069, "incorrect_loss_per_token": 2.7693688991742254, "correct_loss_uncond": -31.5601806640625, "incorrect_loss_uncond": -28.907732645670574}, "model_output": [{"sum_logits": -71.37403106689453, "num_tokens": 26, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -96.39783477783203, "logits_per_token": -2.745155041034405, "logits_per_char": -0.7066735749197478, "num_chars": 101}, {"sum_logits": -71.12328338623047, "num_tokens": 32, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -101.71044921875, "logits_per_token": -2.222602605819702, "logits_per_char": -0.46183950250799005, "num_chars": 154}, {"sum_logits": -86.84907531738281, "num_tokens": 26, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -117.9613037109375, "logits_per_token": -3.3403490506685696, "logits_per_char": -0.7618339940121299, "num_chars": 114}, {"sum_logits": -162.24166870117188, "num_tokens": 56, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -193.80184936523438, "logits_per_token": -2.897172655378069, "logits_per_char": -0.7725793747674852, "num_chars": 210}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 494, "native_id": 45888, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 92.24015808105469, "incorrect_loss_raw": 88.15708669026692, "correct_loss_per_char": 0.46822415269570905, "incorrect_loss_per_char": 0.5597780751137524, "correct_loss_per_token": 2.4273725810803866, "incorrect_loss_per_token": 2.8604888651106095, "correct_loss_uncond": -24.143310546875, "incorrect_loss_uncond": -7.789588928222656}, "model_output": [{"sum_logits": -112.43619537353516, "num_tokens": 32, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -113.75865173339844, "logits_per_token": -3.5136311054229736, "logits_per_char": -0.6692630676996141, "num_chars": 168}, {"sum_logits": -71.88398742675781, "num_tokens": 30, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -79.14006042480469, "logits_per_token": -2.3961329142252605, "logits_per_char": -0.5572402126105257, "num_chars": 129}, {"sum_logits": -80.15107727050781, "num_tokens": 30, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -94.94131469726562, "logits_per_token": -2.671702575683594, "logits_per_char": -0.4528309450311176, "num_chars": 177}, {"sum_logits": -92.24015808105469, "num_tokens": 38, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -116.38346862792969, "logits_per_token": -2.4273725810803866, "logits_per_char": -0.46822415269570905, "num_chars": 197}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 495, "native_id": 42165, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 89.52803039550781, "incorrect_loss_raw": 90.19512685139973, "correct_loss_per_char": 0.4662918249766032, "incorrect_loss_per_char": 0.6030686855635281, "correct_loss_per_token": 2.2955905229617386, "incorrect_loss_per_token": 2.565026223999619, "correct_loss_uncond": -30.654502868652344, "incorrect_loss_uncond": -18.68871307373047}, "model_output": [{"sum_logits": -58.03663635253906, "num_tokens": 27, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -67.1827392578125, "logits_per_token": -2.149505050094039, "logits_per_char": -0.5527298700241815, "num_chars": 105}, {"sum_logits": -126.9959487915039, "num_tokens": 51, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -150.48094177246094, "logits_per_token": -2.4901166429706647, "logits_per_char": -0.5720538233851528, "num_chars": 222}, {"sum_logits": -89.52803039550781, "num_tokens": 39, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -120.18253326416016, "logits_per_token": -2.2955905229617386, "logits_per_char": -0.4662918249766032, "num_chars": 192}, {"sum_logits": -85.55279541015625, "num_tokens": 28, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -108.98783874511719, "logits_per_token": -3.055456978934152, "logits_per_char": -0.68442236328125, "num_chars": 125}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 496, "native_id": 41952, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 74.21884155273438, "incorrect_loss_raw": 86.52111562093098, "correct_loss_per_char": 0.5263747627853502, "incorrect_loss_per_char": 0.6903822199736278, "correct_loss_per_token": 2.1829071044921875, "incorrect_loss_per_token": 2.7471632588576953, "correct_loss_uncond": -21.81145477294922, "incorrect_loss_uncond": -24.57470194498698}, "model_output": [{"sum_logits": -78.28324127197266, "num_tokens": 28, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -113.27377319335938, "logits_per_token": -2.795830045427595, "logits_per_char": -0.6164034745824618, "num_chars": 127}, {"sum_logits": -74.21884155273438, "num_tokens": 34, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -96.0302963256836, "logits_per_token": -2.1829071044921875, "logits_per_char": -0.5263747627853502, "num_chars": 141}, {"sum_logits": -85.07398986816406, "num_tokens": 38, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -104.6505355834961, "logits_per_token": -2.238789207056949, "logits_per_char": -0.6033616302706671, "num_chars": 141}, {"sum_logits": -96.20611572265625, "num_tokens": 30, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -115.36314392089844, "logits_per_token": -3.206870524088542, "logits_per_char": -0.8513815550677544, "num_chars": 113}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 497, "native_id": 1152, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 116.51887512207031, "incorrect_loss_raw": 157.68390909830728, "correct_loss_per_char": 0.6165019847728588, "incorrect_loss_per_char": 0.68672393801639, "correct_loss_per_token": 2.4274765650431314, "incorrect_loss_per_token": 2.9893092245584008, "correct_loss_uncond": -24.848678588867188, "incorrect_loss_uncond": -19.40935770670573}, "model_output": [{"sum_logits": -121.46673583984375, "num_tokens": 50, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -138.63427734375, "logits_per_token": -2.429334716796875, "logits_per_char": -0.5521215265447443, "num_chars": 220}, {"sum_logits": -189.49078369140625, "num_tokens": 52, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -213.8372344970703, "logits_per_token": -3.6440535325270433, "logits_per_char": -0.8063437603889627, "num_chars": 235}, {"sum_logits": -162.09420776367188, "num_tokens": 56, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -178.80828857421875, "logits_per_token": -2.8945394243512834, "logits_per_char": -0.7017065271154627, "num_chars": 231}, {"sum_logits": -116.51887512207031, "num_tokens": 48, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -141.3675537109375, "logits_per_token": -2.4274765650431314, "logits_per_char": -0.6165019847728588, "num_chars": 189}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 498, "native_id": 28259, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 56.33698272705078, "incorrect_loss_raw": 99.01202646891277, "correct_loss_per_char": 0.47743205700890495, "incorrect_loss_per_char": 0.7494594029858063, "correct_loss_per_token": 2.4494340316109033, "incorrect_loss_per_token": 3.2870298703511556, "correct_loss_uncond": -31.93218994140625, "incorrect_loss_uncond": -14.399340311686197}, "model_output": [{"sum_logits": -73.4405746459961, "num_tokens": 20, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -85.05433654785156, "logits_per_token": -3.672028732299805, "logits_per_char": -0.8742925553094774, "num_chars": 84}, {"sum_logits": -56.33698272705078, "num_tokens": 23, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -88.26917266845703, "logits_per_token": -2.4494340316109033, "logits_per_char": -0.47743205700890495, "num_chars": 118}, {"sum_logits": -127.727783203125, "num_tokens": 40, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -150.69384765625, "logits_per_token": -3.193194580078125, "logits_per_char": -0.6941727347995924, "num_chars": 184}, {"sum_logits": -95.86772155761719, "num_tokens": 32, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -104.48591613769531, "logits_per_token": -2.995866298675537, "logits_per_char": -0.6799129188483488, "num_chars": 141}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 499, "native_id": 29958, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 53.3485107421875, "incorrect_loss_raw": 70.25291951497395, "correct_loss_per_char": 0.4267880859375, "incorrect_loss_per_char": 0.6257949110291583, "correct_loss_per_token": 1.905303955078125, "incorrect_loss_per_token": 2.5500834182293914, "correct_loss_uncond": -26.302146911621094, "incorrect_loss_uncond": -18.659718831380207}, "model_output": [{"sum_logits": -53.3485107421875, "num_tokens": 28, "num_tokens_all": 445, "is_greedy": false, "sum_logits_uncond": -79.6506576538086, "logits_per_token": -1.905303955078125, "logits_per_char": -0.4267880859375, "num_chars": 125}, {"sum_logits": -80.87335205078125, "num_tokens": 28, "num_tokens_all": 445, "is_greedy": false, "sum_logits_uncond": -96.42341613769531, "logits_per_token": -2.888334001813616, "logits_per_char": -0.6221027080829327, "num_chars": 130}, {"sum_logits": -69.0831298828125, "num_tokens": 31, "num_tokens_all": 448, "is_greedy": false, "sum_logits_uncond": -90.11483764648438, "logits_per_token": -2.228488060735887, "logits_per_char": -0.54827880859375, "num_chars": 126}, {"sum_logits": -60.802276611328125, "num_tokens": 24, "num_tokens_all": 441, "is_greedy": false, "sum_logits_uncond": -80.19966125488281, "logits_per_token": -2.533428192138672, "logits_per_char": -0.7070032164107921, "num_chars": 86}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 500, "native_id": 5504, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 34.873191833496094, "incorrect_loss_raw": 64.67795689900716, "correct_loss_per_char": 0.5128410563749426, "incorrect_loss_per_char": 0.763148812389275, "correct_loss_per_token": 2.682553217961238, "incorrect_loss_per_token": 3.9995337266188398, "correct_loss_uncond": -21.716609954833984, "incorrect_loss_uncond": -21.506301879882812}, "model_output": [{"sum_logits": -59.95833969116211, "num_tokens": 13, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -86.68247985839844, "logits_per_token": -4.612179976243239, "logits_per_char": -0.8102478336643528, "num_chars": 74}, {"sum_logits": -43.198577880859375, "num_tokens": 12, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -60.14572525024414, "logits_per_token": -3.5998814900716147, "logits_per_char": -0.6749777793884277, "num_chars": 64}, {"sum_logits": -90.876953125, "num_tokens": 24, "num_tokens_all": 423, "is_greedy": false, "sum_logits_uncond": -111.72457122802734, "logits_per_token": -3.7865397135416665, "logits_per_char": -0.8042208241150443, "num_chars": 113}, {"sum_logits": -34.873191833496094, "num_tokens": 13, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -56.58980178833008, "logits_per_token": -2.682553217961238, "logits_per_char": -0.5128410563749426, "num_chars": 68}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 501, "native_id": 42651, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 95.94767761230469, "incorrect_loss_raw": 76.60593159993489, "correct_loss_per_char": 0.5214547696320907, "incorrect_loss_per_char": 0.5300548324136712, "correct_loss_per_token": 2.3401872588367, "incorrect_loss_per_token": 2.459998791034405, "correct_loss_uncond": -30.41991424560547, "incorrect_loss_uncond": -26.81653594970703}, "model_output": [{"sum_logits": -95.94767761230469, "num_tokens": 41, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -126.36759185791016, "logits_per_token": -2.3401872588367, "logits_per_char": -0.5214547696320907, "num_chars": 184}, {"sum_logits": -73.9058609008789, "num_tokens": 28, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -101.01025390625, "logits_per_token": -2.6394950321742465, "logits_per_char": -0.6107922388502389, "num_chars": 121}, {"sum_logits": -73.73575592041016, "num_tokens": 28, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -100.31427764892578, "logits_per_token": -2.633419854300363, "logits_per_char": -0.5016037817714977, "num_chars": 147}, {"sum_logits": -82.17617797851562, "num_tokens": 39, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -108.94287109375, "logits_per_token": -2.107081486628606, "logits_per_char": -0.4777684766192769, "num_chars": 172}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 502, "native_id": 25017, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 96.71289825439453, "incorrect_loss_raw": 99.48550415039062, "correct_loss_per_char": 0.6447526550292969, "incorrect_loss_per_char": 0.659601358557132, "correct_loss_per_token": 3.223763275146484, "incorrect_loss_per_token": 2.869892350258342, "correct_loss_uncond": -20.32697296142578, "incorrect_loss_uncond": -14.550389607747396}, "model_output": [{"sum_logits": -112.8106918334961, "num_tokens": 39, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -136.376708984375, "logits_per_token": -2.8925818418845153, "logits_per_char": -0.6164518679426017, "num_chars": 183}, {"sum_logits": -96.71289825439453, "num_tokens": 30, "num_tokens_all": 442, "is_greedy": false, "sum_logits_uncond": -117.03987121582031, "logits_per_token": -3.223763275146484, "logits_per_char": -0.6447526550292969, "num_chars": 150}, {"sum_logits": -89.05953979492188, "num_tokens": 33, "num_tokens_all": 445, "is_greedy": false, "sum_logits_uncond": -109.35247802734375, "logits_per_token": -2.6987739331794507, "logits_per_char": -0.6361395699637277, "num_chars": 140}, {"sum_logits": -96.5862808227539, "num_tokens": 32, "num_tokens_all": 444, "is_greedy": false, "sum_logits_uncond": -96.37849426269531, "logits_per_token": -3.0183212757110596, "logits_per_char": -0.726212637765067, "num_chars": 133}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 503, "native_id": 16174, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 21.373363494873047, "incorrect_loss_raw": 19.397158940633137, "correct_loss_per_char": 0.3503830081126729, "incorrect_loss_per_char": 0.5451876273861638, "correct_loss_per_token": 1.5266688210623605, "incorrect_loss_per_token": 2.350068904735424, "correct_loss_uncond": -46.32089614868164, "incorrect_loss_uncond": -24.806901931762695}, "model_output": [{"sum_logits": -20.741334915161133, "num_tokens": 9, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -40.494136810302734, "logits_per_token": -2.304592768351237, "logits_per_char": -0.6481667160987854, "num_chars": 32}, {"sum_logits": -21.373363494873047, "num_tokens": 14, "num_tokens_all": 423, "is_greedy": false, "sum_logits_uncond": -67.69425964355469, "logits_per_token": -1.5266688210623605, "logits_per_char": -0.3503830081126729, "num_chars": 61}, {"sum_logits": -19.038799285888672, "num_tokens": 9, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -50.517616271972656, "logits_per_token": -2.115422142876519, "logits_per_char": -0.4759699821472168, "num_chars": 40}, {"sum_logits": -18.41134262084961, "num_tokens": 7, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -41.60042953491211, "logits_per_token": -2.6301918029785156, "logits_per_char": -0.5114261839124892, "num_chars": 36}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 504, "native_id": 16501, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 32.61897277832031, "incorrect_loss_raw": 47.34114074707031, "correct_loss_per_char": 0.7413402904163707, "incorrect_loss_per_char": 0.8067853149266901, "correct_loss_per_token": 2.718247731526693, "incorrect_loss_per_token": 3.422305144960918, "correct_loss_uncond": -16.831371307373047, "incorrect_loss_uncond": -18.893460591634113}, "model_output": [{"sum_logits": -45.15073013305664, "num_tokens": 14, "num_tokens_all": 404, "is_greedy": false, "sum_logits_uncond": -61.57025909423828, "logits_per_token": -3.225052152361189, "logits_per_char": -0.6946266174316407, "num_chars": 65}, {"sum_logits": -31.38857650756836, "num_tokens": 8, "num_tokens_all": 398, "is_greedy": false, "sum_logits_uncond": -52.895751953125, "logits_per_token": -3.923572063446045, "logits_per_char": -0.8968164716448103, "num_chars": 35}, {"sum_logits": -65.48411560058594, "num_tokens": 21, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -84.23779296875, "logits_per_token": -3.118291219075521, "logits_per_char": -0.8289128557036195, "num_chars": 79}, {"sum_logits": -32.61897277832031, "num_tokens": 12, "num_tokens_all": 402, "is_greedy": false, "sum_logits_uncond": -49.45034408569336, "logits_per_token": -2.718247731526693, "logits_per_char": -0.7413402904163707, "num_chars": 44}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 505, "native_id": 5816, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 83.90924072265625, "incorrect_loss_raw": 107.5782953898112, "correct_loss_per_char": 0.5344537625646895, "incorrect_loss_per_char": 0.6445892127441332, "correct_loss_per_token": 2.706749700730847, "incorrect_loss_per_token": 2.844351048760334, "correct_loss_uncond": -17.620391845703125, "incorrect_loss_uncond": -20.751978556315105}, "model_output": [{"sum_logits": -83.90924072265625, "num_tokens": 31, "num_tokens_all": 444, "is_greedy": false, "sum_logits_uncond": -101.52963256835938, "logits_per_token": -2.706749700730847, "logits_per_char": -0.5344537625646895, "num_chars": 157}, {"sum_logits": -129.33596801757812, "num_tokens": 43, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -146.3592071533203, "logits_per_token": -3.007813209711119, "logits_per_char": -0.6953546667611727, "num_chars": 186}, {"sum_logits": -107.81784057617188, "num_tokens": 39, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -124.74581909179688, "logits_per_token": -2.7645600147736378, "logits_per_char": -0.6091403422382592, "num_chars": 177}, {"sum_logits": -85.5810775756836, "num_tokens": 31, "num_tokens_all": 444, "is_greedy": false, "sum_logits_uncond": -113.88579559326172, "logits_per_token": -2.7606799217962448, "logits_per_char": -0.6292726292329676, "num_chars": 136}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 506, "native_id": 22245, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 66.3862075805664, "incorrect_loss_raw": 156.59056091308594, "correct_loss_per_char": 0.663862075805664, "incorrect_loss_per_char": 0.7086243288306152, "correct_loss_per_token": 2.7660919825236, "incorrect_loss_per_token": 3.1407779858761686, "correct_loss_uncond": -25.424453735351562, "incorrect_loss_uncond": -25.41943868001302}, "model_output": [{"sum_logits": -117.03297424316406, "num_tokens": 39, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -140.63528442382812, "logits_per_token": -3.000845493414463, "logits_per_char": -0.6465910179180335, "num_chars": 181}, {"sum_logits": -66.3862075805664, "num_tokens": 24, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -91.81066131591797, "logits_per_token": -2.7660919825236, "logits_per_char": -0.663862075805664, "num_chars": 100}, {"sum_logits": -176.04127502441406, "num_tokens": 53, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -206.85293579101562, "logits_per_token": -3.3215334910266803, "logits_per_char": -0.8150059028908059, "num_chars": 216}, {"sum_logits": -176.6974334716797, "num_tokens": 57, "num_tokens_all": 490, "is_greedy": false, "sum_logits_uncond": -198.54177856445312, "logits_per_token": -3.099954973187363, "logits_per_char": -0.6642760656830063, "num_chars": 266}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 507, "native_id": 34834, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 78.46239471435547, "incorrect_loss_raw": 109.52052561442058, "correct_loss_per_char": 0.5486880749255627, "incorrect_loss_per_char": 0.5467679344341124, "correct_loss_per_token": 2.4519498348236084, "incorrect_loss_per_token": 2.400436205013778, "correct_loss_uncond": -40.364463806152344, "incorrect_loss_uncond": -25.692616780598957}, "model_output": [{"sum_logits": -95.53712463378906, "num_tokens": 43, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -115.94808197021484, "logits_per_token": -2.2217935961346296, "logits_per_char": -0.46153200306178294, "num_chars": 207}, {"sum_logits": -135.03790283203125, "num_tokens": 50, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -166.98501586914062, "logits_per_token": -2.700758056640625, "logits_per_char": -0.6251754760742188, "num_chars": 216}, {"sum_logits": -78.46239471435547, "num_tokens": 32, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -118.82685852050781, "logits_per_token": -2.4519498348236084, "logits_per_char": -0.5486880749255627, "num_chars": 143}, {"sum_logits": -97.9865493774414, "num_tokens": 43, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -122.70632934570312, "logits_per_token": -2.2787569622660793, "logits_per_char": -0.5535963241663356, "num_chars": 177}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 508, "native_id": 22067, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 88.63801574707031, "incorrect_loss_raw": 152.43580627441406, "correct_loss_per_char": 0.38538267716117525, "incorrect_loss_per_char": 0.6410239665699806, "correct_loss_per_token": 1.8859152286610705, "incorrect_loss_per_token": 3.4269502979189497, "correct_loss_uncond": -24.814193725585938, "incorrect_loss_uncond": -6.971844991048177}, "model_output": [{"sum_logits": -191.21194458007812, "num_tokens": 47, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -205.15829467773438, "logits_per_token": -4.068339246384641, "logits_per_char": -0.7270416143729206, "num_chars": 263}, {"sum_logits": -109.64605712890625, "num_tokens": 39, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -119.9201889038086, "logits_per_token": -2.8114373622796474, "logits_per_char": -0.5651858614892075, "num_chars": 194}, {"sum_logits": -88.63801574707031, "num_tokens": 47, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -113.45220947265625, "logits_per_token": -1.8859152286610705, "logits_per_char": -0.38538267716117525, "num_chars": 230}, {"sum_logits": -156.4494171142578, "num_tokens": 46, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -153.14447021484375, "logits_per_token": -3.401074285092561, "logits_per_char": -0.6308444238478138, "num_chars": 248}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 509, "native_id": 5018, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 98.08808135986328, "incorrect_loss_raw": 116.75118509928386, "correct_loss_per_char": 0.5330873986949092, "incorrect_loss_per_char": 0.5549558951677469, "correct_loss_per_token": 2.802516610281808, "incorrect_loss_per_token": 2.6367836771494453, "correct_loss_uncond": -29.061477661132812, "incorrect_loss_uncond": -28.135635375976562}, "model_output": [{"sum_logits": -131.33816528320312, "num_tokens": 40, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -157.0106964111328, "logits_per_token": -3.2834541320800783, "logits_per_char": -0.7176948922579406, "num_chars": 183}, {"sum_logits": -80.81938171386719, "num_tokens": 41, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -108.91998291015625, "logits_per_token": -1.971204432045541, "logits_per_char": -0.4592010324651545, "num_chars": 176}, {"sum_logits": -98.08808135986328, "num_tokens": 35, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -127.1495590209961, "logits_per_token": -2.802516610281808, "logits_per_char": -0.5330873986949092, "num_chars": 184}, {"sum_logits": -138.09600830078125, "num_tokens": 52, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -168.7297821044922, "logits_per_token": -2.6556924673227162, "logits_per_char": -0.48797176078014576, "num_chars": 283}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 510, "native_id": 12915, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 104.60060119628906, "incorrect_loss_raw": 140.3963851928711, "correct_loss_per_char": 0.615297654095818, "incorrect_loss_per_char": 0.7026653906185887, "correct_loss_per_token": 2.6150150299072266, "incorrect_loss_per_token": 3.791053919663834, "correct_loss_uncond": -10.692359924316406, "incorrect_loss_uncond": -5.713124593098958}, "model_output": [{"sum_logits": -104.60060119628906, "num_tokens": 40, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -115.29296112060547, "logits_per_token": -2.6150150299072266, "logits_per_char": -0.615297654095818, "num_chars": 170}, {"sum_logits": -163.53070068359375, "num_tokens": 35, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -167.19839477539062, "logits_per_token": -4.672305733816964, "logits_per_char": -0.830105079612151, "num_chars": 197}, {"sum_logits": -78.68143463134766, "num_tokens": 28, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -89.48751068115234, "logits_per_token": -2.810051236833845, "logits_per_char": -0.5352478546350181, "num_chars": 147}, {"sum_logits": -178.97702026367188, "num_tokens": 46, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -181.6426239013672, "logits_per_token": -3.890804788340693, "logits_per_char": -0.742643237608597, "num_chars": 241}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 511, "native_id": 38054, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 127.88900756835938, "incorrect_loss_raw": 105.41336059570312, "correct_loss_per_char": 0.636263719245569, "incorrect_loss_per_char": 0.6764111785862781, "correct_loss_per_token": 2.9065683538263496, "incorrect_loss_per_token": 2.972606911054834, "correct_loss_uncond": -11.597610473632812, "incorrect_loss_uncond": -20.845486958821613}, "model_output": [{"sum_logits": -90.13362121582031, "num_tokens": 31, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -99.49772644042969, "logits_per_token": -2.907536168252268, "logits_per_char": -0.6726389642971665, "num_chars": 134}, {"sum_logits": -141.5854034423828, "num_tokens": 38, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -164.2617950439453, "logits_per_token": -3.72593166953639, "logits_per_char": -0.7736907291933487, "num_chars": 183}, {"sum_logits": -127.88900756835938, "num_tokens": 44, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -139.4866180419922, "logits_per_token": -2.9065683538263496, "logits_per_char": -0.636263719245569, "num_chars": 201}, {"sum_logits": -84.52105712890625, "num_tokens": 37, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -115.01702117919922, "logits_per_token": -2.2843528953758447, "logits_per_char": -0.5829038422683189, "num_chars": 145}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 512, "native_id": 15816, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 63.12933349609375, "incorrect_loss_raw": 60.711560567220054, "correct_loss_per_char": 0.6715886542137633, "incorrect_loss_per_char": 0.5404569192779903, "correct_loss_per_token": 2.744753630264946, "incorrect_loss_per_token": 2.3573882753309663, "correct_loss_uncond": -0.1515350341796875, "incorrect_loss_uncond": -17.09375254313151}, "model_output": [{"sum_logits": -52.960289001464844, "num_tokens": 22, "num_tokens_all": 439, "is_greedy": false, "sum_logits_uncond": -72.83029174804688, "logits_per_token": -2.4072858637029473, "logits_per_char": -0.5243592970442064, "num_chars": 101}, {"sum_logits": -61.53857421875, "num_tokens": 23, "num_tokens_all": 440, "is_greedy": false, "sum_logits_uncond": -75.14439392089844, "logits_per_token": -2.675590183423913, "logits_per_char": -0.6033193550857843, "num_chars": 102}, {"sum_logits": -63.12933349609375, "num_tokens": 23, "num_tokens_all": 440, "is_greedy": false, "sum_logits_uncond": -63.28086853027344, "logits_per_token": -2.744753630264946, "logits_per_char": -0.6715886542137633, "num_chars": 94}, {"sum_logits": -67.63581848144531, "num_tokens": 34, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -85.44125366210938, "logits_per_token": -1.9892887788660385, "logits_per_char": -0.49369210570398037, "num_chars": 137}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 513, "native_id": 38948, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 118.78033447265625, "incorrect_loss_raw": 111.87929789225261, "correct_loss_per_char": 0.5738180409307065, "incorrect_loss_per_char": 0.5922443918421316, "correct_loss_per_token": 2.375606689453125, "incorrect_loss_per_token": 2.5158344703793136, "correct_loss_uncond": -36.56504821777344, "incorrect_loss_uncond": -20.118759155273438}, "model_output": [{"sum_logits": -99.67340850830078, "num_tokens": 35, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -115.80741882324219, "logits_per_token": -2.8478116716657365, "logits_per_char": -0.6114933037319066, "num_chars": 163}, {"sum_logits": -118.78033447265625, "num_tokens": 50, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -155.3453826904297, "logits_per_token": -2.375606689453125, "logits_per_char": -0.5738180409307065, "num_chars": 207}, {"sum_logits": -125.94409942626953, "num_tokens": 48, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -137.27047729492188, "logits_per_token": -2.6238354047139487, "logits_per_char": -0.6628636811908922, "num_chars": 190}, {"sum_logits": -110.0203857421875, "num_tokens": 53, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -142.91627502441406, "logits_per_token": -2.075856334758255, "logits_per_char": -0.5023761906035958, "num_chars": 219}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 514, "native_id": 38733, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 55.365325927734375, "incorrect_loss_raw": 104.29347229003906, "correct_loss_per_char": 0.5427973130170036, "incorrect_loss_per_char": 0.7105195950686162, "correct_loss_per_token": 2.3068885803222656, "incorrect_loss_per_token": 3.547798666350293, "correct_loss_uncond": -26.536956787109375, "incorrect_loss_uncond": -21.541356404622395}, "model_output": [{"sum_logits": -64.58555603027344, "num_tokens": 20, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -94.53959655761719, "logits_per_token": -3.229277801513672, "logits_per_char": -0.5665399651778372, "num_chars": 114}, {"sum_logits": -55.365325927734375, "num_tokens": 24, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -81.90228271484375, "logits_per_token": -2.3068885803222656, "logits_per_char": -0.5427973130170036, "num_chars": 102}, {"sum_logits": -89.06806945800781, "num_tokens": 24, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -111.39216613769531, "logits_per_token": -3.7111695607503257, "logits_per_char": -0.7484711719160321, "num_chars": 119}, {"sum_logits": -159.22679138183594, "num_tokens": 43, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -171.57272338867188, "logits_per_token": -3.7029486367868825, "logits_per_char": -0.8165476481119792, "num_chars": 195}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 515, "native_id": 30979, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 117.04536437988281, "incorrect_loss_raw": 163.05084737141928, "correct_loss_per_char": 0.5654365428979846, "incorrect_loss_per_char": 0.7095505286638187, "correct_loss_per_token": 2.4384450912475586, "incorrect_loss_per_token": 3.4722397365267312, "correct_loss_uncond": -19.956390380859375, "incorrect_loss_uncond": -16.65673828125}, "model_output": [{"sum_logits": -160.52877807617188, "num_tokens": 42, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -171.56869506835938, "logits_per_token": -3.822113763718378, "logits_per_char": -0.6949297752215232, "num_chars": 231}, {"sum_logits": -235.19277954101562, "num_tokens": 64, "num_tokens_all": 494, "is_greedy": false, "sum_logits_uncond": -254.8475799560547, "logits_per_token": -3.674887180328369, "logits_per_char": -0.8775849982873717, "num_chars": 268}, {"sum_logits": -93.43098449707031, "num_tokens": 32, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -112.70648193359375, "logits_per_token": -2.9197182655334473, "logits_per_char": -0.5561368124825614, "num_chars": 168}, {"sum_logits": -117.04536437988281, "num_tokens": 48, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -137.0017547607422, "logits_per_token": -2.4384450912475586, "logits_per_char": -0.5654365428979846, "num_chars": 207}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 516, "native_id": 15065, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 25.473552703857422, "incorrect_loss_raw": 31.68994649251302, "correct_loss_per_char": 0.7075986862182617, "incorrect_loss_per_char": 0.9348401658741224, "correct_loss_per_token": 2.830394744873047, "incorrect_loss_per_token": 3.665001268740054, "correct_loss_uncond": -17.172100067138672, "incorrect_loss_uncond": -19.28088124593099}, "model_output": [{"sum_logits": -38.898460388183594, "num_tokens": 9, "num_tokens_all": 417, "is_greedy": false, "sum_logits_uncond": -57.122344970703125, "logits_per_token": -4.322051154242621, "logits_per_char": -1.0513097402211782, "num_chars": 37}, {"sum_logits": -31.081558227539062, "num_tokens": 8, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -53.878868103027344, "logits_per_token": -3.885194778442383, "logits_per_char": -0.8880445207868304, "num_chars": 35}, {"sum_logits": -25.473552703857422, "num_tokens": 9, "num_tokens_all": 417, "is_greedy": false, "sum_logits_uncond": -42.645652770996094, "logits_per_token": -2.830394744873047, "logits_per_char": -0.7075986862182617, "num_chars": 36}, {"sum_logits": -25.089820861816406, "num_tokens": 9, "num_tokens_all": 417, "is_greedy": false, "sum_logits_uncond": -41.91127014160156, "logits_per_token": -2.7877578735351562, "logits_per_char": -0.8651662366143589, "num_chars": 29}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 517, "native_id": 12916, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 35.54878234863281, "incorrect_loss_raw": 103.96090189615886, "correct_loss_per_char": 0.41335793428642803, "incorrect_loss_per_char": 0.7496597360329567, "correct_loss_per_token": 1.8709885446648848, "incorrect_loss_per_token": 3.305383923174477, "correct_loss_uncond": -38.503822326660156, "incorrect_loss_uncond": -15.860725402832031}, "model_output": [{"sum_logits": -78.90769958496094, "num_tokens": 23, "num_tokens_all": 445, "is_greedy": false, "sum_logits_uncond": -93.7253646850586, "logits_per_token": -3.430769547172215, "logits_per_char": -0.7108801764410895, "num_chars": 111}, {"sum_logits": -35.54878234863281, "num_tokens": 19, "num_tokens_all": 441, "is_greedy": false, "sum_logits_uncond": -74.05260467529297, "logits_per_token": -1.8709885446648848, "logits_per_char": -0.41335793428642803, "num_chars": 86}, {"sum_logits": -123.22305297851562, "num_tokens": 39, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -139.5341033935547, "logits_per_token": -3.1595654609875803, "logits_per_char": -0.8160467084669909, "num_chars": 151}, {"sum_logits": -109.751953125, "num_tokens": 33, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -126.20541381835938, "logits_per_token": -3.3258167613636362, "logits_per_char": -0.7220523231907895, "num_chars": 152}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 518, "native_id": 6180, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 58.72861099243164, "incorrect_loss_raw": 75.38921610514323, "correct_loss_per_char": 0.5488655232937536, "incorrect_loss_per_char": 0.7599439777648088, "correct_loss_per_token": 2.6694823178378018, "incorrect_loss_per_token": 3.923998923106969, "correct_loss_uncond": -30.234188079833984, "incorrect_loss_uncond": -20.063636779785156}, "model_output": [{"sum_logits": -46.631439208984375, "num_tokens": 17, "num_tokens_all": 422, "is_greedy": false, "sum_logits_uncond": -65.3144302368164, "logits_per_token": -2.7430258358226105, "logits_per_char": -0.5239487551571278, "num_chars": 89}, {"sum_logits": -58.72861099243164, "num_tokens": 22, "num_tokens_all": 427, "is_greedy": false, "sum_logits_uncond": -88.96279907226562, "logits_per_token": -2.6694823178378018, "logits_per_char": -0.5488655232937536, "num_chars": 107}, {"sum_logits": -95.68571472167969, "num_tokens": 19, "num_tokens_all": 424, "is_greedy": false, "sum_logits_uncond": -112.47718048095703, "logits_per_token": -5.036090248509457, "logits_per_char": -1.0072180497018914, "num_chars": 95}, {"sum_logits": -83.85049438476562, "num_tokens": 21, "num_tokens_all": 426, "is_greedy": false, "sum_logits_uncond": -108.56694793701172, "logits_per_token": -3.9928806849888394, "logits_per_char": -0.7486651284354073, "num_chars": 112}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 519, "native_id": 36587, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 148.58184814453125, "incorrect_loss_raw": 111.51761372884114, "correct_loss_per_char": 0.7143358083871695, "incorrect_loss_per_char": 0.5814140010029684, "correct_loss_per_token": 2.4763641357421875, "incorrect_loss_per_token": 2.54538490269236, "correct_loss_uncond": -16.195770263671875, "incorrect_loss_uncond": -26.219444274902344}, "model_output": [{"sum_logits": -107.20862579345703, "num_tokens": 37, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -126.25823211669922, "logits_per_token": -2.89753042685019, "logits_per_char": -0.6742680867513021, "num_chars": 159}, {"sum_logits": -148.58184814453125, "num_tokens": 60, "num_tokens_all": 494, "is_greedy": false, "sum_logits_uncond": -164.77761840820312, "logits_per_token": -2.4763641357421875, "logits_per_char": -0.7143358083871695, "num_chars": 208}, {"sum_logits": -113.93679809570312, "num_tokens": 47, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -141.06466674804688, "logits_per_token": -2.4241871935255985, "logits_per_char": -0.5299385957939681, "num_chars": 215}, {"sum_logits": -113.40741729736328, "num_tokens": 49, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -145.88827514648438, "logits_per_token": -2.3144370877012914, "logits_per_char": -0.5400353204636347, "num_chars": 210}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 520, "native_id": 45237, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 26.56499481201172, "incorrect_loss_raw": 27.588512420654297, "correct_loss_per_char": 0.5903332180447048, "incorrect_loss_per_char": 0.6784227661222045, "correct_loss_per_token": 2.951666090223524, "incorrect_loss_per_token": 2.804294245583671, "correct_loss_uncond": -23.171001434326172, "incorrect_loss_uncond": -25.209618886311848}, "model_output": [{"sum_logits": -26.56499481201172, "num_tokens": 9, "num_tokens_all": 398, "is_greedy": false, "sum_logits_uncond": -49.73599624633789, "logits_per_token": -2.951666090223524, "logits_per_char": -0.5903332180447048, "num_chars": 45}, {"sum_logits": -36.07910919189453, "num_tokens": 14, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -65.53116607666016, "logits_per_token": -2.5770792279924666, "logits_per_char": -0.6329668279279742, "num_chars": 57}, {"sum_logits": -18.98212432861328, "num_tokens": 8, "num_tokens_all": 397, "is_greedy": false, "sum_logits_uncond": -41.43421936035156, "logits_per_token": -2.37276554107666, "logits_per_char": -0.6327374776204427, "num_chars": 30}, {"sum_logits": -27.704303741455078, "num_tokens": 8, "num_tokens_all": 397, "is_greedy": false, "sum_logits_uncond": -51.42900848388672, "logits_per_token": -3.4630379676818848, "logits_per_char": -0.7695639928181967, "num_chars": 36}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 521, "native_id": 36095, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 25.35894203186035, "incorrect_loss_raw": 37.09539286295573, "correct_loss_per_char": 0.5763395916331898, "incorrect_loss_per_char": 0.8604053789260885, "correct_loss_per_token": 3.169867753982544, "incorrect_loss_per_token": 3.4706929524739585, "correct_loss_uncond": -20.3461971282959, "incorrect_loss_uncond": -16.587905883789062}, "model_output": [{"sum_logits": -36.57249450683594, "num_tokens": 11, "num_tokens_all": 397, "is_greedy": false, "sum_logits_uncond": -50.00312042236328, "logits_per_token": -3.3247722278941763, "logits_per_char": -0.850523128065952, "num_chars": 43}, {"sum_logits": -32.466888427734375, "num_tokens": 10, "num_tokens_all": 396, "is_greedy": false, "sum_logits_uncond": -51.85057830810547, "logits_per_token": -3.2466888427734375, "logits_per_char": -0.7918753275057164, "num_chars": 41}, {"sum_logits": -42.246795654296875, "num_tokens": 11, "num_tokens_all": 397, "is_greedy": false, "sum_logits_uncond": -59.196197509765625, "logits_per_token": -3.8406177867542612, "logits_per_char": -0.9388176812065973, "num_chars": 45}, {"sum_logits": -25.35894203186035, "num_tokens": 8, "num_tokens_all": 394, "is_greedy": false, "sum_logits_uncond": -45.70513916015625, "logits_per_token": -3.169867753982544, "logits_per_char": -0.5763395916331898, "num_chars": 44}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 522, "native_id": 37068, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 34.46073532104492, "incorrect_loss_raw": 22.669832229614258, "correct_loss_per_char": 0.8204936981201172, "incorrect_loss_per_char": 0.5817548784858736, "correct_loss_per_token": 3.8289705912272134, "incorrect_loss_per_token": 2.6509169583598142, "correct_loss_uncond": -11.010913848876953, "incorrect_loss_uncond": -20.834433873494465}, "model_output": [{"sum_logits": -28.823511123657227, "num_tokens": 9, "num_tokens_all": 405, "is_greedy": false, "sum_logits_uncond": -45.95515823364258, "logits_per_token": -3.2026123470730252, "logits_per_char": -0.6862740743727911, "num_chars": 42}, {"sum_logits": -34.46073532104492, "num_tokens": 9, "num_tokens_all": 405, "is_greedy": false, "sum_logits_uncond": -45.471649169921875, "logits_per_token": -3.8289705912272134, "logits_per_char": -0.8204936981201172, "num_chars": 42}, {"sum_logits": -19.78338623046875, "num_tokens": 10, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -44.62207794189453, "logits_per_token": -1.978338623046875, "logits_per_char": -0.47103300548735116, "num_chars": 42}, {"sum_logits": -19.402599334716797, "num_tokens": 7, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -39.93556213378906, "logits_per_token": -2.7717999049595425, "logits_per_char": -0.5879575555974786, "num_chars": 33}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 523, "native_id": 21550, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 106.47479248046875, "incorrect_loss_raw": 153.53197224934897, "correct_loss_per_char": 0.5118980407714844, "incorrect_loss_per_char": 0.6560254713142778, "correct_loss_per_token": 2.3661064995659724, "incorrect_loss_per_token": 3.000416976330327, "correct_loss_uncond": -23.07232666015625, "incorrect_loss_uncond": -22.157190958658855}, "model_output": [{"sum_logits": -142.97994995117188, "num_tokens": 51, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -168.9173583984375, "logits_per_token": -2.8035284304151347, "logits_per_char": -0.5765320562547253, "num_chars": 248}, {"sum_logits": -106.47479248046875, "num_tokens": 45, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -129.547119140625, "logits_per_token": -2.3661064995659724, "logits_per_char": -0.5118980407714844, "num_chars": 208}, {"sum_logits": -209.12120056152344, "num_tokens": 60, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -220.3730926513672, "logits_per_token": -3.4853533426920573, "logits_per_char": -0.7921257597027402, "num_chars": 264}, {"sum_logits": -108.49476623535156, "num_tokens": 40, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -137.77703857421875, "logits_per_token": -2.7123691558837892, "logits_per_char": -0.5994185979853678, "num_chars": 181}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 524, "native_id": 27953, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 61.727378845214844, "incorrect_loss_raw": 78.95124816894531, "correct_loss_per_char": 0.4860423531119279, "incorrect_loss_per_char": 0.6013510525396131, "correct_loss_per_token": 2.0575792948404947, "incorrect_loss_per_token": 2.5678870791480652, "correct_loss_uncond": -18.55510711669922, "incorrect_loss_uncond": -24.640581766764324}, "model_output": [{"sum_logits": -93.92037200927734, "num_tokens": 44, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -125.14521026611328, "logits_per_token": -2.134553909301758, "logits_per_char": -0.5104368043982465, "num_chars": 184}, {"sum_logits": -61.727378845214844, "num_tokens": 30, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -80.28248596191406, "logits_per_token": -2.0575792948404947, "logits_per_char": -0.4860423531119279, "num_chars": 127}, {"sum_logits": -78.00979614257812, "num_tokens": 24, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -103.43612670898438, "logits_per_token": -3.250408172607422, "logits_per_char": -0.7290635153511974, "num_chars": 107}, {"sum_logits": -64.92357635498047, "num_tokens": 28, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -82.19415283203125, "logits_per_token": -2.318699155535017, "logits_per_char": -0.5645528378693954, "num_chars": 115}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 525, "native_id": 44404, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 39.342594146728516, "incorrect_loss_raw": 40.6571896870931, "correct_loss_per_char": 0.49800752084466476, "incorrect_loss_per_char": 0.8299463279228335, "correct_loss_per_token": 2.1856996748182507, "incorrect_loss_per_token": 3.178397595138686, "correct_loss_uncond": -25.59408950805664, "incorrect_loss_uncond": -12.758262634277344}, "model_output": [{"sum_logits": -39.8797721862793, "num_tokens": 11, "num_tokens_all": 402, "is_greedy": false, "sum_logits_uncond": -46.93669509887695, "logits_per_token": -3.6254338351163, "logits_per_char": -0.8669515692669413, "num_chars": 46}, {"sum_logits": -39.342594146728516, "num_tokens": 18, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -64.93668365478516, "logits_per_token": -2.1856996748182507, "logits_per_char": -0.49800752084466476, "num_chars": 79}, {"sum_logits": -48.40593719482422, "num_tokens": 17, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -61.08720397949219, "logits_per_token": -2.8474080702837776, "logits_per_char": -0.7807409224971649, "num_chars": 62}, {"sum_logits": -33.68585968017578, "num_tokens": 11, "num_tokens_all": 402, "is_greedy": false, "sum_logits_uncond": -52.22245788574219, "logits_per_token": -3.06235088001598, "logits_per_char": -0.8421464920043945, "num_chars": 40}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 526, "native_id": 39923, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 62.969482421875, "incorrect_loss_raw": 84.63440450032552, "correct_loss_per_char": 0.6360553779987373, "incorrect_loss_per_char": 0.6389723574556575, "correct_loss_per_token": 2.2489100864955356, "incorrect_loss_per_token": 2.753427561057382, "correct_loss_uncond": -23.331161499023438, "incorrect_loss_uncond": -22.06269073486328}, "model_output": [{"sum_logits": -62.96733856201172, "num_tokens": 28, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -87.70258331298828, "logits_per_token": -2.2488335200718472, "logits_per_char": -0.4843641427847055, "num_chars": 130}, {"sum_logits": -62.969482421875, "num_tokens": 28, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -86.30064392089844, "logits_per_token": -2.2489100864955356, "logits_per_char": -0.6360553779987373, "num_chars": 99}, {"sum_logits": -90.035400390625, "num_tokens": 34, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -114.365478515625, "logits_per_token": -2.6481000114889706, "logits_per_char": -0.5627212524414062, "num_chars": 160}, {"sum_logits": -100.90047454833984, "num_tokens": 30, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -118.02322387695312, "logits_per_token": -3.3633491516113283, "logits_per_char": -0.8698316771408607, "num_chars": 116}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 527, "native_id": 34195, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 34.77524185180664, "incorrect_loss_raw": 38.332496643066406, "correct_loss_per_char": 0.3951732028614391, "incorrect_loss_per_char": 0.5912046102458325, "correct_loss_per_token": 2.173452615737915, "incorrect_loss_per_token": 2.8925486549498545, "correct_loss_uncond": -34.5507926940918, "incorrect_loss_uncond": -23.29449462890625}, "model_output": [{"sum_logits": -39.62786865234375, "num_tokens": 14, "num_tokens_all": 427, "is_greedy": false, "sum_logits_uncond": -67.76420593261719, "logits_per_token": -2.830562046595982, "logits_per_char": -0.5661124093191964, "num_chars": 70}, {"sum_logits": -50.696346282958984, "num_tokens": 15, "num_tokens_all": 428, "is_greedy": false, "sum_logits_uncond": -71.70079040527344, "logits_per_token": -3.3797564188639324, "logits_per_char": -0.6337043285369873, "num_chars": 80}, {"sum_logits": -34.77524185180664, "num_tokens": 16, "num_tokens_all": 429, "is_greedy": false, "sum_logits_uncond": -69.32603454589844, "logits_per_token": -2.173452615737915, "logits_per_char": -0.3951732028614391, "num_chars": 88}, {"sum_logits": -24.673274993896484, "num_tokens": 10, "num_tokens_all": 423, "is_greedy": false, "sum_logits_uncond": -45.415977478027344, "logits_per_token": -2.4673274993896483, "logits_per_char": -0.5737970928813136, "num_chars": 43}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 528, "native_id": 12746, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 40.26884078979492, "incorrect_loss_raw": 104.90972646077473, "correct_loss_per_char": 0.4425147339537904, "incorrect_loss_per_char": 0.5769492552588859, "correct_loss_per_token": 1.8304018540815874, "incorrect_loss_per_token": 2.5666884822753726, "correct_loss_uncond": -26.409168243408203, "incorrect_loss_uncond": -24.90577443440755}, "model_output": [{"sum_logits": -137.1986083984375, "num_tokens": 40, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -163.87059020996094, "logits_per_token": -3.4299652099609377, "logits_per_char": -0.7108736186447538, "num_chars": 193}, {"sum_logits": -40.26884078979492, "num_tokens": 22, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -66.67800903320312, "logits_per_token": -1.8304018540815874, "logits_per_char": -0.4425147339537904, "num_chars": 91}, {"sum_logits": -90.23155975341797, "num_tokens": 53, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -110.92723083496094, "logits_per_token": -1.7024822594984523, "logits_per_char": -0.46036510078274473, "num_chars": 196}, {"sum_logits": -87.29901123046875, "num_tokens": 34, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -114.648681640625, "logits_per_token": -2.567617977366728, "logits_per_char": -0.5596090463491586, "num_chars": 156}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 529, "native_id": 22912, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 38.44917297363281, "incorrect_loss_raw": 52.34614181518555, "correct_loss_per_char": 0.5825632268732245, "incorrect_loss_per_char": 0.650796934563758, "correct_loss_per_token": 2.7463694981166293, "incorrect_loss_per_token": 3.1634254223261125, "correct_loss_uncond": -28.29034423828125, "incorrect_loss_uncond": -20.491985321044922}, "model_output": [{"sum_logits": -38.44917297363281, "num_tokens": 14, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -66.73951721191406, "logits_per_token": -2.7463694981166293, "logits_per_char": -0.5825632268732245, "num_chars": 66}, {"sum_logits": -76.44732666015625, "num_tokens": 20, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -92.17664337158203, "logits_per_token": -3.8223663330078126, "logits_per_char": -0.7350704486553485, "num_chars": 104}, {"sum_logits": -43.74699401855469, "num_tokens": 13, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -64.52021789550781, "logits_per_token": -3.3651533860426683, "logits_per_char": -0.705596677718624, "num_chars": 62}, {"sum_logits": -36.8441047668457, "num_tokens": 16, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -61.81752014160156, "logits_per_token": -2.3027565479278564, "logits_per_char": -0.5117236773173014, "num_chars": 72}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 530, "native_id": 35218, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 113.5826644897461, "incorrect_loss_raw": 119.36007690429688, "correct_loss_per_char": 0.5595205147278133, "incorrect_loss_per_char": 0.6122926786396895, "correct_loss_per_token": 2.6414573137150255, "incorrect_loss_per_token": 2.8939358682343452, "correct_loss_uncond": -27.804237365722656, "incorrect_loss_uncond": -17.387435913085938}, "model_output": [{"sum_logits": -113.5826644897461, "num_tokens": 43, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -141.38690185546875, "logits_per_token": -2.6414573137150255, "logits_per_char": -0.5595205147278133, "num_chars": 203}, {"sum_logits": -115.02536010742188, "num_tokens": 40, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -135.91580200195312, "logits_per_token": -2.8756340026855467, "logits_per_char": -0.5990904172261556, "num_chars": 192}, {"sum_logits": -124.16767883300781, "num_tokens": 40, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -133.48019409179688, "logits_per_token": -3.1041919708251955, "logits_per_char": -0.6822399935879551, "num_chars": 182}, {"sum_logits": -118.88719177246094, "num_tokens": 44, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -140.84654235839844, "logits_per_token": -2.701981631192294, "logits_per_char": -0.5555476251049577, "num_chars": 214}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 531, "native_id": 41590, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 59.01164245605469, "incorrect_loss_raw": 108.83612314860027, "correct_loss_per_char": 0.5176459864566201, "incorrect_loss_per_char": 0.7235678039534609, "correct_loss_per_token": 2.3604656982421877, "incorrect_loss_per_token": 3.12915099246276, "correct_loss_uncond": -23.196792602539062, "incorrect_loss_uncond": -21.39599863688151}, "model_output": [{"sum_logits": -87.8595199584961, "num_tokens": 33, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -111.28776550292969, "logits_per_token": -2.662409695712003, "logits_per_char": -0.5936454051249737, "num_chars": 148}, {"sum_logits": -59.01164245605469, "num_tokens": 25, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -82.20843505859375, "logits_per_token": -2.3604656982421877, "logits_per_char": -0.5176459864566201, "num_chars": 114}, {"sum_logits": -117.80404663085938, "num_tokens": 36, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -143.45321655273438, "logits_per_token": -3.2723346286349826, "logits_per_char": -0.8662062252269072, "num_chars": 136}, {"sum_logits": -120.84480285644531, "num_tokens": 35, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -135.95538330078125, "logits_per_token": -3.4527086530412947, "logits_per_char": -0.7108517815085018, "num_chars": 170}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 532, "native_id": 16914, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 75.57310485839844, "incorrect_loss_raw": 96.68253835042317, "correct_loss_per_char": 0.5725235216545336, "incorrect_loss_per_char": 0.7217536063301727, "correct_loss_per_token": 2.2227383781881893, "incorrect_loss_per_token": 2.872890789700339, "correct_loss_uncond": -42.70234680175781, "incorrect_loss_uncond": -31.92358144124349}, "model_output": [{"sum_logits": -75.57310485839844, "num_tokens": 34, "num_tokens_all": 498, "is_greedy": false, "sum_logits_uncond": -118.27545166015625, "logits_per_token": -2.2227383781881893, "logits_per_char": -0.5725235216545336, "num_chars": 132}, {"sum_logits": -98.53216552734375, "num_tokens": 34, "num_tokens_all": 498, "is_greedy": false, "sum_logits_uncond": -139.29550170898438, "logits_per_token": -2.8980048684512867, "logits_per_char": -0.7245012171128217, "num_chars": 136}, {"sum_logits": -89.45719909667969, "num_tokens": 31, "num_tokens_all": 495, "is_greedy": false, "sum_logits_uncond": -115.62188720703125, "logits_per_token": -2.885716099892893, "logits_per_char": -0.667591038034923, "num_chars": 134}, {"sum_logits": -102.0582504272461, "num_tokens": 36, "num_tokens_all": 500, "is_greedy": false, "sum_logits_uncond": -130.90097045898438, "logits_per_token": -2.834951400756836, "logits_per_char": -0.7731685638427734, "num_chars": 132}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 533, "native_id": 49056, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 149.19351196289062, "incorrect_loss_raw": 100.48615010579427, "correct_loss_per_char": 0.43244496221127715, "incorrect_loss_per_char": 0.6253232281195641, "correct_loss_per_token": 2.2267688352670243, "incorrect_loss_per_token": 3.0512072494606564, "correct_loss_uncond": -18.11846923828125, "incorrect_loss_uncond": -16.792643229166668}, "model_output": [{"sum_logits": -73.23516845703125, "num_tokens": 30, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -88.34963989257812, "logits_per_token": -2.4411722819010415, "logits_per_char": -0.5384938857134651, "num_chars": 136}, {"sum_logits": -120.06735229492188, "num_tokens": 34, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -131.59725952148438, "logits_per_token": -3.531392714556526, "logits_per_char": -0.7366095232817292, "num_chars": 163}, {"sum_logits": -149.19351196289062, "num_tokens": 67, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -167.31198120117188, "logits_per_token": -2.2267688352670243, "logits_per_char": -0.43244496221127715, "num_chars": 345}, {"sum_logits": -108.15592956542969, "num_tokens": 34, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -131.8894805908203, "logits_per_token": -3.1810567519244026, "logits_per_char": -0.6008662753634982, "num_chars": 180}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 534, "native_id": 33294, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 35.838138580322266, "incorrect_loss_raw": 75.59072494506836, "correct_loss_per_char": 0.7027085996141621, "incorrect_loss_per_char": 0.7761318676201303, "correct_loss_per_token": 2.986511548360189, "incorrect_loss_per_token": 3.36302147496249, "correct_loss_uncond": -25.09170150756836, "incorrect_loss_uncond": -17.742677052815754}, "model_output": [{"sum_logits": -35.838138580322266, "num_tokens": 12, "num_tokens_all": 413, "is_greedy": false, "sum_logits_uncond": -60.929840087890625, "logits_per_token": -2.986511548360189, "logits_per_char": -0.7027085996141621, "num_chars": 51}, {"sum_logits": -76.29176330566406, "num_tokens": 20, "num_tokens_all": 421, "is_greedy": false, "sum_logits_uncond": -90.70877838134766, "logits_per_token": -3.814588165283203, "logits_per_char": -0.9536470413208008, "num_chars": 80}, {"sum_logits": -98.96866607666016, "num_tokens": 29, "num_tokens_all": 430, "is_greedy": false, "sum_logits_uncond": -117.482666015625, "logits_per_token": -3.4127126233331087, "logits_per_char": -0.6969624371595786, "num_chars": 142}, {"sum_logits": -51.51174545288086, "num_tokens": 18, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -71.80876159667969, "logits_per_token": -2.8617636362711587, "logits_per_char": -0.6777861243800113, "num_chars": 76}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 535, "native_id": 8561, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 70.47455596923828, "incorrect_loss_raw": 121.16943359375, "correct_loss_per_char": 0.5421119689941406, "incorrect_loss_per_char": 0.6384866795901464, "correct_loss_per_token": 2.6101687396014177, "incorrect_loss_per_token": 2.985353353994864, "correct_loss_uncond": -22.56158447265625, "incorrect_loss_uncond": -17.287384033203125}, "model_output": [{"sum_logits": -159.2033233642578, "num_tokens": 50, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -173.28213500976562, "logits_per_token": -3.1840664672851564, "logits_per_char": -0.6661226919006603, "num_chars": 239}, {"sum_logits": -82.26725769042969, "num_tokens": 36, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -102.7978515625, "logits_per_token": -2.2852016025119357, "logits_per_char": -0.5141703605651855, "num_chars": 160}, {"sum_logits": -122.0377197265625, "num_tokens": 35, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -139.29046630859375, "logits_per_token": -3.4867919921875, "logits_per_char": -0.7351669863045934, "num_chars": 166}, {"sum_logits": -70.47455596923828, "num_tokens": 27, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -93.03614044189453, "logits_per_token": -2.6101687396014177, "logits_per_char": -0.5421119689941406, "num_chars": 130}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 536, "native_id": 28345, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 143.7139129638672, "incorrect_loss_raw": 91.83162307739258, "correct_loss_per_char": 0.6331009381668158, "incorrect_loss_per_char": 0.6259266920124841, "correct_loss_per_token": 2.9329369992625955, "incorrect_loss_per_token": 3.024801564063941, "correct_loss_uncond": -11.951950073242188, "incorrect_loss_uncond": -24.98239262898763}, "model_output": [{"sum_logits": -120.49945831298828, "num_tokens": 39, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -144.5233154296875, "logits_per_token": -3.089729700333033, "logits_per_char": -0.6657428636076701, "num_chars": 181}, {"sum_logits": -96.66346740722656, "num_tokens": 29, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -127.9622573852539, "logits_per_token": -3.333223014042295, "logits_per_char": -0.6401554132928912, "num_chars": 151}, {"sum_logits": -58.33194351196289, "num_tokens": 22, "num_tokens_all": 443, "is_greedy": false, "sum_logits_uncond": -77.95647430419922, "logits_per_token": -2.651451977816495, "logits_per_char": -0.5718817991368911, "num_chars": 102}, {"sum_logits": -143.7139129638672, "num_tokens": 49, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -155.66586303710938, "logits_per_token": -2.9329369992625955, "logits_per_char": -0.6331009381668158, "num_chars": 227}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 537, "native_id": 33226, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 59.45277404785156, "incorrect_loss_raw": 105.80013020833333, "correct_loss_per_char": 0.49543978373209635, "incorrect_loss_per_char": 0.5754188214281298, "correct_loss_per_token": 1.9817591349283854, "incorrect_loss_per_token": 2.973707523471596, "correct_loss_uncond": -49.25624084472656, "incorrect_loss_uncond": -28.844624837239582}, "model_output": [{"sum_logits": -59.45277404785156, "num_tokens": 30, "num_tokens_all": 448, "is_greedy": false, "sum_logits_uncond": -108.70901489257812, "logits_per_token": -1.9817591349283854, "logits_per_char": -0.49543978373209635, "num_chars": 120}, {"sum_logits": -79.2220458984375, "num_tokens": 33, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -110.6092529296875, "logits_per_token": -2.400668057528409, "logits_per_char": -0.5144288694703734, "num_chars": 154}, {"sum_logits": -74.83039855957031, "num_tokens": 23, "num_tokens_all": 441, "is_greedy": false, "sum_logits_uncond": -102.73580932617188, "logits_per_token": -3.253495589546535, "logits_per_char": -0.5584358101460472, "num_chars": 134}, {"sum_logits": -163.3479461669922, "num_tokens": 50, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -190.58920288085938, "logits_per_token": -3.266958923339844, "logits_per_char": -0.6533917846679688, "num_chars": 250}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 538, "native_id": 10148, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 101.80211639404297, "incorrect_loss_raw": 115.38213857014973, "correct_loss_per_char": 0.44846747310151086, "incorrect_loss_per_char": 0.599174105724887, "correct_loss_per_token": 2.120877424875895, "incorrect_loss_per_token": 2.9066796314068157, "correct_loss_uncond": -27.823585510253906, "incorrect_loss_uncond": -28.7323735555013}, "model_output": [{"sum_logits": -108.78733825683594, "num_tokens": 42, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -138.53146362304688, "logits_per_token": -2.5901747204008556, "logits_per_char": -0.5522199911514515, "num_chars": 197}, {"sum_logits": -162.57765197753906, "num_tokens": 47, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -188.04129028320312, "logits_per_token": -3.459098978245512, "logits_per_char": -0.7597086541006498, "num_chars": 214}, {"sum_logits": -101.80211639404297, "num_tokens": 48, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -129.62570190429688, "logits_per_token": -2.120877424875895, "logits_per_char": -0.44846747310151086, "num_chars": 227}, {"sum_logits": -74.78142547607422, "num_tokens": 28, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -105.77078247070312, "logits_per_token": -2.6707651955740794, "logits_per_char": -0.48559367192255987, "num_chars": 154}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 539, "native_id": 14377, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 56.27059555053711, "incorrect_loss_raw": 75.37118784586589, "correct_loss_per_char": 0.4809452611157018, "incorrect_loss_per_char": 0.5923459756139831, "correct_loss_per_token": 2.0840961315013744, "incorrect_loss_per_token": 2.710273824028644, "correct_loss_uncond": -21.55960464477539, "incorrect_loss_uncond": -22.499895731608074}, "model_output": [{"sum_logits": -57.873085021972656, "num_tokens": 30, "num_tokens_all": 497, "is_greedy": false, "sum_logits_uncond": -90.36007690429688, "logits_per_token": -1.9291028340657552, "logits_per_char": -0.47436954936043163, "num_chars": 122}, {"sum_logits": -85.91088104248047, "num_tokens": 31, "num_tokens_all": 498, "is_greedy": false, "sum_logits_uncond": -100.62611389160156, "logits_per_token": -2.7713187433058217, "logits_per_char": -0.5804789259627059, "num_chars": 148}, {"sum_logits": -56.27059555053711, "num_tokens": 27, "num_tokens_all": 494, "is_greedy": false, "sum_logits_uncond": -77.8302001953125, "logits_per_token": -2.0840961315013744, "logits_per_char": -0.4809452611157018, "num_chars": 117}, {"sum_logits": -82.32959747314453, "num_tokens": 24, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -102.62705993652344, "logits_per_token": -3.4303998947143555, "logits_per_char": -0.7221894515188116, "num_chars": 114}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 540, "native_id": 41916, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 37.702449798583984, "incorrect_loss_raw": 32.77254867553711, "correct_loss_per_char": 0.5464123159215071, "incorrect_loss_per_char": 0.6720327313966822, "correct_loss_per_token": 3.1418708165486655, "incorrect_loss_per_token": 2.817031436496311, "correct_loss_uncond": -25.520366668701172, "incorrect_loss_uncond": -17.82975133260091}, "model_output": [{"sum_logits": -34.05034255981445, "num_tokens": 11, "num_tokens_all": 404, "is_greedy": false, "sum_logits_uncond": -52.54381561279297, "logits_per_token": -3.0954856872558594, "logits_per_char": -0.7566742791069878, "num_chars": 45}, {"sum_logits": -30.406509399414062, "num_tokens": 12, "num_tokens_all": 405, "is_greedy": false, "sum_logits_uncond": -49.69828796386719, "logits_per_token": -2.5338757832845054, "logits_per_char": -0.6205410081513074, "num_chars": 49}, {"sum_logits": -33.86079406738281, "num_tokens": 12, "num_tokens_all": 405, "is_greedy": false, "sum_logits_uncond": -49.564796447753906, "logits_per_token": -2.821732838948568, "logits_per_char": -0.6388829069317512, "num_chars": 53}, {"sum_logits": -37.702449798583984, "num_tokens": 12, "num_tokens_all": 405, "is_greedy": false, "sum_logits_uncond": -63.222816467285156, "logits_per_token": -3.1418708165486655, "logits_per_char": -0.5464123159215071, "num_chars": 69}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 541, "native_id": 15582, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 25.21062469482422, "incorrect_loss_raw": 32.58625793457031, "correct_loss_per_char": 0.6813682349952491, "incorrect_loss_per_char": 0.830315973856547, "correct_loss_per_token": 3.1513280868530273, "incorrect_loss_per_token": 3.6177871386210128, "correct_loss_uncond": -26.22597885131836, "incorrect_loss_uncond": -13.100337982177734}, "model_output": [{"sum_logits": -25.21062469482422, "num_tokens": 8, "num_tokens_all": 393, "is_greedy": false, "sum_logits_uncond": -51.43660354614258, "logits_per_token": -3.1513280868530273, "logits_per_char": -0.6813682349952491, "num_chars": 37}, {"sum_logits": -32.58592987060547, "num_tokens": 9, "num_tokens_all": 394, "is_greedy": false, "sum_logits_uncond": -44.093833923339844, "logits_per_token": -3.6206588745117188, "logits_per_char": -0.9051647186279297, "num_chars": 36}, {"sum_logits": -28.61672592163086, "num_tokens": 8, "num_tokens_all": 393, "is_greedy": false, "sum_logits_uncond": -46.544185638427734, "logits_per_token": -3.5770907402038574, "logits_per_char": -0.7734250249089422, "num_chars": 37}, {"sum_logits": -36.55611801147461, "num_tokens": 10, "num_tokens_all": 395, "is_greedy": false, "sum_logits_uncond": -46.42176818847656, "logits_per_token": -3.6556118011474608, "logits_per_char": -0.8123581780327691, "num_chars": 45}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 542, "native_id": 29383, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 103.12026977539062, "incorrect_loss_raw": 119.10731760660808, "correct_loss_per_char": 0.610179111096986, "incorrect_loss_per_char": 0.5579315048276073, "correct_loss_per_token": 2.864451938205295, "incorrect_loss_per_token": 2.6239345163706083, "correct_loss_uncond": -29.290695190429688, "incorrect_loss_uncond": -27.601468404134113}, "model_output": [{"sum_logits": -98.13055419921875, "num_tokens": 41, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -120.45616149902344, "logits_per_token": -2.393428151200457, "logits_per_char": -0.5058275989650451, "num_chars": 194}, {"sum_logits": -103.12026977539062, "num_tokens": 36, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -132.4109649658203, "logits_per_token": -2.864451938205295, "logits_per_char": -0.610179111096986, "num_chars": 169}, {"sum_logits": -108.60588836669922, "num_tokens": 43, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -132.69293212890625, "logits_per_token": -2.525718334109284, "logits_per_char": -0.507504151246258, "num_chars": 214}, {"sum_logits": -150.58551025390625, "num_tokens": 51, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -186.97726440429688, "logits_per_token": -2.9526570638020835, "logits_per_char": -0.6604627642715186, "num_chars": 228}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 543, "native_id": 4021, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 150.84996032714844, "incorrect_loss_raw": 110.3614896138509, "correct_loss_per_char": 0.6704442681206597, "incorrect_loss_per_char": 0.6185468363716734, "correct_loss_per_token": 3.0169992065429687, "incorrect_loss_per_token": 3.117684990302921, "correct_loss_uncond": -26.882675170898438, "incorrect_loss_uncond": -20.617743174235027}, "model_output": [{"sum_logits": -159.56668090820312, "num_tokens": 45, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -183.73020935058594, "logits_per_token": -3.545926242404514, "logits_per_char": -0.6408300438080446, "num_chars": 249}, {"sum_logits": -150.84996032714844, "num_tokens": 50, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -177.73263549804688, "logits_per_token": -3.0169992065429687, "logits_per_char": -0.6704442681206597, "num_chars": 225}, {"sum_logits": -114.12117004394531, "num_tokens": 31, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -133.54623413085938, "logits_per_token": -3.68132806593372, "logits_per_char": -0.6833602996643432, "num_chars": 167}, {"sum_logits": -57.3966178894043, "num_tokens": 27, "num_tokens_all": 446, "is_greedy": false, "sum_logits_uncond": -75.6612548828125, "logits_per_token": -2.1258006625705295, "logits_per_char": -0.5314501656426324, "num_chars": 108}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 544, "native_id": 29694, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 89.03421020507812, "incorrect_loss_raw": 130.3098347981771, "correct_loss_per_char": 0.43644220688763785, "incorrect_loss_per_char": 0.5877381945203607, "correct_loss_per_token": 2.070563028025073, "incorrect_loss_per_token": 2.738787062176796, "correct_loss_uncond": -32.31488800048828, "incorrect_loss_uncond": -25.51904296875}, "model_output": [{"sum_logits": -89.03421020507812, "num_tokens": 43, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -121.3490982055664, "logits_per_token": -2.070563028025073, "logits_per_char": -0.43644220688763785, "num_chars": 204}, {"sum_logits": -104.43557739257812, "num_tokens": 43, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -124.16018676757812, "logits_per_token": -2.428734357966933, "logits_per_char": -0.5645166345544763, "num_chars": 185}, {"sum_logits": -141.48330688476562, "num_tokens": 49, "num_tokens_all": 490, "is_greedy": false, "sum_logits_uncond": -180.6856689453125, "logits_per_token": -2.887414426219707, "logits_per_char": -0.6344542909630746, "num_chars": 223}, {"sum_logits": -145.0106201171875, "num_tokens": 50, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -162.64077758789062, "logits_per_token": -2.90021240234375, "logits_per_char": -0.5642436580435312, "num_chars": 257}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 545, "native_id": 6492, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 100.399169921875, "incorrect_loss_raw": 97.43738301595052, "correct_loss_per_char": 0.42362518954377637, "incorrect_loss_per_char": 0.545109348200254, "correct_loss_per_token": 2.1825906504755435, "incorrect_loss_per_token": 2.654809586661869, "correct_loss_uncond": -21.388084411621094, "incorrect_loss_uncond": -11.112078348795572}, "model_output": [{"sum_logits": -65.42631530761719, "num_tokens": 32, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -77.01179504394531, "logits_per_token": -2.044572353363037, "logits_per_char": -0.4882560843852029, "num_chars": 134}, {"sum_logits": -144.15943908691406, "num_tokens": 47, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -158.5427703857422, "logits_per_token": -3.067222108232214, "logits_per_char": -0.6267801699431046, "num_chars": 230}, {"sum_logits": -100.399169921875, "num_tokens": 46, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -121.7872543334961, "logits_per_token": -2.1825906504755435, "logits_per_char": -0.42362518954377637, "num_chars": 237}, {"sum_logits": -82.72639465332031, "num_tokens": 29, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -90.09381866455078, "logits_per_token": -2.8526342983903556, "logits_per_char": -0.5202917902724548, "num_chars": 159}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 546, "native_id": 15769, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 25.498632431030273, "incorrect_loss_raw": 33.38883972167969, "correct_loss_per_char": 0.3922866527850811, "incorrect_loss_per_char": 0.6100002616342872, "correct_loss_per_token": 1.8213308879307337, "incorrect_loss_per_token": 3.0900964044098163, "correct_loss_uncond": -30.786149978637695, "incorrect_loss_uncond": -21.696491241455078}, "model_output": [{"sum_logits": -28.219844818115234, "num_tokens": 8, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -54.81953811645508, "logits_per_token": -3.5274806022644043, "logits_per_char": -0.641360109502619, "num_chars": 44}, {"sum_logits": -39.42862319946289, "num_tokens": 13, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -54.153175354003906, "logits_per_token": -3.0329710153432994, "logits_per_char": -0.5974033818100438, "num_chars": 66}, {"sum_logits": -25.498632431030273, "num_tokens": 14, "num_tokens_all": 420, "is_greedy": false, "sum_logits_uncond": -56.28478240966797, "logits_per_token": -1.8213308879307337, "logits_per_char": -0.3922866527850811, "num_chars": 65}, {"sum_logits": -32.51805114746094, "num_tokens": 12, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -56.28327941894531, "logits_per_token": -2.7098375956217446, "logits_per_char": -0.5912372935901988, "num_chars": 55}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 547, "native_id": 8522, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 96.2850341796875, "incorrect_loss_raw": 100.92735290527344, "correct_loss_per_char": 0.5204596442145271, "incorrect_loss_per_char": 0.588245533736506, "correct_loss_per_token": 2.7510009765625, "incorrect_loss_per_token": 2.6424352265602002, "correct_loss_uncond": -25.89324951171875, "incorrect_loss_uncond": -21.34099833170573}, "model_output": [{"sum_logits": -106.25811767578125, "num_tokens": 39, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -134.60462951660156, "logits_per_token": -2.724567119891827, "logits_per_char": -0.5712802025579637, "num_chars": 186}, {"sum_logits": -66.02897644042969, "num_tokens": 26, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -82.93453979492188, "logits_per_token": -2.5395760169396033, "logits_per_char": -0.5948556436074747, "num_chars": 111}, {"sum_logits": -130.49496459960938, "num_tokens": 49, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -149.26588439941406, "logits_per_token": -2.6631625428491708, "logits_per_char": -0.5986007550440797, "num_chars": 218}, {"sum_logits": -96.2850341796875, "num_tokens": 35, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -122.17828369140625, "logits_per_token": -2.7510009765625, "logits_per_char": -0.5204596442145271, "num_chars": 185}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 548, "native_id": 39183, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 23.064010620117188, "incorrect_loss_raw": 38.82037226359049, "correct_loss_per_char": 0.5363723400027253, "incorrect_loss_per_char": 0.7469399939900936, "correct_loss_per_token": 2.3064010620117186, "incorrect_loss_per_token": 3.572969190038816, "correct_loss_uncond": -25.988651275634766, "incorrect_loss_uncond": -22.02971903483073}, "model_output": [{"sum_logits": -35.33790588378906, "num_tokens": 11, "num_tokens_all": 404, "is_greedy": false, "sum_logits_uncond": -54.79256057739258, "logits_per_token": -3.2125368985262783, "logits_per_char": -0.7362063725789388, "num_chars": 48}, {"sum_logits": -23.064010620117188, "num_tokens": 10, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -49.05266189575195, "logits_per_token": -2.3064010620117186, "logits_per_char": -0.5363723400027253, "num_chars": 43}, {"sum_logits": -44.76618576049805, "num_tokens": 10, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -56.32231903076172, "logits_per_token": -4.4766185760498045, "logits_per_char": -0.8777683482450598, "num_chars": 51}, {"sum_logits": -36.357025146484375, "num_tokens": 12, "num_tokens_all": 405, "is_greedy": false, "sum_logits_uncond": -71.43539428710938, "logits_per_token": -3.0297520955403647, "logits_per_char": -0.6268452611462824, "num_chars": 58}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 549, "native_id": 44021, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 75.80054473876953, "incorrect_loss_raw": 82.09413019816081, "correct_loss_per_char": 0.45389547747766185, "incorrect_loss_per_char": 0.5349265917432792, "correct_loss_per_token": 1.9947511773360402, "incorrect_loss_per_token": 2.4101526260375974, "correct_loss_uncond": -33.670989990234375, "incorrect_loss_uncond": -33.3940060933431}, "model_output": [{"sum_logits": -106.968017578125, "num_tokens": 44, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -145.591064453125, "logits_per_token": -2.43109130859375, "logits_per_char": -0.5485539362980769, "num_chars": 195}, {"sum_logits": -75.80054473876953, "num_tokens": 38, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -109.4715347290039, "logits_per_token": -1.9947511773360402, "logits_per_char": -0.45389547747766185, "num_chars": 167}, {"sum_logits": -50.94770431518555, "num_tokens": 25, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -77.9261703491211, "logits_per_token": -2.037908172607422, "logits_per_char": -0.4631609483198686, "num_chars": 110}, {"sum_logits": -88.36666870117188, "num_tokens": 32, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -122.94717407226562, "logits_per_token": -2.761458396911621, "logits_per_char": -0.5930648906118918, "num_chars": 149}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 550, "native_id": 36579, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.472794532775879, "incorrect_loss_raw": 19.298169771830242, "correct_loss_per_char": 0.27331595267018965, "incorrect_loss_per_char": 0.7130981891874283, "correct_loss_per_token": 1.2103992189679826, "incorrect_loss_per_token": 3.185808996170286, "correct_loss_uncond": -25.475245475769043, "incorrect_loss_uncond": -8.675559679667154}, "model_output": [{"sum_logits": -8.472794532775879, "num_tokens": 7, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -33.94804000854492, "logits_per_token": -1.2103992189679826, "logits_per_char": -0.27331595267018965, "num_chars": 31}, {"sum_logits": -16.348064422607422, "num_tokens": 6, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -26.485919952392578, "logits_per_token": -2.724677403767904, "logits_per_char": -0.5838594436645508, "num_chars": 28}, {"sum_logits": -25.839439392089844, "num_tokens": 7, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -31.09395980834961, "logits_per_token": -3.6913484845842635, "logits_per_char": -0.8074824810028076, "num_chars": 32}, {"sum_logits": -15.707005500793457, "num_tokens": 5, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -26.34130859375, "logits_per_token": -3.1414011001586912, "logits_per_char": -0.7479526428949266, "num_chars": 21}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 551, "native_id": 23783, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 83.45189666748047, "incorrect_loss_raw": 95.75343831380208, "correct_loss_per_char": 0.5876894131512709, "incorrect_loss_per_char": 0.5551428508017621, "correct_loss_per_token": 2.6919966666929183, "incorrect_loss_per_token": 2.3857985008824194, "correct_loss_uncond": -17.89173126220703, "incorrect_loss_uncond": -19.42089335123698}, "model_output": [{"sum_logits": -112.66381072998047, "num_tokens": 46, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -126.41744995117188, "logits_per_token": -2.449213276738706, "logits_per_char": -0.5992755889892578, "num_chars": 188}, {"sum_logits": -83.45189666748047, "num_tokens": 31, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -101.3436279296875, "logits_per_token": -2.6919966666929183, "logits_per_char": -0.5876894131512709, "num_chars": 142}, {"sum_logits": -96.11549377441406, "num_tokens": 35, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -115.41043090820312, "logits_per_token": -2.746156964983259, "logits_per_char": -0.6044999608453715, "num_chars": 159}, {"sum_logits": -78.48101043701172, "num_tokens": 40, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -103.69511413574219, "logits_per_token": -1.962025260925293, "logits_per_char": -0.46165300257065717, "num_chars": 170}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 552, "native_id": 2547, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 78.56907653808594, "incorrect_loss_raw": 111.68463389078777, "correct_loss_per_char": 0.5308721387708509, "incorrect_loss_per_char": 0.6257501514557372, "correct_loss_per_token": 2.9099657977068865, "incorrect_loss_per_token": 2.8868324626764696, "correct_loss_uncond": -24.888839721679688, "incorrect_loss_uncond": -18.371815999348957}, "model_output": [{"sum_logits": -78.56907653808594, "num_tokens": 27, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -103.45791625976562, "logits_per_token": -2.9099657977068865, "logits_per_char": -0.5308721387708509, "num_chars": 148}, {"sum_logits": -91.69184875488281, "num_tokens": 34, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -95.57132720947266, "logits_per_token": -2.696819081025965, "logits_per_char": -0.5992931291168812, "num_chars": 153}, {"sum_logits": -114.67853546142578, "num_tokens": 48, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -145.54534912109375, "logits_per_token": -2.3891361554463706, "logits_per_char": -0.5383968801005905, "num_chars": 213}, {"sum_logits": -128.6835174560547, "num_tokens": 36, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -149.05267333984375, "logits_per_token": -3.5745421515570746, "logits_per_char": -0.7395604451497396, "num_chars": 174}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 553, "native_id": 386, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 91.40592956542969, "incorrect_loss_raw": 150.38886006673178, "correct_loss_per_char": 0.5193518725308505, "incorrect_loss_per_char": 0.593502734217654, "correct_loss_per_token": 2.3437417837289662, "incorrect_loss_per_token": 2.961005300821668, "correct_loss_uncond": -14.272758483886719, "incorrect_loss_uncond": -13.288726806640625}, "model_output": [{"sum_logits": -145.4471893310547, "num_tokens": 46, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -161.2594451904297, "logits_per_token": -3.1618954202403193, "logits_per_char": -0.6464319525824653, "num_chars": 225}, {"sum_logits": -91.40592956542969, "num_tokens": 39, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -105.6786880493164, "logits_per_token": -2.3437417837289662, "logits_per_char": -0.5193518725308505, "num_chars": 176}, {"sum_logits": -105.84550476074219, "num_tokens": 40, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -118.85845947265625, "logits_per_token": -2.6461376190185546, "logits_per_char": -0.563008004046501, "num_chars": 188}, {"sum_logits": -199.87388610839844, "num_tokens": 65, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -210.91485595703125, "logits_per_token": -3.07498286320613, "logits_per_char": -0.5710682460239955, "num_chars": 350}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 554, "native_id": 43867, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 60.16413116455078, "incorrect_loss_raw": 108.52349344889323, "correct_loss_per_char": 0.43283547600396244, "incorrect_loss_per_char": 0.671675301437118, "correct_loss_per_token": 1.7189751761300223, "incorrect_loss_per_token": 2.7003119211348277, "correct_loss_uncond": -43.583038330078125, "incorrect_loss_uncond": -29.499127705891926}, "model_output": [{"sum_logits": -137.93511962890625, "num_tokens": 49, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -178.75689697265625, "logits_per_token": -2.81500244140625, "logits_per_char": -0.78820068359375, "num_chars": 175}, {"sum_logits": -69.74188232421875, "num_tokens": 21, "num_tokens_all": 444, "is_greedy": false, "sum_logits_uncond": -95.48168182373047, "logits_per_token": -3.321042015438988, "logits_per_char": -0.7836166553283006, "num_chars": 89}, {"sum_logits": -60.16413116455078, "num_tokens": 35, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -103.7471694946289, "logits_per_token": -1.7189751761300223, "logits_per_char": -0.43283547600396244, "num_chars": 139}, {"sum_logits": -117.89347839355469, "num_tokens": 60, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -139.82928466796875, "logits_per_token": -1.9648913065592448, "logits_per_char": -0.4432085653893033, "num_chars": 266}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 555, "native_id": 39299, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 83.93466186523438, "incorrect_loss_raw": 70.06773630777995, "correct_loss_per_char": 0.4054814582861564, "incorrect_loss_per_char": 0.5418098651957903, "correct_loss_per_token": 1.7486387888590496, "incorrect_loss_per_token": 2.4129533266706535, "correct_loss_uncond": -31.132232666015625, "incorrect_loss_uncond": -28.49927012125651}, "model_output": [{"sum_logits": -81.73399353027344, "num_tokens": 29, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -116.79890441894531, "logits_per_token": -2.818413570009429, "logits_per_char": -0.6699507666415856, "num_chars": 122}, {"sum_logits": -83.93466186523438, "num_tokens": 48, "num_tokens_all": 507, "is_greedy": false, "sum_logits_uncond": -115.06689453125, "logits_per_token": -1.7486387888590496, "logits_per_char": -0.4054814582861564, "num_chars": 207}, {"sum_logits": -37.29759216308594, "num_tokens": 27, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -68.84622955322266, "logits_per_token": -1.3813923023365162, "logits_per_char": -0.3134251442276129, "num_chars": 119}, {"sum_logits": -91.17162322998047, "num_tokens": 30, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -110.0558853149414, "logits_per_token": -3.039054107666016, "logits_per_char": -0.6420536847181724, "num_chars": 142}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 556, "native_id": 36901, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 31.399169921875, "incorrect_loss_raw": 43.96610768636068, "correct_loss_per_char": 0.5321893207097458, "incorrect_loss_per_char": 0.7643781907054416, "correct_loss_per_token": 3.1399169921875, "incorrect_loss_per_token": 3.4426116980758366, "correct_loss_uncond": -19.318313598632812, "incorrect_loss_uncond": -22.956153869628906}, "model_output": [{"sum_logits": -31.399169921875, "num_tokens": 10, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -50.71748352050781, "logits_per_token": -3.1399169921875, "logits_per_char": -0.5321893207097458, "num_chars": 59}, {"sum_logits": -32.94892883300781, "num_tokens": 10, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -56.808563232421875, "logits_per_token": -3.2948928833007813, "logits_per_char": -0.8036324105611662, "num_chars": 41}, {"sum_logits": -49.46549606323242, "num_tokens": 12, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -65.59393310546875, "logits_per_token": -4.122124671936035, "logits_per_char": -0.8383982383598716, "num_chars": 59}, {"sum_logits": -49.4838981628418, "num_tokens": 17, "num_tokens_all": 413, "is_greedy": false, "sum_logits_uncond": -78.36428833007812, "logits_per_token": -2.910817538990694, "logits_per_char": -0.6511039231952868, "num_chars": 76}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 557, "native_id": 1826, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 106.24650573730469, "incorrect_loss_raw": 61.731072743733726, "correct_loss_per_char": 0.5869972692668767, "incorrect_loss_per_char": 0.43669075721460726, "correct_loss_per_token": 2.795960677297492, "incorrect_loss_per_token": 2.088886033039467, "correct_loss_uncond": -14.001747131347656, "incorrect_loss_uncond": -27.818541208902996}, "model_output": [{"sum_logits": -106.24650573730469, "num_tokens": 38, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -120.24825286865234, "logits_per_token": -2.795960677297492, "logits_per_char": -0.5869972692668767, "num_chars": 181}, {"sum_logits": -66.93324279785156, "num_tokens": 25, "num_tokens_all": 440, "is_greedy": false, "sum_logits_uncond": -81.35997009277344, "logits_per_token": -2.6773297119140627, "logits_per_char": -0.5820281982421875, "num_chars": 115}, {"sum_logits": -57.8249397277832, "num_tokens": 34, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -93.00421905517578, "logits_per_token": -1.7007335214053882, "logits_per_char": -0.368311718011358, "num_chars": 157}, {"sum_logits": -60.435035705566406, "num_tokens": 32, "num_tokens_all": 447, "is_greedy": false, "sum_logits_uncond": -94.28465270996094, "logits_per_token": -1.8885948657989502, "logits_per_char": -0.3597323553902762, "num_chars": 168}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 558, "native_id": 17646, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 98.3874740600586, "incorrect_loss_raw": 137.44520823160806, "correct_loss_per_char": 0.7453596519701409, "incorrect_loss_per_char": 0.7625691893117991, "correct_loss_per_token": 3.1737894858083417, "incorrect_loss_per_token": 3.5508488150069346, "correct_loss_uncond": -16.04460906982422, "incorrect_loss_uncond": -16.24230194091797}, "model_output": [{"sum_logits": -110.1625747680664, "num_tokens": 37, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -132.53067016601562, "logits_per_token": -2.9773668856234163, "logits_per_char": -0.6367778888327538, "num_chars": 173}, {"sum_logits": -188.71229553222656, "num_tokens": 47, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -197.12022399902344, "logits_per_token": -4.015155224089927, "logits_per_char": -0.846243477722989, "num_chars": 223}, {"sum_logits": -113.46075439453125, "num_tokens": 31, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -131.41163635253906, "logits_per_token": -3.6600243353074595, "logits_per_char": -0.8046862013796543, "num_chars": 141}, {"sum_logits": -98.3874740600586, "num_tokens": 31, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -114.43208312988281, "logits_per_token": -3.1737894858083417, "logits_per_char": -0.7453596519701409, "num_chars": 132}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 559, "native_id": 26038, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 82.3157958984375, "incorrect_loss_raw": 122.65337371826172, "correct_loss_per_char": 0.47307928677262934, "incorrect_loss_per_char": 0.6765435044133454, "correct_loss_per_token": 2.110661433293269, "incorrect_loss_per_token": 3.108023557857591, "correct_loss_uncond": -19.513694763183594, "incorrect_loss_uncond": -18.87902577718099}, "model_output": [{"sum_logits": -97.62215423583984, "num_tokens": 40, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -128.19601440429688, "logits_per_token": -2.4405538558959963, "logits_per_char": -0.5546713308854536, "num_chars": 176}, {"sum_logits": -82.3157958984375, "num_tokens": 39, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -101.8294906616211, "logits_per_token": -2.110661433293269, "logits_per_char": -0.47307928677262934, "num_chars": 174}, {"sum_logits": -181.06549072265625, "num_tokens": 49, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -193.7032928466797, "logits_per_token": -3.69521409638074, "logits_per_char": -0.76077937278427, "num_chars": 238}, {"sum_logits": -89.27247619628906, "num_tokens": 28, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -102.69789123535156, "logits_per_token": -3.1883027212960378, "logits_per_char": -0.7141798095703125, "num_chars": 125}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 560, "native_id": 402, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 28.861289978027344, "incorrect_loss_raw": 29.227245966593426, "correct_loss_per_char": 0.7215322494506836, "incorrect_loss_per_char": 0.606782375869935, "correct_loss_per_token": 3.2068099975585938, "incorrect_loss_per_token": 2.852022120891473, "correct_loss_uncond": -20.101348876953125, "incorrect_loss_uncond": -14.621161778767904}, "model_output": [{"sum_logits": -33.08496856689453, "num_tokens": 10, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -40.963565826416016, "logits_per_token": -3.308496856689453, "logits_per_char": -0.7039355014232879, "num_chars": 47}, {"sum_logits": -28.861289978027344, "num_tokens": 9, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -48.96263885498047, "logits_per_token": -3.2068099975585938, "logits_per_char": -0.7215322494506836, "num_chars": 40}, {"sum_logits": -21.794614791870117, "num_tokens": 8, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -35.04534912109375, "logits_per_token": -2.7243268489837646, "logits_per_char": -0.6410180821138269, "num_chars": 34}, {"sum_logits": -32.802154541015625, "num_tokens": 13, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -55.53630828857422, "logits_per_token": -2.523242657001202, "logits_per_char": -0.47539354407269024, "num_chars": 69}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 561, "native_id": 38736, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 87.07633209228516, "incorrect_loss_raw": 120.4929936726888, "correct_loss_per_char": 0.3994327160196567, "incorrect_loss_per_char": 0.5225416185980244, "correct_loss_per_token": 2.0250309788903524, "incorrect_loss_per_token": 2.5235082999739067, "correct_loss_uncond": -21.326499938964844, "incorrect_loss_uncond": -20.174888610839844}, "model_output": [{"sum_logits": -134.4683837890625, "num_tokens": 50, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -153.0072479248047, "logits_per_token": -2.68936767578125, "logits_per_char": -0.5488505460778061, "num_chars": 245}, {"sum_logits": -87.07633209228516, "num_tokens": 43, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -108.40283203125, "logits_per_token": -2.0250309788903524, "logits_per_char": -0.3994327160196567, "num_chars": 218}, {"sum_logits": -83.40157318115234, "num_tokens": 36, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -106.25308227539062, "logits_per_token": -2.3167103661431208, "logits_per_char": -0.4685481639390581, "num_chars": 178}, {"sum_logits": -143.60902404785156, "num_tokens": 56, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -162.74331665039062, "logits_per_token": -2.5644468579973494, "logits_per_char": -0.5502261457772091, "num_chars": 261}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 562, "native_id": 18101, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 154.16078186035156, "incorrect_loss_raw": 139.7648951212565, "correct_loss_per_char": 0.8029207388559977, "incorrect_loss_per_char": 0.9376741727920637, "correct_loss_per_token": 3.1461384053132972, "incorrect_loss_per_token": 3.7720946827463044, "correct_loss_uncond": -28.238143920898438, "incorrect_loss_uncond": -21.136924743652344}, "model_output": [{"sum_logits": -85.82518005371094, "num_tokens": 33, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -113.01948547363281, "logits_per_token": -2.6007630319306343, "logits_per_char": -0.7152098337809245, "num_chars": 120}, {"sum_logits": -85.54564666748047, "num_tokens": 29, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -113.05792236328125, "logits_per_token": -2.9498498850855333, "logits_per_char": -0.7504004093638638, "num_chars": 114}, {"sum_logits": -247.92385864257812, "num_tokens": 43, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -256.6280517578125, "logits_per_token": -5.765671131222747, "logits_per_char": -1.3474122752314028, "num_chars": 184}, {"sum_logits": -154.16078186035156, "num_tokens": 49, "num_tokens_all": 495, "is_greedy": false, "sum_logits_uncond": -182.39892578125, "logits_per_token": -3.1461384053132972, "logits_per_char": -0.8029207388559977, "num_chars": 192}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 563, "native_id": 48177, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 30.558799743652344, "incorrect_loss_raw": 49.60922622680664, "correct_loss_per_char": 0.41861369511852525, "incorrect_loss_per_char": 0.7031482095379644, "correct_loss_per_token": 1.7975764555089615, "incorrect_loss_per_token": 3.0675700528610155, "correct_loss_uncond": -28.31873321533203, "incorrect_loss_uncond": -25.547611236572266}, "model_output": [{"sum_logits": -64.47772216796875, "num_tokens": 23, "num_tokens_all": 432, "is_greedy": false, "sum_logits_uncond": -89.90132904052734, "logits_per_token": -2.8033792246942935, "logits_per_char": -0.6025955342800817, "num_chars": 107}, {"sum_logits": -30.558799743652344, "num_tokens": 17, "num_tokens_all": 426, "is_greedy": false, "sum_logits_uncond": -58.877532958984375, "logits_per_token": -1.7975764555089615, "logits_per_char": -0.41861369511852525, "num_chars": 73}, {"sum_logits": -49.73296356201172, "num_tokens": 24, "num_tokens_all": 433, "is_greedy": false, "sum_logits_uncond": -84.01575469970703, "logits_per_token": -2.0722068150838218, "logits_per_char": -0.4250680646325788, "num_chars": 117}, {"sum_logits": -34.61699295043945, "num_tokens": 8, "num_tokens_all": 417, "is_greedy": false, "sum_logits_uncond": -51.553428649902344, "logits_per_token": -4.327124118804932, "logits_per_char": -1.081781029701233, "num_chars": 32}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 564, "native_id": 3226, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 122.56623840332031, "incorrect_loss_raw": 113.98790486653645, "correct_loss_per_char": 0.41974739179219284, "incorrect_loss_per_char": 0.48649680534225137, "correct_loss_per_token": 2.1502848842687774, "incorrect_loss_per_token": 2.5400087667483953, "correct_loss_uncond": -43.27516174316406, "incorrect_loss_uncond": -31.023239135742188}, "model_output": [{"sum_logits": -122.56623840332031, "num_tokens": 57, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -165.84140014648438, "logits_per_token": -2.1502848842687774, "logits_per_char": -0.41974739179219284, "num_chars": 292}, {"sum_logits": -135.15792846679688, "num_tokens": 51, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -160.98104858398438, "logits_per_token": -2.650155460133272, "logits_per_char": -0.47758985323956493, "num_chars": 283}, {"sum_logits": -103.58929443359375, "num_tokens": 38, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -128.8214111328125, "logits_per_token": -2.726034064041941, "logits_per_char": -0.5787111420871159, "num_chars": 179}, {"sum_logits": -103.21649169921875, "num_tokens": 46, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -145.23097229003906, "logits_per_token": -2.2438367760699727, "logits_per_char": -0.40318942070007324, "num_chars": 256}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 565, "native_id": 43055, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 104.74382019042969, "incorrect_loss_raw": 101.99752298990886, "correct_loss_per_char": 0.6546488761901855, "incorrect_loss_per_char": 0.7460285266169033, "correct_loss_per_token": 3.2732443809509277, "incorrect_loss_per_token": 3.070306074564995, "correct_loss_uncond": -25.319381713867188, "incorrect_loss_uncond": -18.1407470703125}, "model_output": [{"sum_logits": -142.96131896972656, "num_tokens": 46, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -156.86569213867188, "logits_per_token": -3.107854760211447, "logits_per_char": -0.7331349690755208, "num_chars": 195}, {"sum_logits": -104.74382019042969, "num_tokens": 32, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -130.06320190429688, "logits_per_token": -3.2732443809509277, "logits_per_char": -0.6546488761901855, "num_chars": 160}, {"sum_logits": -65.45439147949219, "num_tokens": 25, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -91.1312255859375, "logits_per_token": -2.6181756591796876, "logits_per_char": -0.5844142096383231, "num_chars": 112}, {"sum_logits": -97.57685852050781, "num_tokens": 28, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -112.41789245605469, "logits_per_token": -3.4848878043038503, "logits_per_char": -0.9205364011368662, "num_chars": 106}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 566, "native_id": 3808, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 26.777002334594727, "incorrect_loss_raw": 36.076714833577476, "correct_loss_per_char": 0.46167245404473667, "incorrect_loss_per_char": 0.6859284257795842, "correct_loss_per_token": 1.9126430238996233, "incorrect_loss_per_token": 3.084462144639757, "correct_loss_uncond": -37.52474021911621, "incorrect_loss_uncond": -18.932493845621746}, "model_output": [{"sum_logits": -26.777002334594727, "num_tokens": 14, "num_tokens_all": 425, "is_greedy": false, "sum_logits_uncond": -64.30174255371094, "logits_per_token": -1.9126430238996233, "logits_per_char": -0.46167245404473667, "num_chars": 58}, {"sum_logits": -31.95244598388672, "num_tokens": 12, "num_tokens_all": 423, "is_greedy": false, "sum_logits_uncond": -59.82242965698242, "logits_per_token": -2.66270383199056, "logits_per_char": -0.532540766398112, "num_chars": 60}, {"sum_logits": -36.29805374145508, "num_tokens": 14, "num_tokens_all": 425, "is_greedy": false, "sum_logits_uncond": -55.394805908203125, "logits_per_token": -2.5927181243896484, "logits_per_char": -0.6368079603764049, "num_chars": 57}, {"sum_logits": -39.979644775390625, "num_tokens": 10, "num_tokens_all": 421, "is_greedy": false, "sum_logits_uncond": -49.81039047241211, "logits_per_token": -3.9979644775390626, "logits_per_char": -0.8884365505642361, "num_chars": 45}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 567, "native_id": 4061, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 71.10203552246094, "incorrect_loss_raw": 46.00823847452799, "correct_loss_per_char": 0.703980549727336, "incorrect_loss_per_char": 0.6392353435810375, "correct_loss_per_token": 3.7422123959189966, "incorrect_loss_per_token": 3.181558190587579, "correct_loss_uncond": -23.917938232421875, "incorrect_loss_uncond": -23.236573537190754}, "model_output": [{"sum_logits": -61.7481689453125, "num_tokens": 18, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -80.18679809570312, "logits_per_token": -3.430453830295139, "logits_per_char": -0.7530264505525914, "num_chars": 82}, {"sum_logits": -38.4998779296875, "num_tokens": 12, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -59.28883361816406, "logits_per_token": -3.2083231608072915, "logits_per_char": -0.5923058143028846, "num_chars": 65}, {"sum_logits": -37.776668548583984, "num_tokens": 13, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -68.25880432128906, "logits_per_token": -2.9058975806603065, "logits_per_char": -0.5723737658876361, "num_chars": 66}, {"sum_logits": -71.10203552246094, "num_tokens": 19, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -95.01997375488281, "logits_per_token": -3.7422123959189966, "logits_per_char": -0.703980549727336, "num_chars": 101}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 568, "native_id": 20103, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 178.0765380859375, "incorrect_loss_raw": 136.82437896728516, "correct_loss_per_char": 0.5363751147166792, "incorrect_loss_per_char": 0.731548990807729, "correct_loss_per_token": 2.5439505440848214, "incorrect_loss_per_token": 3.327238861004119, "correct_loss_uncond": -9.21282958984375, "incorrect_loss_uncond": -14.85784657796224}, "model_output": [{"sum_logits": -175.28382873535156, "num_tokens": 43, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -185.46746826171875, "logits_per_token": -4.076368110124455, "logits_per_char": -0.8346848987397694, "num_chars": 210}, {"sum_logits": -131.895263671875, "num_tokens": 46, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -156.91175842285156, "logits_per_token": -2.8672883406929346, "logits_per_char": -0.6221474701503538, "num_chars": 212}, {"sum_logits": -103.2940444946289, "num_tokens": 34, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -112.66744995117188, "logits_per_token": -3.0380601321949676, "logits_per_char": -0.7378146035330636, "num_chars": 140}, {"sum_logits": -178.0765380859375, "num_tokens": 70, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -187.28936767578125, "logits_per_token": -2.5439505440848214, "logits_per_char": -0.5363751147166792, "num_chars": 332}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 569, "native_id": 29459, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 150.12948608398438, "incorrect_loss_raw": 152.20291900634766, "correct_loss_per_char": 0.5499248574504922, "incorrect_loss_per_char": 0.6814596629598008, "correct_loss_per_token": 2.6338506330523574, "incorrect_loss_per_token": 3.2766953414819913, "correct_loss_uncond": -30.992935180664062, "incorrect_loss_uncond": -16.2198003133138}, "model_output": [{"sum_logits": -219.37200927734375, "num_tokens": 65, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -238.15505981445312, "logits_per_token": -3.3749539888822113, "logits_per_char": -0.7076516428301411, "num_chars": 310}, {"sum_logits": -94.01314544677734, "num_tokens": 35, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -102.43536376953125, "logits_per_token": -2.6860898699079243, "logits_per_char": -0.5497844762969435, "num_chars": 171}, {"sum_logits": -143.22360229492188, "num_tokens": 38, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -164.677734375, "logits_per_token": -3.7690421656558386, "logits_per_char": -0.786942869752318, "num_chars": 182}, {"sum_logits": -150.12948608398438, "num_tokens": 57, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -181.12242126464844, "logits_per_token": -2.6338506330523574, "logits_per_char": -0.5499248574504922, "num_chars": 273}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 570, "native_id": 4874, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 50.693424224853516, "incorrect_loss_raw": 108.49458821614583, "correct_loss_per_char": 0.4296052900411315, "incorrect_loss_per_char": 0.8133685556511784, "correct_loss_per_token": 2.1122260093688965, "incorrect_loss_per_token": 3.516097231144544, "correct_loss_uncond": -26.008625030517578, "incorrect_loss_uncond": -17.681223551432293}, "model_output": [{"sum_logits": -50.693424224853516, "num_tokens": 24, "num_tokens_all": 442, "is_greedy": false, "sum_logits_uncond": -76.7020492553711, "logits_per_token": -2.1122260093688965, "logits_per_char": -0.4296052900411315, "num_chars": 118}, {"sum_logits": -136.5526580810547, "num_tokens": 39, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -159.76565551757812, "logits_per_token": -3.5013502072065306, "logits_per_char": -0.7302281180805064, "num_chars": 187}, {"sum_logits": -96.43872833251953, "num_tokens": 25, "num_tokens_all": 443, "is_greedy": false, "sum_logits_uncond": -105.02374267578125, "logits_per_token": -3.857549133300781, "logits_per_char": -0.9454777287501915, "num_chars": 102}, {"sum_logits": -92.49237823486328, "num_tokens": 29, "num_tokens_all": 447, "is_greedy": false, "sum_logits_uncond": -113.738037109375, "logits_per_token": -3.18939235292632, "logits_per_char": -0.764399820122837, "num_chars": 121}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 571, "native_id": 2513, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 22.720727920532227, "incorrect_loss_raw": 55.58289082845052, "correct_loss_per_char": 0.33911534209749594, "incorrect_loss_per_char": 0.7103042146000531, "correct_loss_per_token": 1.4200454950332642, "incorrect_loss_per_token": 3.508911534292286, "correct_loss_uncond": -27.571752548217773, "incorrect_loss_uncond": -19.91594950358073}, "model_output": [{"sum_logits": -22.720727920532227, "num_tokens": 16, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -50.29248046875, "logits_per_token": -1.4200454950332642, "logits_per_char": -0.33911534209749594, "num_chars": 67}, {"sum_logits": -56.948974609375, "num_tokens": 18, "num_tokens_all": 421, "is_greedy": false, "sum_logits_uncond": -75.08023071289062, "logits_per_token": -3.1638319227430554, "logits_per_char": -0.6944996903582317, "num_chars": 82}, {"sum_logits": -49.95135498046875, "num_tokens": 13, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -69.10303497314453, "logits_per_token": -3.842411921574519, "logits_per_char": -0.7239326808763586, "num_chars": 69}, {"sum_logits": -59.84834289550781, "num_tokens": 17, "num_tokens_all": 420, "is_greedy": false, "sum_logits_uncond": -82.3132553100586, "logits_per_token": -3.520490758559283, "logits_per_char": -0.7124802725655692, "num_chars": 84}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 572, "native_id": 26225, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 106.28512573242188, "incorrect_loss_raw": 162.82158915201822, "correct_loss_per_char": 0.5478614728475354, "incorrect_loss_per_char": 0.8063693635377192, "correct_loss_per_token": 2.4155710393732246, "incorrect_loss_per_token": 3.373854310006806, "correct_loss_uncond": -26.979644775390625, "incorrect_loss_uncond": -17.957082112630207}, "model_output": [{"sum_logits": -200.50167846679688, "num_tokens": 50, "num_tokens_all": 495, "is_greedy": false, "sum_logits_uncond": -202.90151977539062, "logits_per_token": -4.0100335693359375, "logits_per_char": -0.9876929973733837, "num_chars": 203}, {"sum_logits": -158.79830932617188, "num_tokens": 50, "num_tokens_all": 495, "is_greedy": false, "sum_logits_uncond": -182.94349670410156, "logits_per_token": -3.1759661865234374, "logits_per_char": -0.705770263671875, "num_chars": 225}, {"sum_logits": -129.16477966308594, "num_tokens": 44, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -156.49099731445312, "logits_per_token": -2.935563174161044, "logits_per_char": -0.7256448295678986, "num_chars": 178}, {"sum_logits": -106.28512573242188, "num_tokens": 44, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -133.2647705078125, "logits_per_token": -2.4155710393732246, "logits_per_char": -0.5478614728475354, "num_chars": 194}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 573, "native_id": 27247, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 87.7529067993164, "incorrect_loss_raw": 109.55585479736328, "correct_loss_per_char": 0.4769179717354152, "incorrect_loss_per_char": 0.6664152092857052, "correct_loss_per_token": 2.2500745333158054, "incorrect_loss_per_token": 3.1417871416847603, "correct_loss_uncond": -17.681678771972656, "incorrect_loss_uncond": -14.634633382161459}, "model_output": [{"sum_logits": -87.7529067993164, "num_tokens": 39, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -105.43458557128906, "logits_per_token": -2.2500745333158054, "logits_per_char": -0.4769179717354152, "num_chars": 184}, {"sum_logits": -135.66522216796875, "num_tokens": 52, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -152.7501220703125, "logits_per_token": -2.608946580153245, "logits_per_char": -0.542660888671875, "num_chars": 250}, {"sum_logits": -95.19389343261719, "num_tokens": 26, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -106.31250762939453, "logits_per_token": -3.6613035935621996, "logits_per_char": -0.726670942233719, "num_chars": 131}, {"sum_logits": -97.8084487915039, "num_tokens": 31, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -113.50883483886719, "logits_per_token": -3.1551112513388357, "logits_per_char": -0.7299137969515217, "num_chars": 134}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 574, "native_id": 36108, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 79.13966369628906, "incorrect_loss_raw": 109.22787221272786, "correct_loss_per_char": 0.5138939201057732, "incorrect_loss_per_char": 0.6993274702899978, "correct_loss_per_token": 2.138909829629434, "incorrect_loss_per_token": 2.858080700175503, "correct_loss_uncond": -30.174026489257812, "incorrect_loss_uncond": -19.25420633951823}, "model_output": [{"sum_logits": -79.13966369628906, "num_tokens": 37, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -109.31369018554688, "logits_per_token": -2.138909829629434, "logits_per_char": -0.5138939201057732, "num_chars": 154}, {"sum_logits": -107.24579620361328, "num_tokens": 35, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -134.55718994140625, "logits_per_token": -3.0641656058175224, "logits_per_char": -0.7009529163634854, "num_chars": 153}, {"sum_logits": -104.12279510498047, "num_tokens": 44, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -128.79147338867188, "logits_per_token": -2.3664271614768286, "logits_per_char": -0.5480147110788446, "num_chars": 190}, {"sum_logits": -116.31502532958984, "num_tokens": 37, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -122.09757232666016, "logits_per_token": -3.143649333232158, "logits_per_char": -0.8490147834276631, "num_chars": 137}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 575, "native_id": 27185, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 103.42086791992188, "incorrect_loss_raw": 111.98533884684245, "correct_loss_per_char": 0.5020430481549606, "incorrect_loss_per_char": 0.6355473074834599, "correct_loss_per_token": 2.20044399829621, "incorrect_loss_per_token": 2.8214831776202303, "correct_loss_uncond": -12.655838012695312, "incorrect_loss_uncond": -17.612650553385418}, "model_output": [{"sum_logits": -117.18624114990234, "num_tokens": 37, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -117.940673828125, "logits_per_token": -3.1671957067541174, "logits_per_char": -0.7416850705690021, "num_chars": 158}, {"sum_logits": -88.08670043945312, "num_tokens": 35, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -106.63268280029297, "logits_per_token": -2.5167628696986606, "logits_per_char": -0.5992292546901573, "num_chars": 147}, {"sum_logits": -103.42086791992188, "num_tokens": 47, "num_tokens_all": 496, "is_greedy": false, "sum_logits_uncond": -116.07670593261719, "logits_per_token": -2.20044399829621, "logits_per_char": -0.5020430481549606, "num_chars": 206}, {"sum_logits": -130.68307495117188, "num_tokens": 47, "num_tokens_all": 496, "is_greedy": false, "sum_logits_uncond": -164.22061157226562, "logits_per_token": -2.7804909564079123, "logits_per_char": -0.5657275971912202, "num_chars": 231}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 576, "native_id": 16449, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 91.99923706054688, "incorrect_loss_raw": 132.17728169759116, "correct_loss_per_char": 0.4816713982227585, "incorrect_loss_per_char": 0.8342944881026909, "correct_loss_per_token": 1.9999834143597146, "incorrect_loss_per_token": 3.6294012479753683, "correct_loss_uncond": -23.65277099609375, "incorrect_loss_uncond": -16.41509501139323}, "model_output": [{"sum_logits": -192.63824462890625, "num_tokens": 54, "num_tokens_all": 498, "is_greedy": false, "sum_logits_uncond": -206.09654235839844, "logits_per_token": -3.567374900535301, "logits_per_char": -0.8162637484275689, "num_chars": 236}, {"sum_logits": -90.35271453857422, "num_tokens": 29, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -111.02827453613281, "logits_per_token": -3.1156108461577317, "logits_per_char": -0.7995815445891524, "num_chars": 113}, {"sum_logits": -113.54088592529297, "num_tokens": 27, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -128.65231323242188, "logits_per_token": -4.205217997233073, "logits_per_char": -0.8870381712913513, "num_chars": 128}, {"sum_logits": -91.99923706054688, "num_tokens": 46, "num_tokens_all": 490, "is_greedy": false, "sum_logits_uncond": -115.65200805664062, "logits_per_token": -1.9999834143597146, "logits_per_char": -0.4816713982227585, "num_chars": 191}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 577, "native_id": 46573, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 73.4514389038086, "incorrect_loss_raw": 134.49016571044922, "correct_loss_per_char": 0.42954057838484555, "incorrect_loss_per_char": 0.6264163578090741, "correct_loss_per_token": 2.2953574657440186, "incorrect_loss_per_token": 2.909128779014107, "correct_loss_uncond": -21.76366424560547, "incorrect_loss_uncond": -23.58618418375651}, "model_output": [{"sum_logits": -128.39205932617188, "num_tokens": 48, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -152.6041259765625, "logits_per_token": -2.6748345692952475, "logits_per_char": -0.608493172161952, "num_chars": 211}, {"sum_logits": -73.4514389038086, "num_tokens": 32, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -95.21510314941406, "logits_per_token": -2.2953574657440186, "logits_per_char": -0.42954057838484555, "num_chars": 171}, {"sum_logits": -152.14996337890625, "num_tokens": 54, "num_tokens_all": 495, "is_greedy": false, "sum_logits_uncond": -175.20558166503906, "logits_per_token": -2.81759191442419, "logits_per_char": -0.5990156038539616, "num_chars": 254}, {"sum_logits": -122.92847442626953, "num_tokens": 38, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -146.41934204101562, "logits_per_token": -3.2349598533228825, "logits_per_char": -0.6717402974113089, "num_chars": 183}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 578, "native_id": 38737, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 105.90094757080078, "incorrect_loss_raw": 157.82892608642578, "correct_loss_per_char": 0.5091391710134653, "incorrect_loss_per_char": 0.8516790246721223, "correct_loss_per_token": 2.4068397175181997, "incorrect_loss_per_token": 4.396739939209197, "correct_loss_uncond": -25.26062774658203, "incorrect_loss_uncond": -13.801973978678385}, "model_output": [{"sum_logits": -205.79335021972656, "num_tokens": 45, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -225.27735900878906, "logits_per_token": -4.573185560438368, "logits_per_char": -0.7915128854604868, "num_chars": 260}, {"sum_logits": -169.41024780273438, "num_tokens": 39, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -184.55191040039062, "logits_per_token": -4.34385250776242, "logits_per_char": -0.780692386187716, "num_chars": 217}, {"sum_logits": -105.90094757080078, "num_tokens": 44, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -131.1615753173828, "logits_per_token": -2.4068397175181997, "logits_per_char": -0.5091391710134653, "num_chars": 208}, {"sum_logits": -98.2831802368164, "num_tokens": 23, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -105.06343078613281, "logits_per_token": -4.2731817494268, "logits_per_char": -0.982831802368164, "num_chars": 100}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 579, "native_id": 6436, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 101.60086822509766, "incorrect_loss_raw": 116.79508209228516, "correct_loss_per_char": 0.4146974213269292, "incorrect_loss_per_char": 0.5715863956489862, "correct_loss_per_token": 1.8472885131835937, "incorrect_loss_per_token": 2.5197886119765225, "correct_loss_uncond": -28.11455535888672, "incorrect_loss_uncond": -18.502606709798176}, "model_output": [{"sum_logits": -87.15930938720703, "num_tokens": 36, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -102.03672790527344, "logits_per_token": -2.4210919274224176, "logits_per_char": -0.562318125078755, "num_chars": 155}, {"sum_logits": -111.51626586914062, "num_tokens": 53, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -134.6523895263672, "logits_per_token": -2.1040804880969928, "logits_per_char": -0.453318153939596, "num_chars": 246}, {"sum_logits": -101.60086822509766, "num_tokens": 55, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -129.71542358398438, "logits_per_token": -1.8472885131835937, "logits_per_char": -0.4146974213269292, "num_chars": 245}, {"sum_logits": -151.7096710205078, "num_tokens": 50, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -169.20394897460938, "logits_per_token": -3.034193420410156, "logits_per_char": -0.6991229079286074, "num_chars": 217}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 580, "native_id": 47085, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 125.14848327636719, "incorrect_loss_raw": 90.61105092366536, "correct_loss_per_char": 0.672841307937458, "incorrect_loss_per_char": 0.6273116432027973, "correct_loss_per_token": 2.844283710826527, "incorrect_loss_per_token": 2.7443500562645924, "correct_loss_uncond": -22.4842529296875, "incorrect_loss_uncond": -22.206629435221355}, "model_output": [{"sum_logits": -91.47077178955078, "num_tokens": 40, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -116.12606811523438, "logits_per_token": -2.2867692947387694, "logits_per_char": -0.5412471703523715, "num_chars": 169}, {"sum_logits": -95.87986755371094, "num_tokens": 29, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -115.08541107177734, "logits_per_token": -3.306202329438308, "logits_per_char": -0.7102212411385995, "num_chars": 135}, {"sum_logits": -125.14848327636719, "num_tokens": 44, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -147.6327362060547, "logits_per_token": -2.844283710826527, "logits_per_char": -0.672841307937458, "num_chars": 186}, {"sum_logits": -84.48251342773438, "num_tokens": 32, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -107.24156188964844, "logits_per_token": -2.640078544616699, "logits_per_char": -0.6304665181174207, "num_chars": 134}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 581, "native_id": 45876, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 103.66792297363281, "incorrect_loss_raw": 82.25071207682292, "correct_loss_per_char": 0.5056971852372333, "incorrect_loss_per_char": 0.48492890948421374, "correct_loss_per_token": 2.3037316216362846, "incorrect_loss_per_token": 2.3562099373902856, "correct_loss_uncond": -29.437728881835938, "incorrect_loss_uncond": -21.743080139160156}, "model_output": [{"sum_logits": -103.66792297363281, "num_tokens": 45, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -133.10565185546875, "logits_per_token": -2.3037316216362846, "logits_per_char": -0.5056971852372333, "num_chars": 205}, {"sum_logits": -84.59858703613281, "num_tokens": 34, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -98.15827941894531, "logits_per_token": -2.4881937363568474, "logits_per_char": -0.477958118848208, "num_chars": 177}, {"sum_logits": -82.98930358886719, "num_tokens": 34, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -106.5647201538086, "logits_per_token": -2.4408618702608, "logits_per_char": -0.4999355637883566, "num_chars": 166}, {"sum_logits": -79.16424560546875, "num_tokens": 37, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -107.25837707519531, "logits_per_token": -2.1395742055532097, "logits_per_char": -0.47689304581607683, "num_chars": 166}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 582, "native_id": 44265, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 43.681976318359375, "incorrect_loss_raw": 82.8431002298991, "correct_loss_per_char": 0.43249481503326115, "incorrect_loss_per_char": 0.5642407366044254, "correct_loss_per_token": 1.9855443781072444, "incorrect_loss_per_token": 2.6285428418679784, "correct_loss_uncond": -33.90678405761719, "incorrect_loss_uncond": -17.156763712565105}, "model_output": [{"sum_logits": -49.2074089050293, "num_tokens": 20, "num_tokens_all": 441, "is_greedy": false, "sum_logits_uncond": -59.987491607666016, "logits_per_token": -2.4603704452514648, "logits_per_char": -0.5528922348879697, "num_chars": 89}, {"sum_logits": -75.1480712890625, "num_tokens": 27, "num_tokens_all": 448, "is_greedy": false, "sum_logits_uncond": -91.75474548339844, "logits_per_token": -2.7832618995949074, "logits_per_char": -0.5485260678033759, "num_chars": 137}, {"sum_logits": -124.17382049560547, "num_tokens": 47, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -148.25735473632812, "logits_per_token": -2.6419961807575634, "logits_per_char": -0.5913039071219308, "num_chars": 210}, {"sum_logits": -43.681976318359375, "num_tokens": 22, "num_tokens_all": 443, "is_greedy": false, "sum_logits_uncond": -77.58876037597656, "logits_per_token": -1.9855443781072444, "logits_per_char": -0.43249481503326115, "num_chars": 101}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 583, "native_id": 34789, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 57.14230728149414, "incorrect_loss_raw": 80.4455337524414, "correct_loss_per_char": 0.6802655628749302, "incorrect_loss_per_char": 0.6212267000713045, "correct_loss_per_token": 2.5973776037042793, "incorrect_loss_per_token": 2.644369490689071, "correct_loss_uncond": -9.40518569946289, "incorrect_loss_uncond": -9.432256062825521}, "model_output": [{"sum_logits": -103.87235260009766, "num_tokens": 45, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -111.0680160522461, "logits_per_token": -2.3082745022243922, "logits_per_char": -0.5935563005719866, "num_chars": 175}, {"sum_logits": -60.09330749511719, "num_tokens": 21, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -68.90650939941406, "logits_per_token": -2.8615860711960566, "logits_per_char": -0.6461645967216902, "num_chars": 93}, {"sum_logits": -57.14230728149414, "num_tokens": 22, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -66.54749298095703, "logits_per_token": -2.5973776037042793, "logits_per_char": -0.6802655628749302, "num_chars": 84}, {"sum_logits": -77.37094116210938, "num_tokens": 28, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -89.65884399414062, "logits_per_token": -2.7632478986467635, "logits_per_char": -0.6239592029202369, "num_chars": 124}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 584, "native_id": 13280, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 69.79456329345703, "incorrect_loss_raw": 78.86886342366536, "correct_loss_per_char": 0.5628593813988471, "incorrect_loss_per_char": 0.625654917836107, "correct_loss_per_token": 2.5849838256835938, "incorrect_loss_per_token": 2.741876632998688, "correct_loss_uncond": -24.11211395263672, "incorrect_loss_uncond": -14.380821228027344}, "model_output": [{"sum_logits": -75.69038391113281, "num_tokens": 24, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -92.46953582763672, "logits_per_token": -3.1537659962972007, "logits_per_char": -0.6581772514011549, "num_chars": 115}, {"sum_logits": -69.79456329345703, "num_tokens": 27, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -93.90667724609375, "logits_per_token": -2.5849838256835938, "logits_per_char": -0.5628593813988471, "num_chars": 124}, {"sum_logits": -96.3631820678711, "num_tokens": 33, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -115.17164611816406, "logits_per_token": -2.920096426299124, "logits_per_char": -0.6424212137858073, "num_chars": 150}, {"sum_logits": -64.55302429199219, "num_tokens": 30, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -72.10787200927734, "logits_per_token": -2.1517674763997396, "logits_per_char": -0.5763662883213588, "num_chars": 112}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 585, "native_id": 32832, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 73.3436279296875, "incorrect_loss_raw": 97.92635091145833, "correct_loss_per_char": 0.46420017677017406, "incorrect_loss_per_char": 0.6177554814046315, "correct_loss_per_token": 2.2225341796875, "incorrect_loss_per_token": 3.0469896992678365, "correct_loss_uncond": -40.905784606933594, "incorrect_loss_uncond": -23.8404057820638}, "model_output": [{"sum_logits": -147.59515380859375, "num_tokens": 45, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -184.0464324951172, "logits_per_token": -3.279892306857639, "logits_per_char": -0.6445203223082696, "num_chars": 229}, {"sum_logits": -69.17961120605469, "num_tokens": 21, "num_tokens_all": 442, "is_greedy": false, "sum_logits_uncond": -89.70186614990234, "logits_per_token": -3.2942672002883184, "logits_per_char": -0.69878395157631, "num_chars": 99}, {"sum_logits": -77.00428771972656, "num_tokens": 30, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -91.55197143554688, "logits_per_token": -2.566809590657552, "logits_per_char": -0.509962170329315, "num_chars": 151}, {"sum_logits": -73.3436279296875, "num_tokens": 33, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -114.2494125366211, "logits_per_token": -2.2225341796875, "logits_per_char": -0.46420017677017406, "num_chars": 158}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 586, "native_id": 47705, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 96.08541107177734, "incorrect_loss_raw": 118.00860850016277, "correct_loss_per_char": 0.6363272256409096, "incorrect_loss_per_char": 0.6456120393556288, "correct_loss_per_token": 3.0995293894121723, "incorrect_loss_per_token": 2.918968651881895, "correct_loss_uncond": -30.602699279785156, "incorrect_loss_uncond": -18.872540791829426}, "model_output": [{"sum_logits": -192.6338653564453, "num_tokens": 59, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -220.423095703125, "logits_per_token": -3.2649807687533103, "logits_per_char": -0.7466428889784702, "num_chars": 258}, {"sum_logits": -96.08541107177734, "num_tokens": 31, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -126.6881103515625, "logits_per_token": -3.0995293894121723, "logits_per_char": -0.6363272256409096, "num_chars": 151}, {"sum_logits": -68.68666076660156, "num_tokens": 23, "num_tokens_all": 446, "is_greedy": false, "sum_logits_uncond": -78.19831085205078, "logits_per_token": -2.9863765550696333, "logits_per_char": -0.6604486612173227, "num_chars": 104}, {"sum_logits": -92.7052993774414, "num_tokens": 37, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -112.02204132080078, "logits_per_token": -2.505548631822741, "logits_per_char": -0.5297445678710937, "num_chars": 175}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 587, "native_id": 3957, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 41.23151397705078, "incorrect_loss_raw": 23.64365577697754, "correct_loss_per_char": 1.0307878494262694, "incorrect_loss_per_char": 0.6651265033088051, "correct_loss_per_token": 3.1716549213115988, "incorrect_loss_per_token": 2.5945906196436437, "correct_loss_uncond": -30.88062286376953, "incorrect_loss_uncond": -21.116857528686523}, "model_output": [{"sum_logits": -41.23151397705078, "num_tokens": 13, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -72.11213684082031, "logits_per_token": -3.1716549213115988, "logits_per_char": -1.0307878494262694, "num_chars": 40}, {"sum_logits": -31.17165756225586, "num_tokens": 11, "num_tokens_all": 404, "is_greedy": false, "sum_logits_uncond": -52.0802001953125, "logits_per_token": -2.833787051114169, "logits_per_char": -0.7602843307867283, "num_chars": 41}, {"sum_logits": -22.992372512817383, "num_tokens": 9, "num_tokens_all": 402, "is_greedy": false, "sum_logits_uncond": -43.43670654296875, "logits_per_token": -2.554708056979709, "logits_per_char": -0.6569249289376395, "num_chars": 35}, {"sum_logits": -16.766937255859375, "num_tokens": 7, "num_tokens_all": 400, "is_greedy": false, "sum_logits_uncond": -38.76463317871094, "logits_per_token": -2.3952767508370534, "logits_per_char": -0.5781702502020474, "num_chars": 29}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 588, "native_id": 4579, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 25.278371810913086, "incorrect_loss_raw": 74.60913594563802, "correct_loss_per_char": 0.39497455954551697, "incorrect_loss_per_char": 0.7499729595613499, "correct_loss_per_token": 1.6852247873942057, "incorrect_loss_per_token": 3.057491561789918, "correct_loss_uncond": -34.50142478942871, "incorrect_loss_uncond": -31.937918345133465}, "model_output": [{"sum_logits": -125.419677734375, "num_tokens": 34, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -166.96060180664062, "logits_per_token": -3.6888140510110294, "logits_per_char": -0.8039722931690705, "num_chars": 156}, {"sum_logits": -25.278371810913086, "num_tokens": 15, "num_tokens_all": 432, "is_greedy": false, "sum_logits_uncond": -59.7797966003418, "logits_per_token": -1.6852247873942057, "logits_per_char": -0.39497455954551697, "num_chars": 64}, {"sum_logits": -64.61128234863281, "num_tokens": 20, "num_tokens_all": 437, "is_greedy": false, "sum_logits_uncond": -88.91677856445312, "logits_per_token": -3.2305641174316406, "logits_per_char": -0.8731254371436866, "num_chars": 74}, {"sum_logits": -33.79644775390625, "num_tokens": 15, "num_tokens_all": 432, "is_greedy": false, "sum_logits_uncond": -63.7637825012207, "logits_per_token": -2.253096516927083, "logits_per_char": -0.5728211483712924, "num_chars": 59}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 589, "native_id": 11810, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 172.569580078125, "incorrect_loss_raw": 116.17976888020833, "correct_loss_per_char": 0.5810423571654041, "incorrect_loss_per_char": 0.6137020312991539, "correct_loss_per_token": 2.6146906072443183, "incorrect_loss_per_token": 3.102598012288411, "correct_loss_uncond": -18.040206909179688, "incorrect_loss_uncond": -15.305595397949219}, "model_output": [{"sum_logits": -151.33230590820312, "num_tokens": 40, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -159.33938598632812, "logits_per_token": -3.783307647705078, "logits_per_char": -0.6696119730451465, "num_chars": 226}, {"sum_logits": -172.569580078125, "num_tokens": 66, "num_tokens_all": 494, "is_greedy": false, "sum_logits_uncond": -190.6097869873047, "logits_per_token": -2.6146906072443183, "logits_per_char": -0.5810423571654041, "num_chars": 297}, {"sum_logits": -113.81228637695312, "num_tokens": 52, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -134.41598510742188, "logits_per_token": -2.1886978149414062, "logits_per_char": -0.446322691674326, "num_chars": 255}, {"sum_logits": -83.39471435546875, "num_tokens": 25, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -100.70072174072266, "logits_per_token": -3.33578857421875, "logits_per_char": -0.7251714291779892, "num_chars": 115}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 590, "native_id": 44709, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 61.163238525390625, "incorrect_loss_raw": 56.48684946695963, "correct_loss_per_char": 0.4972621018324441, "incorrect_loss_per_char": 0.40996199692480406, "correct_loss_per_token": 2.038774617513021, "incorrect_loss_per_token": 1.8696193351401937, "correct_loss_uncond": -20.57696533203125, "incorrect_loss_uncond": -29.077959696451824}, "model_output": [{"sum_logits": -61.163238525390625, "num_tokens": 30, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -81.74020385742188, "logits_per_token": -2.038774617513021, "logits_per_char": -0.4972621018324441, "num_chars": 123}, {"sum_logits": -56.726417541503906, "num_tokens": 28, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -74.38725280761719, "logits_per_token": -2.0259434836251393, "logits_per_char": -0.4727201461791992, "num_chars": 120}, {"sum_logits": -51.029266357421875, "num_tokens": 37, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -93.13871765136719, "logits_per_token": -1.379169361011402, "logits_per_char": -0.2966817811478016, "num_chars": 172}, {"sum_logits": -61.704864501953125, "num_tokens": 28, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -89.16845703125, "logits_per_token": -2.2037451607840404, "logits_per_char": -0.4604840634474114, "num_chars": 134}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 591, "native_id": 31152, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 36.63450622558594, "incorrect_loss_raw": 40.99394861857096, "correct_loss_per_char": 0.4884600830078125, "incorrect_loss_per_char": 0.7143874227831958, "correct_loss_per_token": 2.4423004150390626, "incorrect_loss_per_token": 3.0123009151882596, "correct_loss_uncond": -34.05436706542969, "incorrect_loss_uncond": -19.87293752034505}, "model_output": [{"sum_logits": -36.63450622558594, "num_tokens": 15, "num_tokens_all": 421, "is_greedy": false, "sum_logits_uncond": -70.68887329101562, "logits_per_token": -2.4423004150390626, "logits_per_char": -0.4884600830078125, "num_chars": 75}, {"sum_logits": -45.527191162109375, "num_tokens": 16, "num_tokens_all": 422, "is_greedy": false, "sum_logits_uncond": -65.87895965576172, "logits_per_token": -2.845449447631836, "logits_per_char": -0.5836819379757612, "num_chars": 78}, {"sum_logits": -42.39714813232422, "num_tokens": 11, "num_tokens_all": 417, "is_greedy": false, "sum_logits_uncond": -47.63920974731445, "logits_per_token": -3.8542861938476562, "logits_per_char": -1.1157144245348478, "num_chars": 38}, {"sum_logits": -35.0575065612793, "num_tokens": 15, "num_tokens_all": 421, "is_greedy": false, "sum_logits_uncond": -69.08248901367188, "logits_per_token": -2.3371671040852866, "logits_per_char": -0.44376590583897846, "num_chars": 79}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 592, "native_id": 31018, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 107.63148498535156, "incorrect_loss_raw": 108.04446919759114, "correct_loss_per_char": 0.44292792175041795, "incorrect_loss_per_char": 0.6911541859250749, "correct_loss_per_token": 2.030782735572671, "incorrect_loss_per_token": 3.229746214449094, "correct_loss_uncond": -7.612152099609375, "incorrect_loss_uncond": -8.118189493815104}, "model_output": [{"sum_logits": -153.62884521484375, "num_tokens": 55, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -157.7003936767578, "logits_per_token": -2.793251731178977, "logits_per_char": -0.634829938904313, "num_chars": 242}, {"sum_logits": -65.50901794433594, "num_tokens": 20, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -74.30108642578125, "logits_per_token": -3.275450897216797, "logits_per_char": -0.7043980424122144, "num_chars": 93}, {"sum_logits": -107.63148498535156, "num_tokens": 53, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -115.24363708496094, "logits_per_token": -2.030782735572671, "logits_per_char": -0.44292792175041795, "num_chars": 243}, {"sum_logits": -104.99554443359375, "num_tokens": 29, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -116.48649597167969, "logits_per_token": -3.6205360149515085, "logits_per_char": -0.7342345764586976, "num_chars": 143}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 593, "native_id": 45359, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 58.209102630615234, "incorrect_loss_raw": 105.93999099731445, "correct_loss_per_char": 0.4810669638893821, "incorrect_loss_per_char": 0.5170926037755099, "correct_loss_per_token": 2.1558926900227866, "incorrect_loss_per_token": 2.41966629532218, "correct_loss_uncond": -14.333377838134766, "incorrect_loss_uncond": -22.500067392985027}, "model_output": [{"sum_logits": -135.5140380859375, "num_tokens": 53, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -160.493408203125, "logits_per_token": -2.5568686431308962, "logits_per_char": -0.5056493958430504, "num_chars": 268}, {"sum_logits": -58.209102630615234, "num_tokens": 27, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -72.54248046875, "logits_per_token": -2.1558926900227866, "logits_per_char": -0.4810669638893821, "num_chars": 121}, {"sum_logits": -57.84920883178711, "num_tokens": 32, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -81.02146911621094, "logits_per_token": -1.8077877759933472, "logits_per_char": -0.40738879459005006, "num_chars": 142}, {"sum_logits": -124.45672607421875, "num_tokens": 43, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -143.8052978515625, "logits_per_token": -2.8943424668422963, "logits_per_char": -0.6382396208934294, "num_chars": 195}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 594, "native_id": 5867, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 50.37168884277344, "incorrect_loss_raw": 83.13910420735677, "correct_loss_per_char": 0.3228954412998297, "incorrect_loss_per_char": 0.563936333212749, "correct_loss_per_token": 1.57411527633667, "incorrect_loss_per_token": 2.4370028177897134, "correct_loss_uncond": -19.757186889648438, "incorrect_loss_uncond": -19.368390401204426}, "model_output": [{"sum_logits": -70.45185852050781, "num_tokens": 41, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -93.73188781738281, "logits_per_token": -1.7183380126953125, "logits_per_char": -0.4218674162904659, "num_chars": 167}, {"sum_logits": -89.12437438964844, "num_tokens": 32, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -106.91202545166016, "logits_per_token": -2.7851366996765137, "logits_per_char": -0.6189192665947808, "num_chars": 144}, {"sum_logits": -89.84107971191406, "num_tokens": 32, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -106.87857055664062, "logits_per_token": -2.8075337409973145, "logits_per_char": -0.6510223167530005, "num_chars": 138}, {"sum_logits": -50.37168884277344, "num_tokens": 32, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -70.12887573242188, "logits_per_token": -1.57411527633667, "logits_per_char": -0.3228954412998297, "num_chars": 156}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 595, "native_id": 26816, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 47.91469192504883, "incorrect_loss_raw": 113.21480305989583, "correct_loss_per_char": 0.39598918946321343, "incorrect_loss_per_char": 0.7314869220659861, "correct_loss_per_token": 1.7112389973231725, "incorrect_loss_per_token": 3.1974229956464346, "correct_loss_uncond": -27.326534271240234, "incorrect_loss_uncond": -12.065190633138021}, "model_output": [{"sum_logits": -47.91469192504883, "num_tokens": 28, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -75.24122619628906, "logits_per_token": -1.7112389973231725, "logits_per_char": -0.39598918946321343, "num_chars": 121}, {"sum_logits": -146.098388671875, "num_tokens": 39, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -153.8251190185547, "logits_per_token": -3.746112530048077, "logits_per_char": -0.7983518506659836, "num_chars": 183}, {"sum_logits": -83.57278442382812, "num_tokens": 32, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -100.82510375976562, "logits_per_token": -2.611649513244629, "logits_per_char": -0.6428675724909856, "num_chars": 130}, {"sum_logits": -109.97323608398438, "num_tokens": 34, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -121.18975830078125, "logits_per_token": -3.234506943646599, "logits_per_char": -0.7532413430409889, "num_chars": 146}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 596, "native_id": 1567, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 100.19133758544922, "incorrect_loss_raw": 144.37042236328125, "correct_loss_per_char": 0.5273228293971012, "incorrect_loss_per_char": 0.9025833331938391, "correct_loss_per_token": 2.5047834396362303, "incorrect_loss_per_token": 3.425552035597427, "correct_loss_uncond": -26.673500061035156, "incorrect_loss_uncond": -20.248123168945312}, "model_output": [{"sum_logits": -161.17103576660156, "num_tokens": 42, "num_tokens_all": 501, "is_greedy": false, "sum_logits_uncond": -183.66964721679688, "logits_per_token": -3.8374056134905135, "logits_per_char": -0.9209773472377232, "num_chars": 175}, {"sum_logits": -172.18292236328125, "num_tokens": 46, "num_tokens_all": 505, "is_greedy": false, "sum_logits_uncond": -193.77061462402344, "logits_per_token": -3.7431070078974185, "logits_per_char": -1.031035463253181, "num_chars": 167}, {"sum_logits": -99.75730895996094, "num_tokens": 37, "num_tokens_all": 496, "is_greedy": false, "sum_logits_uncond": -116.41537475585938, "logits_per_token": -2.69614348540435, "logits_per_char": -0.7557371890906132, "num_chars": 132}, {"sum_logits": -100.19133758544922, "num_tokens": 40, "num_tokens_all": 499, "is_greedy": false, "sum_logits_uncond": -126.86483764648438, "logits_per_token": -2.5047834396362303, "logits_per_char": -0.5273228293971012, "num_chars": 190}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 597, "native_id": 9779, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 14.256080627441406, "incorrect_loss_raw": 54.01681900024414, "correct_loss_per_char": 0.40731658935546877, "incorrect_loss_per_char": 0.8354802070606824, "correct_loss_per_token": 1.5840089586046007, "incorrect_loss_per_token": 3.5011802832285563, "correct_loss_uncond": -25.12685775756836, "incorrect_loss_uncond": -18.250621795654297}, "model_output": [{"sum_logits": -34.30070495605469, "num_tokens": 10, "num_tokens_all": 426, "is_greedy": false, "sum_logits_uncond": -53.888916015625, "logits_per_token": -3.430070495605469, "logits_per_char": -0.92704607989337, "num_chars": 37}, {"sum_logits": -54.87862014770508, "num_tokens": 16, "num_tokens_all": 432, "is_greedy": false, "sum_logits_uncond": -72.22465515136719, "logits_per_token": -3.4299137592315674, "logits_per_char": -0.7220871072066458, "num_chars": 76}, {"sum_logits": -72.87113189697266, "num_tokens": 20, "num_tokens_all": 436, "is_greedy": false, "sum_logits_uncond": -90.68875122070312, "logits_per_token": -3.643556594848633, "logits_per_char": -0.8573074340820312, "num_chars": 85}, {"sum_logits": -14.256080627441406, "num_tokens": 9, "num_tokens_all": 425, "is_greedy": false, "sum_logits_uncond": -39.382938385009766, "logits_per_token": -1.5840089586046007, "logits_per_char": -0.40731658935546877, "num_chars": 35}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 598, "native_id": 28388, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 66.85189819335938, "incorrect_loss_raw": 147.0550791422526, "correct_loss_per_char": 0.442727802605029, "incorrect_loss_per_char": 0.8138964289710634, "correct_loss_per_token": 1.910054234095982, "incorrect_loss_per_token": 3.793937129389201, "correct_loss_uncond": -48.6920166015625, "incorrect_loss_uncond": -11.655260721842447}, "model_output": [{"sum_logits": -66.85189819335938, "num_tokens": 35, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -115.54391479492188, "logits_per_token": -1.910054234095982, "logits_per_char": -0.442727802605029, "num_chars": 151}, {"sum_logits": -208.4642791748047, "num_tokens": 46, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -229.14805603027344, "logits_per_token": -4.531832155974015, "logits_per_char": -0.9306441034589495, "num_chars": 224}, {"sum_logits": -116.40376281738281, "num_tokens": 35, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -126.03902435302734, "logits_per_token": -3.325821794782366, "logits_per_char": -0.755868589723265, "num_chars": 154}, {"sum_logits": -116.29719543457031, "num_tokens": 33, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -120.94393920898438, "logits_per_token": -3.5241574374112217, "logits_per_char": -0.755176593730976, "num_chars": 154}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 599, "native_id": 23525, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 75.54437255859375, "incorrect_loss_raw": 150.5131581624349, "correct_loss_per_char": 0.6192161685130635, "incorrect_loss_per_char": 0.7163550315720039, "correct_loss_per_token": 2.436915243825605, "incorrect_loss_per_token": 3.1478432001580896, "correct_loss_uncond": -39.9591064453125, "incorrect_loss_uncond": -21.54778544108073}, "model_output": [{"sum_logits": -150.10928344726562, "num_tokens": 43, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -172.47586059570312, "logits_per_token": -3.490913568541061, "logits_per_char": -0.8339404635959201, "num_chars": 180}, {"sum_logits": -75.54437255859375, "num_tokens": 31, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -115.50347900390625, "logits_per_token": -2.436915243825605, "logits_per_char": -0.6192161685130635, "num_chars": 122}, {"sum_logits": -168.7268829345703, "num_tokens": 55, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -189.72933959960938, "logits_per_token": -3.0677615079012783, "logits_per_char": -0.7119277760952334, "num_chars": 237}, {"sum_logits": -132.70330810546875, "num_tokens": 46, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -153.97763061523438, "logits_per_token": -2.884854524031929, "logits_per_char": -0.603196855024858, "num_chars": 220}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 600, "native_id": 28055, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 9.314044952392578, "incorrect_loss_raw": 26.31016476949056, "correct_loss_per_char": 0.4435259501139323, "incorrect_loss_per_char": 1.0410141813361082, "correct_loss_per_token": 1.8628089904785157, "incorrect_loss_per_token": 4.084874030871269, "correct_loss_uncond": -12.175149917602539, "incorrect_loss_uncond": -13.592060089111328}, "model_output": [{"sum_logits": -36.89893341064453, "num_tokens": 13, "num_tokens_all": 424, "is_greedy": false, "sum_logits_uncond": -58.37871170043945, "logits_per_token": -2.8383794931265025, "logits_per_char": -0.6049005477154841, "num_chars": 61}, {"sum_logits": -27.56566619873047, "num_tokens": 6, "num_tokens_all": 417, "is_greedy": false, "sum_logits_uncond": -36.52903747558594, "logits_per_token": -4.594277699788411, "logits_per_char": -1.3126507713681175, "num_chars": 21}, {"sum_logits": -14.46589469909668, "num_tokens": 3, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -24.798925399780273, "logits_per_token": -4.8219648996988935, "logits_per_char": -1.2054912249247234, "num_chars": 12}, {"sum_logits": -9.314044952392578, "num_tokens": 5, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -21.489194869995117, "logits_per_token": -1.8628089904785157, "logits_per_char": -0.4435259501139323, "num_chars": 21}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 601, "native_id": 25930, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 81.14183044433594, "incorrect_loss_raw": 87.59442647298177, "correct_loss_per_char": 0.47451362832944993, "incorrect_loss_per_char": 0.6299885530408955, "correct_loss_per_token": 2.4588433467980586, "incorrect_loss_per_token": 2.7379950229381316, "correct_loss_uncond": -25.46092987060547, "incorrect_loss_uncond": -27.809824625651043}, "model_output": [{"sum_logits": -81.14183044433594, "num_tokens": 33, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -106.6027603149414, "logits_per_token": -2.4588433467980586, "logits_per_char": -0.47451362832944993, "num_chars": 171}, {"sum_logits": -98.33930969238281, "num_tokens": 31, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -124.5506591796875, "logits_per_token": -3.172235796528478, "logits_per_char": -0.7564562284029447, "num_chars": 130}, {"sum_logits": -76.98479461669922, "num_tokens": 38, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -106.866943359375, "logits_per_token": -2.0259156478078744, "logits_per_char": -0.4555313290928948, "num_chars": 169}, {"sum_logits": -87.45917510986328, "num_tokens": 29, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -114.79515075683594, "logits_per_token": -3.015833624478044, "logits_per_char": -0.6779781016268471, "num_chars": 129}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 602, "native_id": 44593, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 33.33973693847656, "incorrect_loss_raw": 56.19807815551758, "correct_loss_per_char": 0.3584917950373824, "incorrect_loss_per_char": 0.7471343490544999, "correct_loss_per_token": 1.666986846923828, "incorrect_loss_per_token": 3.991421198665107, "correct_loss_uncond": -30.12250518798828, "incorrect_loss_uncond": -18.964462280273438}, "model_output": [{"sum_logits": -20.498950958251953, "num_tokens": 10, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -48.457279205322266, "logits_per_token": -2.049895095825195, "logits_per_char": -0.3942105953509991, "num_chars": 52}, {"sum_logits": -67.01168823242188, "num_tokens": 13, "num_tokens_all": 417, "is_greedy": false, "sum_logits_uncond": -80.67891693115234, "logits_per_token": -5.154745248647837, "logits_per_char": -0.8702816653561283, "num_chars": 77}, {"sum_logits": -33.33973693847656, "num_tokens": 20, "num_tokens_all": 424, "is_greedy": false, "sum_logits_uncond": -63.462242126464844, "logits_per_token": -1.666986846923828, "logits_per_char": -0.3584917950373824, "num_chars": 93}, {"sum_logits": -81.0835952758789, "num_tokens": 17, "num_tokens_all": 421, "is_greedy": false, "sum_logits_uncond": -96.35142517089844, "logits_per_token": -4.7696232515222885, "logits_per_char": -0.9769107864563724, "num_chars": 83}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 603, "native_id": 43820, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 118.42794799804688, "incorrect_loss_raw": 78.3182856241862, "correct_loss_per_char": 0.8904356992334351, "incorrect_loss_per_char": 0.865704038048278, "correct_loss_per_token": 4.083722344760237, "incorrect_loss_per_token": 3.720935045208847, "correct_loss_uncond": -53.435699462890625, "incorrect_loss_uncond": -29.838280995686848}, "model_output": [{"sum_logits": -137.89602661132812, "num_tokens": 35, "num_tokens_all": 447, "is_greedy": false, "sum_logits_uncond": -173.07907104492188, "logits_per_token": -3.939886474609375, "logits_per_char": -0.951007080078125, "num_chars": 145}, {"sum_logits": -118.42794799804688, "num_tokens": 29, "num_tokens_all": 441, "is_greedy": false, "sum_logits_uncond": -171.8636474609375, "logits_per_token": -4.083722344760237, "logits_per_char": -0.8904356992334351, "num_chars": 133}, {"sum_logits": -52.41813659667969, "num_tokens": 19, "num_tokens_all": 431, "is_greedy": false, "sum_logits_uncond": -98.49215698242188, "logits_per_token": -2.7588492945620886, "logits_per_char": -0.6315438144178276, "num_chars": 83}, {"sum_logits": -44.64069366455078, "num_tokens": 10, "num_tokens_all": 422, "is_greedy": false, "sum_logits_uncond": -52.89847183227539, "logits_per_token": -4.464069366455078, "logits_per_char": -1.0145612196488814, "num_chars": 44}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 604, "native_id": 50235, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 69.69613647460938, "incorrect_loss_raw": 93.82383473714192, "correct_loss_per_char": 0.5087309231723312, "incorrect_loss_per_char": 0.5608665413053222, "correct_loss_per_token": 2.3232045491536457, "incorrect_loss_per_token": 2.7562716233457, "correct_loss_uncond": -20.922897338867188, "incorrect_loss_uncond": -19.6283442179362}, "model_output": [{"sum_logits": -69.69613647460938, "num_tokens": 30, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -90.61903381347656, "logits_per_token": -2.3232045491536457, "logits_per_char": -0.5087309231723312, "num_chars": 137}, {"sum_logits": -67.86112213134766, "num_tokens": 27, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -93.79959106445312, "logits_per_token": -2.513374893753617, "logits_per_char": -0.5180238330637226, "num_chars": 131}, {"sum_logits": -116.25051879882812, "num_tokens": 35, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -125.2455825805664, "logits_per_token": -3.321443394252232, "logits_per_char": -0.6494442390995985, "num_chars": 179}, {"sum_logits": -97.35986328125, "num_tokens": 40, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -121.31136322021484, "logits_per_token": -2.43399658203125, "logits_per_char": -0.5151315517526455, "num_chars": 189}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 605, "native_id": 16923, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 16.160633087158203, "incorrect_loss_raw": 50.95260111490885, "correct_loss_per_char": 0.40401582717895507, "incorrect_loss_per_char": 0.7950801688407544, "correct_loss_per_token": 1.7956258985731337, "incorrect_loss_per_token": 3.565397450418183, "correct_loss_uncond": -22.765625, "incorrect_loss_uncond": -12.926642100016275}, "model_output": [{"sum_logits": -16.160633087158203, "num_tokens": 9, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -38.9262580871582, "logits_per_token": -1.7956258985731337, "logits_per_char": -0.40401582717895507, "num_chars": 40}, {"sum_logits": -51.811553955078125, "num_tokens": 11, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -59.552310943603516, "logits_per_token": -4.710141268643466, "logits_per_char": -0.9963760375976562, "num_chars": 52}, {"sum_logits": -47.424888610839844, "num_tokens": 18, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -67.2669448852539, "logits_per_token": -2.634716033935547, "logits_per_char": -0.6003150457068335, "num_chars": 79}, {"sum_logits": -53.621360778808594, "num_tokens": 16, "num_tokens_all": 413, "is_greedy": false, "sum_logits_uncond": -64.81847381591797, "logits_per_token": -3.351335048675537, "logits_per_char": -0.7885494232177734, "num_chars": 68}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 606, "native_id": 20682, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 99.6204833984375, "incorrect_loss_raw": 87.77727762858073, "correct_loss_per_char": 0.3816876758560824, "incorrect_loss_per_char": 0.48727840931323496, "correct_loss_per_token": 1.6603413899739583, "incorrect_loss_per_token": 2.366019290813656, "correct_loss_uncond": -39.79869079589844, "incorrect_loss_uncond": -31.030059814453125}, "model_output": [{"sum_logits": -72.60809326171875, "num_tokens": 36, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -100.7186279296875, "logits_per_token": -2.0168914794921875, "logits_per_char": -0.4509819457249612, "num_chars": 161}, {"sum_logits": -96.41230773925781, "num_tokens": 34, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -122.22091674804688, "logits_per_token": -2.8356561099781707, "logits_per_char": -0.5416421783104372, "num_chars": 178}, {"sum_logits": -94.31143188476562, "num_tokens": 42, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -133.4824676513672, "logits_per_token": -2.24551028297061, "logits_per_char": -0.4692111039043066, "num_chars": 201}, {"sum_logits": -99.6204833984375, "num_tokens": 60, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -139.41917419433594, "logits_per_token": -1.6603413899739583, "logits_per_char": -0.3816876758560824, "num_chars": 261}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 607, "native_id": 42712, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 56.700653076171875, "incorrect_loss_raw": 41.56620852152506, "correct_loss_per_char": 1.1340130615234374, "incorrect_loss_per_char": 1.0395648927349859, "correct_loss_per_token": 4.725054423014323, "incorrect_loss_per_token": 4.638437554330537, "correct_loss_uncond": -25.56517791748047, "incorrect_loss_uncond": -14.566001256306967}, "model_output": [{"sum_logits": -47.9437370300293, "num_tokens": 11, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -68.46221923828125, "logits_per_token": -4.3585215481844815, "logits_per_char": -0.9045988118873453, "num_chars": 53}, {"sum_logits": -56.700653076171875, "num_tokens": 12, "num_tokens_all": 407, "is_greedy": false, "sum_logits_uncond": -82.26583099365234, "logits_per_token": -4.725054423014323, "logits_per_char": -1.1340130615234374, "num_chars": 50}, {"sum_logits": -48.53535461425781, "num_tokens": 10, "num_tokens_all": 405, "is_greedy": false, "sum_logits_uncond": -56.997840881347656, "logits_per_token": -4.853535461425781, "logits_per_char": -1.128729177075763, "num_chars": 43}, {"sum_logits": -28.219533920288086, "num_tokens": 6, "num_tokens_all": 401, "is_greedy": false, "sum_logits_uncond": -42.93656921386719, "logits_per_token": -4.703255653381348, "logits_per_char": -1.0853666892418494, "num_chars": 26}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 608, "native_id": 14926, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 40.547119140625, "incorrect_loss_raw": 34.50262705485026, "correct_loss_per_char": 0.5710861850792254, "incorrect_loss_per_char": 0.6077489607714955, "correct_loss_per_token": 2.8962227957589284, "incorrect_loss_per_token": 2.5197405333470813, "correct_loss_uncond": -27.478073120117188, "incorrect_loss_uncond": -28.859729766845703}, "model_output": [{"sum_logits": -40.547119140625, "num_tokens": 14, "num_tokens_all": 423, "is_greedy": false, "sum_logits_uncond": -68.02519226074219, "logits_per_token": -2.8962227957589284, "logits_per_char": -0.5710861850792254, "num_chars": 71}, {"sum_logits": -41.44309997558594, "num_tokens": 15, "num_tokens_all": 424, "is_greedy": false, "sum_logits_uncond": -67.98866271972656, "logits_per_token": -2.762873331705729, "logits_per_char": -0.6279257572058475, "num_chars": 66}, {"sum_logits": -29.775840759277344, "num_tokens": 16, "num_tokens_all": 425, "is_greedy": false, "sum_logits_uncond": -69.79988098144531, "logits_per_token": -1.860990047454834, "logits_per_char": -0.44441553372055737, "num_chars": 67}, {"sum_logits": -32.2889404296875, "num_tokens": 11, "num_tokens_all": 420, "is_greedy": false, "sum_logits_uncond": -52.298526763916016, "logits_per_token": -2.9353582208806817, "logits_per_char": -0.7509055913880814, "num_chars": 43}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 609, "native_id": 37977, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 79.15060424804688, "incorrect_loss_raw": 155.04043579101562, "correct_loss_per_char": 0.5276706949869792, "incorrect_loss_per_char": 0.8109007226787495, "correct_loss_per_token": 2.3985031590317236, "incorrect_loss_per_token": 4.156206509096831, "correct_loss_uncond": -27.746490478515625, "incorrect_loss_uncond": -7.668141682942708}, "model_output": [{"sum_logits": -128.0333251953125, "num_tokens": 29, "num_tokens_all": 437, "is_greedy": false, "sum_logits_uncond": -128.68048095703125, "logits_per_token": -4.414942248114224, "logits_per_char": -0.9277777188066123, "num_chars": 138}, {"sum_logits": -188.98907470703125, "num_tokens": 53, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -210.34194946289062, "logits_per_token": -3.5658315982458726, "logits_per_char": -0.7213323462100429, "num_chars": 262}, {"sum_logits": -79.15060424804688, "num_tokens": 33, "num_tokens_all": 441, "is_greedy": false, "sum_logits_uncond": -106.8970947265625, "logits_per_token": -2.3985031590317236, "logits_per_char": -0.5276706949869792, "num_chars": 150}, {"sum_logits": -148.09890747070312, "num_tokens": 33, "num_tokens_all": 441, "is_greedy": false, "sum_logits_uncond": -149.10330200195312, "logits_per_token": -4.4878456809303975, "logits_per_char": -0.7835921030195933, "num_chars": 189}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 610, "native_id": 22086, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 57.901145935058594, "incorrect_loss_raw": 83.03484344482422, "correct_loss_per_char": 0.37355578022618446, "incorrect_loss_per_char": 0.5128230760486331, "correct_loss_per_token": 1.6083651648627386, "incorrect_loss_per_token": 2.4013362976324237, "correct_loss_uncond": -25.964981079101562, "incorrect_loss_uncond": -30.089492797851562}, "model_output": [{"sum_logits": -107.45339965820312, "num_tokens": 42, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -139.77706909179688, "logits_per_token": -2.558414277576265, "logits_per_char": -0.5510430751702724, "num_chars": 195}, {"sum_logits": -57.901145935058594, "num_tokens": 36, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -83.86612701416016, "logits_per_token": -1.6083651648627386, "logits_per_char": -0.37355578022618446, "num_chars": 155}, {"sum_logits": -70.78205871582031, "num_tokens": 31, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -96.0460433959961, "logits_per_token": -2.283292216639365, "logits_per_char": -0.48480862134123504, "num_chars": 146}, {"sum_logits": -70.86907196044922, "num_tokens": 30, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -103.54989624023438, "logits_per_token": -2.3623023986816407, "logits_per_char": -0.5026175316343916, "num_chars": 141}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 611, "native_id": 7515, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 22.01767349243164, "incorrect_loss_raw": 48.30756123860677, "correct_loss_per_char": 0.41542780174399324, "incorrect_loss_per_char": 0.7661584634149655, "correct_loss_per_token": 1.8348061243693035, "incorrect_loss_per_token": 3.460319852651528, "correct_loss_uncond": -26.546642303466797, "incorrect_loss_uncond": -20.50969950358073}, "model_output": [{"sum_logits": -42.14539337158203, "num_tokens": 12, "num_tokens_all": 401, "is_greedy": false, "sum_logits_uncond": -55.114261627197266, "logits_per_token": -3.5121161142985025, "logits_per_char": -0.7266447133031385, "num_chars": 58}, {"sum_logits": -22.01767349243164, "num_tokens": 12, "num_tokens_all": 401, "is_greedy": false, "sum_logits_uncond": -48.56431579589844, "logits_per_token": -1.8348061243693035, "logits_per_char": -0.41542780174399324, "num_chars": 53}, {"sum_logits": -38.1297607421875, "num_tokens": 11, "num_tokens_all": 400, "is_greedy": false, "sum_logits_uncond": -62.25869369506836, "logits_per_token": -3.466341885653409, "logits_per_char": -0.8112715051529256, "num_chars": 47}, {"sum_logits": -64.64752960205078, "num_tokens": 19, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -89.07882690429688, "logits_per_token": -3.4025015580026725, "logits_per_char": -0.7605591717888327, "num_chars": 85}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 612, "native_id": 32025, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 22.762592315673828, "incorrect_loss_raw": 29.194514592488606, "correct_loss_per_char": 0.5173316435380415, "incorrect_loss_per_char": 0.5652052793386835, "correct_loss_per_token": 2.2762592315673826, "incorrect_loss_per_token": 2.3492989199502126, "correct_loss_uncond": -21.574054718017578, "incorrect_loss_uncond": -22.619848251342773}, "model_output": [{"sum_logits": -22.762592315673828, "num_tokens": 10, "num_tokens_all": 399, "is_greedy": false, "sum_logits_uncond": -44.336647033691406, "logits_per_token": -2.2762592315673826, "logits_per_char": -0.5173316435380415, "num_chars": 44}, {"sum_logits": -43.8089485168457, "num_tokens": 16, "num_tokens_all": 405, "is_greedy": false, "sum_logits_uncond": -66.0672607421875, "logits_per_token": -2.7380592823028564, "logits_per_char": -0.6349122973455898, "num_chars": 69}, {"sum_logits": -27.21146583557129, "num_tokens": 14, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -55.00993347167969, "logits_per_token": -1.943676131112235, "logits_per_char": -0.46916320406157397, "num_chars": 58}, {"sum_logits": -16.563129425048828, "num_tokens": 7, "num_tokens_all": 396, "is_greedy": false, "sum_logits_uncond": -34.36589431762695, "logits_per_token": -2.366161346435547, "logits_per_char": -0.5915403366088867, "num_chars": 28}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 613, "native_id": 8721, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 65.47509765625, "incorrect_loss_raw": 83.29701741536458, "correct_loss_per_char": 0.48500072337962963, "incorrect_loss_per_char": 0.6715679805157985, "correct_loss_per_token": 2.1120999243951615, "incorrect_loss_per_token": 2.7771283869157757, "correct_loss_uncond": -62.89323425292969, "incorrect_loss_uncond": -24.48859405517578}, "model_output": [{"sum_logits": -106.20262908935547, "num_tokens": 38, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -122.00595092773438, "logits_per_token": -2.7948060286672494, "logits_per_char": -0.6896274616191913, "num_chars": 154}, {"sum_logits": -75.67366790771484, "num_tokens": 28, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -115.79293823242188, "logits_per_token": -2.7026309967041016, "logits_per_char": -0.5776615870817927, "num_chars": 131}, {"sum_logits": -65.47509765625, "num_tokens": 31, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -128.3683319091797, "logits_per_token": -2.1120999243951615, "logits_per_char": -0.48500072337962963, "num_chars": 135}, {"sum_logits": -68.01475524902344, "num_tokens": 24, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -85.55794525146484, "logits_per_token": -2.8339481353759766, "logits_per_char": -0.7474148928464114, "num_chars": 91}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 614, "native_id": 36666, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 114.99139404296875, "incorrect_loss_raw": 87.3389892578125, "correct_loss_per_char": 0.605217863384046, "incorrect_loss_per_char": 0.5479689218187466, "correct_loss_per_token": 2.4998129139775815, "incorrect_loss_per_token": 2.6504535795658044, "correct_loss_uncond": -15.392898559570312, "incorrect_loss_uncond": -16.58356221516927}, "model_output": [{"sum_logits": -114.99139404296875, "num_tokens": 46, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -130.38429260253906, "logits_per_token": -2.4998129139775815, "logits_per_char": -0.605217863384046, "num_chars": 190}, {"sum_logits": -50.078208923339844, "num_tokens": 18, "num_tokens_all": 432, "is_greedy": false, "sum_logits_uncond": -74.18203735351562, "logits_per_token": -2.7821227179633246, "logits_per_char": -0.5271390412983141, "num_chars": 95}, {"sum_logits": -83.83576202392578, "num_tokens": 41, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -100.55491638183594, "logits_per_token": -2.044774683510385, "logits_per_char": -0.4631810056570485, "num_chars": 181}, {"sum_logits": -128.10299682617188, "num_tokens": 41, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -137.03070068359375, "logits_per_token": -3.124463337223704, "logits_per_char": -0.6535867185008769, "num_chars": 196}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 615, "native_id": 6627, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 47.35261535644531, "incorrect_loss_raw": 67.1441879272461, "correct_loss_per_char": 0.7285017747145432, "incorrect_loss_per_char": 0.6400290762062052, "correct_loss_per_token": 2.959538459777832, "incorrect_loss_per_token": 3.2668464834039863, "correct_loss_uncond": -25.703994750976562, "incorrect_loss_uncond": -23.85149637858073}, "model_output": [{"sum_logits": -84.90165710449219, "num_tokens": 24, "num_tokens_all": 445, "is_greedy": false, "sum_logits_uncond": -104.62517547607422, "logits_per_token": -3.537569046020508, "logits_per_char": -0.7016665876404313, "num_chars": 121}, {"sum_logits": -56.67851257324219, "num_tokens": 16, "num_tokens_all": 437, "is_greedy": false, "sum_logits_uncond": -82.24998474121094, "logits_per_token": -3.5424070358276367, "logits_per_char": -0.6590524717818859, "num_chars": 86}, {"sum_logits": -47.35261535644531, "num_tokens": 16, "num_tokens_all": 437, "is_greedy": false, "sum_logits_uncond": -73.05661010742188, "logits_per_token": -2.959538459777832, "logits_per_char": -0.7285017747145432, "num_chars": 65}, {"sum_logits": -59.852394104003906, "num_tokens": 22, "num_tokens_all": 443, "is_greedy": false, "sum_logits_uncond": -86.11189270019531, "logits_per_token": -2.720563368363814, "logits_per_char": -0.5593681691962982, "num_chars": 107}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 616, "native_id": 17964, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 71.12238311767578, "incorrect_loss_raw": 141.704470316569, "correct_loss_per_char": 0.5347547602832765, "incorrect_loss_per_char": 0.8694812476928373, "correct_loss_per_token": 2.370746103922526, "incorrect_loss_per_token": 3.3316938273111982, "correct_loss_uncond": -34.04199981689453, "incorrect_loss_uncond": -23.598719278971355}, "model_output": [{"sum_logits": -71.12238311767578, "num_tokens": 30, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -105.16438293457031, "logits_per_token": -2.370746103922526, "logits_per_char": -0.5347547602832765, "num_chars": 133}, {"sum_logits": -74.64066314697266, "num_tokens": 25, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -106.0505599975586, "logits_per_token": -2.985626525878906, "logits_per_char": -0.7176986841055063, "num_chars": 104}, {"sum_logits": -171.5728759765625, "num_tokens": 50, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -185.565185546875, "logits_per_token": -3.43145751953125, "logits_per_char": -0.9917507281882225, "num_chars": 173}, {"sum_logits": -178.89987182617188, "num_tokens": 50, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -204.2938232421875, "logits_per_token": -3.5779974365234377, "logits_per_char": -0.8989943307847833, "num_chars": 199}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 617, "native_id": 37505, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 52.5560302734375, "incorrect_loss_raw": 90.0762710571289, "correct_loss_per_char": 0.5839558919270833, "incorrect_loss_per_char": 0.6210697232615878, "correct_loss_per_token": 2.7661068564967106, "incorrect_loss_per_token": 2.5839628726901633, "correct_loss_uncond": -21.571243286132812, "incorrect_loss_uncond": -24.971059163411457}, "model_output": [{"sum_logits": -114.65169525146484, "num_tokens": 41, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -143.63864135742188, "logits_per_token": -2.7963828110113376, "logits_per_char": -0.6865370973141608, "num_chars": 167}, {"sum_logits": -84.11477661132812, "num_tokens": 35, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -106.33147430419922, "logits_per_token": -2.4032793317522323, "logits_per_char": -0.6139764716155338, "num_chars": 137}, {"sum_logits": -71.46234130859375, "num_tokens": 28, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -95.171875, "logits_per_token": -2.5522264753069197, "logits_per_char": -0.5626956008550689, "num_chars": 127}, {"sum_logits": -52.5560302734375, "num_tokens": 19, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -74.12727355957031, "logits_per_token": -2.7661068564967106, "logits_per_char": -0.5839558919270833, "num_chars": 90}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 618, "native_id": 14156, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.206966400146484, "incorrect_loss_raw": 23.070074717203777, "correct_loss_per_char": 0.471677371433803, "incorrect_loss_per_char": 0.6049794676181101, "correct_loss_per_token": 1.886709485735212, "incorrect_loss_per_token": 2.7504661065560803, "correct_loss_uncond": -16.933082580566406, "incorrect_loss_uncond": -16.275549570719402}, "model_output": [{"sum_logits": -13.206966400146484, "num_tokens": 7, "num_tokens_all": 391, "is_greedy": false, "sum_logits_uncond": -30.14004898071289, "logits_per_token": -1.886709485735212, "logits_per_char": -0.471677371433803, "num_chars": 28}, {"sum_logits": -19.17416763305664, "num_tokens": 9, "num_tokens_all": 393, "is_greedy": false, "sum_logits_uncond": -35.601051330566406, "logits_per_token": -2.130463070339627, "logits_per_char": -0.4676626251965034, "num_chars": 41}, {"sum_logits": -23.96503257751465, "num_tokens": 10, "num_tokens_all": 394, "is_greedy": false, "sum_logits_uncond": -42.08190155029297, "logits_per_token": -2.3965032577514647, "logits_per_char": -0.5325562795003255, "num_chars": 45}, {"sum_logits": -26.07102394104004, "num_tokens": 7, "num_tokens_all": 391, "is_greedy": false, "sum_logits_uncond": -40.353919982910156, "logits_per_token": -3.7244319915771484, "logits_per_char": -0.8147194981575012, "num_chars": 32}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 619, "native_id": 9655, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 118.66876220703125, "incorrect_loss_raw": 138.60543314615884, "correct_loss_per_char": 0.5545269262010806, "incorrect_loss_per_char": 0.7274041831871351, "correct_loss_per_token": 2.966719055175781, "incorrect_loss_per_token": 3.8356634606200277, "correct_loss_uncond": -41.52081298828125, "incorrect_loss_uncond": -17.767361958821613}, "model_output": [{"sum_logits": -181.3341522216797, "num_tokens": 53, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -200.97726440429688, "logits_per_token": -3.4213990985222584, "logits_per_char": -0.7311860976680633, "num_chars": 248}, {"sum_logits": -118.66876220703125, "num_tokens": 40, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -160.1895751953125, "logits_per_token": -2.966719055175781, "logits_per_char": -0.5545269262010806, "num_chars": 214}, {"sum_logits": -120.77385711669922, "num_tokens": 29, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -147.8704833984375, "logits_per_token": -4.164615762644801, "logits_per_char": -0.774191391773713, "num_chars": 156}, {"sum_logits": -113.70829010009766, "num_tokens": 29, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -120.27063751220703, "logits_per_token": -3.920975520693023, "logits_per_char": -0.6768350601196289, "num_chars": 168}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 620, "native_id": 38639, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 93.24803161621094, "incorrect_loss_raw": 108.62035878499348, "correct_loss_per_char": 0.5617351302181381, "incorrect_loss_per_char": 0.7331684501707579, "correct_loss_per_token": 2.590223100450304, "incorrect_loss_per_token": 3.215855303718396, "correct_loss_uncond": -13.873947143554688, "incorrect_loss_uncond": -4.778129577636719}, "model_output": [{"sum_logits": -89.49005889892578, "num_tokens": 37, "num_tokens_all": 443, "is_greedy": false, "sum_logits_uncond": -90.20936584472656, "logits_per_token": -2.4186502405115076, "logits_per_char": -0.7395872636274858, "num_chars": 121}, {"sum_logits": -159.685791015625, "num_tokens": 41, "num_tokens_all": 447, "is_greedy": false, "sum_logits_uncond": -166.61322021484375, "logits_per_token": -3.894775390625, "logits_per_char": -0.7225601403421946, "num_chars": 221}, {"sum_logits": -93.24803161621094, "num_tokens": 36, "num_tokens_all": 442, "is_greedy": false, "sum_logits_uncond": -107.12197875976562, "logits_per_token": -2.590223100450304, "logits_per_char": -0.5617351302181381, "num_chars": 166}, {"sum_logits": -76.68522644042969, "num_tokens": 23, "num_tokens_all": 429, "is_greedy": false, "sum_logits_uncond": -83.37287902832031, "logits_per_token": -3.334140280018682, "logits_per_char": -0.7373579465425931, "num_chars": 104}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 621, "native_id": 10338, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 25.32595443725586, "incorrect_loss_raw": 34.05946159362793, "correct_loss_per_char": 0.6029989151727586, "incorrect_loss_per_char": 0.5674823655368969, "correct_loss_per_token": 2.5325954437255858, "incorrect_loss_per_token": 2.99812573932466, "correct_loss_uncond": -25.747161865234375, "incorrect_loss_uncond": -22.21861203511556}, "model_output": [{"sum_logits": -25.32595443725586, "num_tokens": 10, "num_tokens_all": 399, "is_greedy": false, "sum_logits_uncond": -51.073116302490234, "logits_per_token": -2.5325954437255858, "logits_per_char": -0.6029989151727586, "num_chars": 42}, {"sum_logits": -29.847536087036133, "num_tokens": 10, "num_tokens_all": 399, "is_greedy": false, "sum_logits_uncond": -40.020713806152344, "logits_per_token": -2.9847536087036133, "logits_per_char": -0.5631610582459647, "num_chars": 53}, {"sum_logits": -42.821144104003906, "num_tokens": 14, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -70.30719757080078, "logits_per_token": -3.0586531502859935, "logits_per_char": -0.639121553791103, "num_chars": 67}, {"sum_logits": -29.50970458984375, "num_tokens": 10, "num_tokens_all": 399, "is_greedy": false, "sum_logits_uncond": -58.506309509277344, "logits_per_token": -2.950970458984375, "logits_per_char": -0.5001644845736228, "num_chars": 59}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 622, "native_id": 25821, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 99.11689758300781, "incorrect_loss_raw": 60.67830912272135, "correct_loss_per_char": 0.593514356784478, "incorrect_loss_per_char": 0.5817469164557743, "correct_loss_per_token": 2.4174853069026296, "incorrect_loss_per_token": 2.4303904958238007, "correct_loss_uncond": -18.6607666015625, "incorrect_loss_uncond": -22.426129659016926}, "model_output": [{"sum_logits": -48.559791564941406, "num_tokens": 22, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -72.71400451660156, "logits_per_token": -2.2072632529518823, "logits_per_char": -0.5518158132379706, "num_chars": 88}, {"sum_logits": -66.47576141357422, "num_tokens": 29, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -83.52113342285156, "logits_per_token": -2.2922676349508353, "logits_per_char": -0.5234311922328678, "num_chars": 127}, {"sum_logits": -99.11689758300781, "num_tokens": 41, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -117.77766418457031, "logits_per_token": -2.4174853069026296, "logits_per_char": -0.593514356784478, "num_chars": 167}, {"sum_logits": -66.99937438964844, "num_tokens": 24, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -93.07817840576172, "logits_per_token": -2.791640599568685, "logits_per_char": -0.6699937438964844, "num_chars": 100}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 623, "native_id": 34297, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 63.002166748046875, "incorrect_loss_raw": 66.66725158691406, "correct_loss_per_char": 0.5384800576756144, "incorrect_loss_per_char": 0.46748869534962695, "correct_loss_per_token": 2.1724885085533403, "incorrect_loss_per_token": 1.9122460902831726, "correct_loss_uncond": -18.705764770507812, "incorrect_loss_uncond": -29.675621032714844}, "model_output": [{"sum_logits": -54.42512512207031, "num_tokens": 35, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -84.8363037109375, "logits_per_token": -1.5550035749162947, "logits_per_char": -0.4001847435446346, "num_chars": 136}, {"sum_logits": -61.15769958496094, "num_tokens": 36, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -89.10894775390625, "logits_per_token": -1.698824988471137, "logits_per_char": -0.4160387726868091, "num_chars": 147}, {"sum_logits": -63.002166748046875, "num_tokens": 29, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -81.70793151855469, "logits_per_token": -2.1724885085533403, "logits_per_char": -0.5384800576756144, "num_chars": 117}, {"sum_logits": -84.41893005371094, "num_tokens": 34, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -115.08336639404297, "logits_per_token": -2.4829097074620865, "logits_per_char": -0.586242569817437, "num_chars": 144}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 624, "native_id": 7012, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 25.452682495117188, "incorrect_loss_raw": 37.26524798075358, "correct_loss_per_char": 0.6207971340272485, "incorrect_loss_per_char": 0.6848562824829982, "correct_loss_per_token": 2.3138802268288354, "incorrect_loss_per_token": 2.613154451797245, "correct_loss_uncond": -18.668136596679688, "incorrect_loss_uncond": -23.439672470092773}, "model_output": [{"sum_logits": -29.971750259399414, "num_tokens": 17, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -60.792076110839844, "logits_per_token": -1.763044132905848, "logits_per_char": -0.4343731921652089, "num_chars": 69}, {"sum_logits": -42.196380615234375, "num_tokens": 13, "num_tokens_all": 399, "is_greedy": false, "sum_logits_uncond": -60.93622589111328, "logits_per_token": -3.245875431941106, "logits_per_char": -0.8114688579852765, "num_chars": 52}, {"sum_logits": -39.62761306762695, "num_tokens": 14, "num_tokens_all": 400, "is_greedy": false, "sum_logits_uncond": -60.38645935058594, "logits_per_token": -2.8305437905447826, "logits_per_char": -0.8087267972985093, "num_chars": 49}, {"sum_logits": -25.452682495117188, "num_tokens": 11, "num_tokens_all": 397, "is_greedy": false, "sum_logits_uncond": -44.120819091796875, "logits_per_token": -2.3138802268288354, "logits_per_char": -0.6207971340272485, "num_chars": 41}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 625, "native_id": 11391, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 96.13314819335938, "incorrect_loss_raw": 122.21703847249348, "correct_loss_per_char": 0.39724441402214616, "incorrect_loss_per_char": 0.5535519838529765, "correct_loss_per_token": 1.7166633605957031, "incorrect_loss_per_token": 2.7758273791241392, "correct_loss_uncond": -33.59942626953125, "incorrect_loss_uncond": -13.828948974609375}, "model_output": [{"sum_logits": -162.24696350097656, "num_tokens": 60, "num_tokens_all": 497, "is_greedy": false, "sum_logits_uncond": -178.32643127441406, "logits_per_token": -2.7041160583496096, "logits_per_char": -0.5899889581853693, "num_chars": 275}, {"sum_logits": -81.07695770263672, "num_tokens": 31, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -93.29833221435547, "logits_per_token": -2.6153857323431198, "logits_per_char": -0.49437169330876046, "num_chars": 164}, {"sum_logits": -96.13314819335938, "num_tokens": 56, "num_tokens_all": 493, "is_greedy": false, "sum_logits_uncond": -129.73257446289062, "logits_per_token": -1.7166633605957031, "logits_per_char": -0.39724441402214616, "num_chars": 242}, {"sum_logits": -123.32719421386719, "num_tokens": 41, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -136.51319885253906, "logits_per_token": -3.0079803466796875, "logits_per_char": -0.5762953000647999, "num_chars": 214}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 626, "native_id": 45703, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.293248176574707, "incorrect_loss_raw": 28.739094416300457, "correct_loss_per_char": 0.23036800490485299, "incorrect_loss_per_char": 0.6772838015134978, "correct_loss_per_token": 1.0366560220718384, "incorrect_loss_per_token": 3.049571079838557, "correct_loss_uncond": -27.8944730758667, "incorrect_loss_uncond": -16.351477940877277}, "model_output": [{"sum_logits": -36.50395965576172, "num_tokens": 11, "num_tokens_all": 399, "is_greedy": false, "sum_logits_uncond": -44.861427307128906, "logits_per_token": -3.318541786887429, "logits_per_char": -0.8903404794088224, "num_chars": 41}, {"sum_logits": -8.293248176574707, "num_tokens": 8, "num_tokens_all": 396, "is_greedy": false, "sum_logits_uncond": -36.187721252441406, "logits_per_token": -1.0366560220718384, "logits_per_char": -0.23036800490485299, "num_chars": 36}, {"sum_logits": -27.647567749023438, "num_tokens": 9, "num_tokens_all": 397, "is_greedy": false, "sum_logits_uncond": -47.895355224609375, "logits_per_token": -3.071951972113715, "logits_per_char": -0.6283538124778054, "num_chars": 44}, {"sum_logits": -22.06575584411621, "num_tokens": 8, "num_tokens_all": 396, "is_greedy": false, "sum_logits_uncond": -42.51493453979492, "logits_per_token": -2.7582194805145264, "logits_per_char": -0.5131571126538653, "num_chars": 43}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 627, "native_id": 4073, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 88.96749114990234, "incorrect_loss_raw": 149.16588846842447, "correct_loss_per_char": 0.49153310027570357, "incorrect_loss_per_char": 0.7713076253112741, "correct_loss_per_token": 2.281217721792368, "incorrect_loss_per_token": 3.337356048621775, "correct_loss_uncond": -26.642959594726562, "incorrect_loss_uncond": -26.53723653157552}, "model_output": [{"sum_logits": -88.96749114990234, "num_tokens": 39, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -115.6104507446289, "logits_per_token": -2.281217721792368, "logits_per_char": -0.49153310027570357, "num_chars": 181}, {"sum_logits": -124.03997802734375, "num_tokens": 42, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -145.0252685546875, "logits_per_token": -2.9533328101748513, "logits_per_char": -0.626464535491635, "num_chars": 198}, {"sum_logits": -136.64303588867188, "num_tokens": 47, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -176.25546264648438, "logits_per_token": -2.907298635929189, "logits_per_char": -0.8383008336728336, "num_chars": 163}, {"sum_logits": -186.8146514892578, "num_tokens": 45, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -205.82864379882812, "logits_per_token": -4.151436699761285, "logits_per_char": -0.8491575067693536, "num_chars": 220}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 628, "native_id": 35418, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 52.08894348144531, "incorrect_loss_raw": 106.9597880045573, "correct_loss_per_char": 0.62757763230657, "incorrect_loss_per_char": 0.6056370962799665, "correct_loss_per_token": 2.367679249156605, "incorrect_loss_per_token": 2.7626993253035246, "correct_loss_uncond": -14.50299072265625, "incorrect_loss_uncond": -22.790257771809895}, "model_output": [{"sum_logits": -52.08894348144531, "num_tokens": 22, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -66.59193420410156, "logits_per_token": -2.367679249156605, "logits_per_char": -0.62757763230657, "num_chars": 83}, {"sum_logits": -144.35166931152344, "num_tokens": 48, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -166.8617401123047, "logits_per_token": -3.0073264439900718, "logits_per_char": -0.6745405108015113, "num_chars": 214}, {"sum_logits": -72.42304992675781, "num_tokens": 24, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -85.44503784179688, "logits_per_token": -3.0176270802815757, "logits_per_char": -0.6466343743460519, "num_chars": 112}, {"sum_logits": -104.10464477539062, "num_tokens": 46, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -136.943359375, "logits_per_token": -2.2631444516389267, "logits_per_char": -0.4957364036923363, "num_chars": 210}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 629, "native_id": 45850, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 68.16474151611328, "incorrect_loss_raw": 75.24548212687175, "correct_loss_per_char": 0.457481486685324, "incorrect_loss_per_char": 0.5593798072656627, "correct_loss_per_token": 1.9475640433175223, "incorrect_loss_per_token": 2.430332059804459, "correct_loss_uncond": -39.94209289550781, "incorrect_loss_uncond": -25.498289744059246}, "model_output": [{"sum_logits": -68.16474151611328, "num_tokens": 35, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -108.1068344116211, "logits_per_token": -1.9475640433175223, "logits_per_char": -0.457481486685324, "num_chars": 149}, {"sum_logits": -56.5646858215332, "num_tokens": 25, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -84.99270629882812, "logits_per_token": -2.262587432861328, "logits_per_char": -0.5438912098224347, "num_chars": 104}, {"sum_logits": -87.01761627197266, "num_tokens": 38, "num_tokens_all": 495, "is_greedy": false, "sum_logits_uncond": -113.08445739746094, "logits_per_token": -2.28993727031507, "logits_per_char": -0.5118683310116039, "num_chars": 170}, {"sum_logits": -82.15414428710938, "num_tokens": 30, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -104.1541519165039, "logits_per_token": -2.7384714762369793, "logits_per_char": -0.6223798809629498, "num_chars": 132}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 630, "native_id": 14561, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 12.387439727783203, "incorrect_loss_raw": 12.543815930684408, "correct_loss_per_char": 0.5385843359905741, "incorrect_loss_per_char": 0.5172020059161716, "correct_loss_per_token": 2.064573287963867, "incorrect_loss_per_token": 2.0906359884474015, "correct_loss_uncond": -26.730148315429688, "incorrect_loss_uncond": -18.92377249399821}, "model_output": [{"sum_logits": -12.387439727783203, "num_tokens": 6, "num_tokens_all": 393, "is_greedy": false, "sum_logits_uncond": -39.11758804321289, "logits_per_token": -2.064573287963867, "logits_per_char": -0.5385843359905741, "num_chars": 23}, {"sum_logits": -18.181514739990234, "num_tokens": 6, "num_tokens_all": 393, "is_greedy": false, "sum_logits_uncond": -32.03660202026367, "logits_per_token": -3.030252456665039, "logits_per_char": -0.7575631141662598, "num_chars": 24}, {"sum_logits": -9.627348899841309, "num_tokens": 6, "num_tokens_all": 393, "is_greedy": false, "sum_logits_uncond": -33.68586730957031, "logits_per_token": -1.6045581499735515, "logits_per_char": -0.4011395374933879, "num_chars": 24}, {"sum_logits": -9.82258415222168, "num_tokens": 6, "num_tokens_all": 393, "is_greedy": false, "sum_logits_uncond": -28.680295944213867, "logits_per_token": -1.6370973587036133, "logits_per_char": -0.39290336608886717, "num_chars": 25}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 631, "native_id": 12125, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 72.52726745605469, "incorrect_loss_raw": 108.31709798177083, "correct_loss_per_char": 0.5107554046201034, "incorrect_loss_per_char": 0.6356535374289222, "correct_loss_per_token": 2.4175755818684896, "incorrect_loss_per_token": 3.0205258834822923, "correct_loss_uncond": -32.18341827392578, "incorrect_loss_uncond": -15.684377034505209}, "model_output": [{"sum_logits": -119.27987670898438, "num_tokens": 33, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -133.62594604492188, "logits_per_token": -3.614541718454072, "logits_per_char": -0.769547591670867, "num_chars": 155}, {"sum_logits": -120.52389526367188, "num_tokens": 48, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -132.88987731933594, "logits_per_token": -2.5109144846598306, "logits_per_char": -0.6244761412625486, "num_chars": 193}, {"sum_logits": -72.52726745605469, "num_tokens": 30, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -104.71068572998047, "logits_per_token": -2.4175755818684896, "logits_per_char": -0.5107554046201034, "num_chars": 142}, {"sum_logits": -85.14752197265625, "num_tokens": 29, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -105.48860168457031, "logits_per_token": -2.936121447332974, "logits_per_char": -0.5129368793533509, "num_chars": 166}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 632, "native_id": 5694, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 80.81341552734375, "incorrect_loss_raw": 119.08711242675781, "correct_loss_per_char": 0.7414074819022363, "incorrect_loss_per_char": 0.7012621860676909, "correct_loss_per_token": 3.3672256469726562, "incorrect_loss_per_token": 3.027340368298205, "correct_loss_uncond": -22.970962524414062, "incorrect_loss_uncond": -12.326136271158854}, "model_output": [{"sum_logits": -80.81341552734375, "num_tokens": 24, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -103.78437805175781, "logits_per_token": -3.3672256469726562, "logits_per_char": -0.7414074819022363, "num_chars": 109}, {"sum_logits": -84.411865234375, "num_tokens": 30, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -92.98628234863281, "logits_per_token": -2.813728841145833, "logits_per_char": -0.5742303757440477, "num_chars": 147}, {"sum_logits": -127.0816650390625, "num_tokens": 43, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -143.9521026611328, "logits_per_token": -2.9553875590479652, "logits_per_char": -0.7060092502170139, "num_chars": 180}, {"sum_logits": -145.76780700683594, "num_tokens": 44, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -157.30136108398438, "logits_per_token": -3.312904704700817, "logits_per_char": -0.8235469322420109, "num_chars": 177}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 633, "native_id": 30116, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 88.53792572021484, "incorrect_loss_raw": 70.55416615804036, "correct_loss_per_char": 0.6148467063903809, "incorrect_loss_per_char": 0.618140515768684, "correct_loss_per_token": 2.392916911357158, "incorrect_loss_per_token": 2.619535442705151, "correct_loss_uncond": -31.90526580810547, "incorrect_loss_uncond": -32.89777374267578}, "model_output": [{"sum_logits": -88.53792572021484, "num_tokens": 37, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -120.44319152832031, "logits_per_token": -2.392916911357158, "logits_per_char": -0.6148467063903809, "num_chars": 144}, {"sum_logits": -72.58627319335938, "num_tokens": 21, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -106.1417236328125, "logits_per_token": -3.4564891996837797, "logits_per_char": -0.844026432480923, "num_chars": 86}, {"sum_logits": -56.25952911376953, "num_tokens": 26, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -95.38595581054688, "logits_per_token": -2.16382804283729, "logits_per_char": -0.5023172242300851, "num_chars": 112}, {"sum_logits": -82.81669616699219, "num_tokens": 37, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -108.82814025878906, "logits_per_token": -2.2382890855943836, "logits_per_char": -0.508077890595044, "num_chars": 163}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 634, "native_id": 31760, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 55.34204864501953, "incorrect_loss_raw": 85.13551584879558, "correct_loss_per_char": 0.4770866262501684, "incorrect_loss_per_char": 0.5111975983626595, "correct_loss_per_token": 2.128540332500751, "incorrect_loss_per_token": 2.6368659500860923, "correct_loss_uncond": -36.075233459472656, "incorrect_loss_uncond": -17.230506896972656}, "model_output": [{"sum_logits": -74.08819580078125, "num_tokens": 30, "num_tokens_all": 443, "is_greedy": false, "sum_logits_uncond": -87.47557830810547, "logits_per_token": -2.4696065266927083, "logits_per_char": -0.5368709840636322, "num_chars": 138}, {"sum_logits": -120.77743530273438, "num_tokens": 43, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -145.00823974609375, "logits_per_token": -2.808777565179869, "logits_per_char": -0.527412381234648, "num_chars": 229}, {"sum_logits": -55.34204864501953, "num_tokens": 26, "num_tokens_all": 439, "is_greedy": false, "sum_logits_uncond": -91.41728210449219, "logits_per_token": -2.128540332500751, "logits_per_char": -0.4770866262501684, "num_chars": 116}, {"sum_logits": -60.540916442871094, "num_tokens": 23, "num_tokens_all": 436, "is_greedy": false, "sum_logits_uncond": -74.61425018310547, "logits_per_token": -2.6322137583856997, "logits_per_char": -0.4693094297896984, "num_chars": 129}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 635, "native_id": 43950, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 54.38109588623047, "incorrect_loss_raw": 65.64183553059895, "correct_loss_per_char": 0.776872798374721, "incorrect_loss_per_char": 0.7926359015380976, "correct_loss_per_token": 3.6254063924153646, "incorrect_loss_per_token": 3.3874893209786525, "correct_loss_uncond": -23.652481079101562, "incorrect_loss_uncond": -19.118932088216145}, "model_output": [{"sum_logits": -48.05492401123047, "num_tokens": 17, "num_tokens_all": 432, "is_greedy": false, "sum_logits_uncond": -70.75624084472656, "logits_per_token": -2.8267602359547332, "logits_per_char": -0.6323016317267167, "num_chars": 76}, {"sum_logits": -45.67970275878906, "num_tokens": 13, "num_tokens_all": 428, "is_greedy": false, "sum_logits_uncond": -66.12865447998047, "logits_per_token": -3.51382328913762, "logits_per_char": -0.878455822284405, "num_chars": 52}, {"sum_logits": -103.19087982177734, "num_tokens": 27, "num_tokens_all": 442, "is_greedy": false, "sum_logits_uncond": -117.39740753173828, "logits_per_token": -3.821884437843605, "logits_per_char": -0.867150250603171, "num_chars": 119}, {"sum_logits": -54.38109588623047, "num_tokens": 15, "num_tokens_all": 430, "is_greedy": false, "sum_logits_uncond": -78.03357696533203, "logits_per_token": -3.6254063924153646, "logits_per_char": -0.776872798374721, "num_chars": 70}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 636, "native_id": 8043, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 61.9832878112793, "incorrect_loss_raw": 89.51592254638672, "correct_loss_per_char": 0.48049060318821163, "incorrect_loss_per_char": 0.6484438663426774, "correct_loss_per_token": 2.295677326343678, "incorrect_loss_per_token": 2.732941434787387, "correct_loss_uncond": -17.249637603759766, "incorrect_loss_uncond": -15.416343688964844}, "model_output": [{"sum_logits": -72.58720397949219, "num_tokens": 29, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -90.10415649414062, "logits_per_token": -2.5030070337755927, "logits_per_char": -0.5806976318359375, "num_chars": 125}, {"sum_logits": -119.57307434082031, "num_tokens": 37, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -131.93389892578125, "logits_per_token": -3.2317047119140625, "logits_per_char": -0.7815233617047079, "num_chars": 153}, {"sum_logits": -61.9832878112793, "num_tokens": 27, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -79.23292541503906, "logits_per_token": -2.295677326343678, "logits_per_char": -0.48049060318821163, "num_chars": 129}, {"sum_logits": -76.38748931884766, "num_tokens": 31, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -92.75874328613281, "logits_per_token": -2.4641125586725052, "logits_per_char": -0.5831106054873867, "num_chars": 131}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 637, "native_id": 40211, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 32.45195770263672, "incorrect_loss_raw": 55.99530283610026, "correct_loss_per_char": 0.9271987915039063, "incorrect_loss_per_char": 0.8774356090193375, "correct_loss_per_token": 4.05649471282959, "incorrect_loss_per_token": 4.065033394949777, "correct_loss_uncond": -10.744239807128906, "incorrect_loss_uncond": -17.038731892903645}, "model_output": [{"sum_logits": -32.45195770263672, "num_tokens": 8, "num_tokens_all": 407, "is_greedy": false, "sum_logits_uncond": -43.196197509765625, "logits_per_token": -4.05649471282959, "logits_per_char": -0.9271987915039063, "num_chars": 35}, {"sum_logits": -37.96076965332031, "num_tokens": 7, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -44.265830993652344, "logits_per_token": -5.422967093331473, "logits_per_char": -1.054465823703342, "num_chars": 36}, {"sum_logits": -107.76548767089844, "num_tokens": 30, "num_tokens_all": 429, "is_greedy": false, "sum_logits_uncond": -140.20855712890625, "logits_per_token": -3.592182922363281, "logits_per_char": -0.8102668245932213, "num_chars": 133}, {"sum_logits": -22.25965118408203, "num_tokens": 7, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -34.627716064453125, "logits_per_token": -3.179950169154576, "logits_per_char": -0.7675741787614494, "num_chars": 29}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 638, "native_id": 26570, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 124.3696060180664, "incorrect_loss_raw": 111.2825101216634, "correct_loss_per_char": 0.5055675041384813, "incorrect_loss_per_char": 0.5224528438489898, "correct_loss_per_token": 2.763769022623698, "incorrect_loss_per_token": 2.609767014639718, "correct_loss_uncond": -8.763664245605469, "incorrect_loss_uncond": -14.167547861735025}, "model_output": [{"sum_logits": -51.91690444946289, "num_tokens": 28, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -69.98435974121094, "logits_per_token": -1.8541751589093889, "logits_per_char": -0.4186847133021201, "num_chars": 124}, {"sum_logits": -85.91118621826172, "num_tokens": 30, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -105.93521118164062, "logits_per_token": -2.8637062072753907, "logits_per_char": -0.5303159643102575, "num_chars": 162}, {"sum_logits": -196.01943969726562, "num_tokens": 63, "num_tokens_all": 493, "is_greedy": false, "sum_logits_uncond": -200.43060302734375, "logits_per_token": -3.111419677734375, "logits_per_char": -0.6183578539345919, "num_chars": 317}, {"sum_logits": -124.3696060180664, "num_tokens": 45, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -133.13327026367188, "logits_per_token": -2.763769022623698, "logits_per_char": -0.5055675041384813, "num_chars": 246}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 639, "native_id": 13919, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 108.00045776367188, "incorrect_loss_raw": 146.83362325032553, "correct_loss_per_char": 0.6967771468623992, "incorrect_loss_per_char": 0.8354336562322221, "correct_loss_per_token": 3.2727411443536933, "incorrect_loss_per_token": 3.9268991630861443, "correct_loss_uncond": -17.218231201171875, "incorrect_loss_uncond": -18.416829427083332}, "model_output": [{"sum_logits": -163.2505645751953, "num_tokens": 37, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -181.4217071533203, "logits_per_token": -4.412177420951225, "logits_per_char": -1.0015372059827934, "num_chars": 163}, {"sum_logits": -108.00045776367188, "num_tokens": 33, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -125.21868896484375, "logits_per_token": -3.2727411443536933, "logits_per_char": -0.6967771468623992, "num_chars": 155}, {"sum_logits": -121.46372985839844, "num_tokens": 36, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -148.51229858398438, "logits_per_token": -3.373992496066623, "logits_per_char": -0.6392827887284128, "num_chars": 190}, {"sum_logits": -155.7865753173828, "num_tokens": 39, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -165.81735229492188, "logits_per_token": -3.994527572240585, "logits_per_char": -0.8654809739854601, "num_chars": 180}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 640, "native_id": 39258, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 147.0870361328125, "incorrect_loss_raw": 170.29279073079428, "correct_loss_per_char": 0.8126355587448204, "incorrect_loss_per_char": 0.8410598917381966, "correct_loss_per_token": 3.502072288876488, "incorrect_loss_per_token": 4.125955306159126, "correct_loss_uncond": -9.75640869140625, "incorrect_loss_uncond": -17.46380106608073}, "model_output": [{"sum_logits": -196.53897094726562, "num_tokens": 48, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -211.4576416015625, "logits_per_token": -4.0945618947347, "logits_per_char": -0.8155144022708117, "num_chars": 241}, {"sum_logits": -138.76531982421875, "num_tokens": 30, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -159.44607543945312, "logits_per_token": -4.625510660807292, "logits_per_char": -0.9376035123258024, "num_chars": 148}, {"sum_logits": -175.57408142089844, "num_tokens": 48, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -192.36605834960938, "logits_per_token": -3.6577933629353843, "logits_per_char": -0.7700617606179756, "num_chars": 228}, {"sum_logits": -147.0870361328125, "num_tokens": 42, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -156.84344482421875, "logits_per_token": -3.502072288876488, "logits_per_char": -0.8126355587448204, "num_chars": 181}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 641, "native_id": 2218, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 15.766860961914062, "incorrect_loss_raw": 35.179962158203125, "correct_loss_per_char": 0.3427578469981318, "incorrect_loss_per_char": 0.8042549935658055, "correct_loss_per_token": 1.5766860961914062, "incorrect_loss_per_token": 3.721154414787494, "correct_loss_uncond": -29.19322967529297, "incorrect_loss_uncond": -20.85220464070638}, "model_output": [{"sum_logits": -44.225379943847656, "num_tokens": 8, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -55.318145751953125, "logits_per_token": -5.528172492980957, "logits_per_char": -1.2635822841099331, "num_chars": 35}, {"sum_logits": -15.766860961914062, "num_tokens": 10, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -44.96009063720703, "logits_per_token": -1.5766860961914062, "logits_per_char": -0.3427578469981318, "num_chars": 46}, {"sum_logits": -29.67129135131836, "num_tokens": 14, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -59.486629486083984, "logits_per_token": -2.1193779536655972, "logits_per_char": -0.430018715236498, "num_chars": 69}, {"sum_logits": -31.64321517944336, "num_tokens": 9, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -53.291725158691406, "logits_per_token": -3.515912797715929, "logits_per_char": -0.7191639813509855, "num_chars": 44}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 642, "native_id": 11834, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 94.83367156982422, "incorrect_loss_raw": 125.17493184407552, "correct_loss_per_char": 0.5098584493001302, "incorrect_loss_per_char": 0.810545200474261, "correct_loss_per_token": 2.257944561186291, "incorrect_loss_per_token": 3.5068697788570655, "correct_loss_uncond": -16.52855682373047, "incorrect_loss_uncond": -6.3945261637369795}, "model_output": [{"sum_logits": -184.2479248046875, "num_tokens": 46, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -182.48593139648438, "logits_per_token": -4.005389669667119, "logits_per_char": -0.9076252453432881, "num_chars": 203}, {"sum_logits": -68.03846740722656, "num_tokens": 19, "num_tokens_all": 436, "is_greedy": false, "sum_logits_uncond": -74.87808227539062, "logits_per_token": -3.580971968801398, "logits_per_char": -0.8952429922003495, "num_chars": 76}, {"sum_logits": -94.83367156982422, "num_tokens": 42, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -111.36222839355469, "logits_per_token": -2.257944561186291, "logits_per_char": -0.5098584493001302, "num_chars": 186}, {"sum_logits": -123.2384033203125, "num_tokens": 42, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -137.3443603515625, "logits_per_token": -2.9342476981026784, "logits_per_char": -0.6287673638791454, "num_chars": 196}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 643, "native_id": 15645, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 91.23052215576172, "incorrect_loss_raw": 111.24679565429688, "correct_loss_per_char": 0.5243133457227686, "incorrect_loss_per_char": 0.5544326468646456, "correct_loss_per_token": 2.46568978799356, "incorrect_loss_per_token": 2.3663529199267193, "correct_loss_uncond": -25.665924072265625, "incorrect_loss_uncond": -24.220815022786457}, "model_output": [{"sum_logits": -80.66120147705078, "num_tokens": 42, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -103.5565185546875, "logits_per_token": -1.9205047970726377, "logits_per_char": -0.44811778598361546, "num_chars": 180}, {"sum_logits": -112.71488189697266, "num_tokens": 50, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -136.694091796875, "logits_per_token": -2.254297637939453, "logits_per_char": -0.5995472441328332, "num_chars": 188}, {"sum_logits": -91.23052215576172, "num_tokens": 37, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -116.89644622802734, "logits_per_token": -2.46568978799356, "logits_per_char": -0.5243133457227686, "num_chars": 174}, {"sum_logits": -140.3643035888672, "num_tokens": 48, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -166.1522216796875, "logits_per_token": -2.9242563247680664, "logits_per_char": -0.6156329104774877, "num_chars": 228}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 644, "native_id": 48190, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 108.68696594238281, "incorrect_loss_raw": 96.43302917480469, "correct_loss_per_char": 0.48091577850611866, "incorrect_loss_per_char": 0.5208134150185512, "correct_loss_per_token": 2.2643117904663086, "incorrect_loss_per_token": 2.373329074374391, "correct_loss_uncond": -34.80912780761719, "incorrect_loss_uncond": -22.2520751953125}, "model_output": [{"sum_logits": -70.90438842773438, "num_tokens": 36, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -106.72174072265625, "logits_per_token": -1.9695663452148438, "logits_per_char": -0.4403999281225738, "num_chars": 161}, {"sum_logits": -46.04783630371094, "num_tokens": 26, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -63.34382629394531, "logits_per_token": -1.7710706270658052, "logits_per_char": -0.4039283886290433, "num_chars": 114}, {"sum_logits": -172.34686279296875, "num_tokens": 51, "num_tokens_all": 490, "is_greedy": false, "sum_logits_uncond": -185.98974609375, "logits_per_token": -3.3793502508425246, "logits_per_char": -0.7181119283040365, "num_chars": 240}, {"sum_logits": -108.68696594238281, "num_tokens": 48, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -143.49609375, "logits_per_token": -2.2643117904663086, "logits_per_char": -0.48091577850611866, "num_chars": 226}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 645, "native_id": 46472, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 31.635761260986328, "incorrect_loss_raw": 61.238450368245445, "correct_loss_per_char": 0.39056495383933737, "incorrect_loss_per_char": 0.599681592137007, "correct_loss_per_token": 1.6650400663677014, "incorrect_loss_per_token": 2.681825827471803, "correct_loss_uncond": -53.129817962646484, "incorrect_loss_uncond": -38.792441050211586}, "model_output": [{"sum_logits": -64.51542663574219, "num_tokens": 29, "num_tokens_all": 429, "is_greedy": false, "sum_logits_uncond": -111.82843780517578, "logits_per_token": -2.22466988399111, "logits_per_char": -0.4962725125826322, "num_chars": 130}, {"sum_logits": -56.09967041015625, "num_tokens": 19, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -94.8175048828125, "logits_per_token": -2.9526142321134867, "logits_per_char": -0.6841423220750762, "num_chars": 82}, {"sum_logits": -31.635761260986328, "num_tokens": 19, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -84.76557922363281, "logits_per_token": -1.6650400663677014, "logits_per_char": -0.39056495383933737, "num_chars": 81}, {"sum_logits": -63.10025405883789, "num_tokens": 22, "num_tokens_all": 422, "is_greedy": false, "sum_logits_uncond": -93.44673156738281, "logits_per_token": -2.868193366310813, "logits_per_char": -0.6186299417533127, "num_chars": 102}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 646, "native_id": 7469, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 20.195253372192383, "incorrect_loss_raw": 46.92068672180176, "correct_loss_per_char": 0.4487834082709418, "incorrect_loss_per_char": 0.6515323009053346, "correct_loss_per_token": 2.243917041354709, "incorrect_loss_per_token": 2.7610014458858605, "correct_loss_uncond": -37.66800117492676, "incorrect_loss_uncond": -29.65363121032715}, "model_output": [{"sum_logits": -20.195253372192383, "num_tokens": 9, "num_tokens_all": 400, "is_greedy": false, "sum_logits_uncond": -57.86325454711914, "logits_per_token": -2.243917041354709, "logits_per_char": -0.4487834082709418, "num_chars": 45}, {"sum_logits": -23.469621658325195, "num_tokens": 11, "num_tokens_all": 402, "is_greedy": false, "sum_logits_uncond": -46.969181060791016, "logits_per_token": -2.1336019689386543, "logits_per_char": -0.5334004922346636, "num_chars": 44}, {"sum_logits": -88.65895080566406, "num_tokens": 25, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -129.62416076660156, "logits_per_token": -3.5463580322265624, "logits_per_char": -0.7987292865375141, "num_chars": 111}, {"sum_logits": -28.633487701416016, "num_tokens": 11, "num_tokens_all": 402, "is_greedy": false, "sum_logits_uncond": -53.12961196899414, "logits_per_token": -2.603044336492365, "logits_per_char": -0.6224671239438264, "num_chars": 46}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 647, "native_id": 13434, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 82.64385223388672, "incorrect_loss_raw": 99.24383163452148, "correct_loss_per_char": 0.547310279694614, "incorrect_loss_per_char": 0.6430753776476676, "correct_loss_per_token": 2.7547950744628906, "incorrect_loss_per_token": 3.1009217973722207, "correct_loss_uncond": -17.538124084472656, "incorrect_loss_uncond": -8.921736399332682}, "model_output": [{"sum_logits": -112.61998748779297, "num_tokens": 43, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -130.8143310546875, "logits_per_token": -2.6190694764603015, "logits_per_char": -0.5575246905336285, "num_chars": 202}, {"sum_logits": -82.64385223388672, "num_tokens": 30, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -100.18197631835938, "logits_per_token": -2.7547950744628906, "logits_per_char": -0.547310279694614, "num_chars": 151}, {"sum_logits": -51.30001449584961, "num_tokens": 15, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -62.0379638671875, "logits_per_token": -3.4200009663899738, "logits_per_char": -0.7434784709543422, "num_chars": 69}, {"sum_logits": -133.81149291992188, "num_tokens": 41, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -131.6444091796875, "logits_per_token": -3.2636949492663874, "logits_per_char": -0.6282229714550323, "num_chars": 213}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 648, "native_id": 49729, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 100.3329086303711, "incorrect_loss_raw": 82.79822031656902, "correct_loss_per_char": 0.49182798348221124, "incorrect_loss_per_char": 0.571688595697379, "correct_loss_per_token": 2.5083227157592773, "incorrect_loss_per_token": 2.362350991296091, "correct_loss_uncond": -15.778244018554688, "incorrect_loss_uncond": -23.94104512532552}, "model_output": [{"sum_logits": -92.69676971435547, "num_tokens": 38, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -121.57047271728516, "logits_per_token": -2.439388676693565, "logits_per_char": -0.5686918387383771, "num_chars": 163}, {"sum_logits": -103.02587890625, "num_tokens": 42, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -117.63279724121094, "logits_per_token": -2.4529971168154763, "logits_per_char": -0.6399122913431677, "num_chars": 161}, {"sum_logits": -100.3329086303711, "num_tokens": 40, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -116.11115264892578, "logits_per_token": -2.5083227157592773, "logits_per_char": -0.49182798348221124, "num_chars": 204}, {"sum_logits": -52.67201232910156, "num_tokens": 24, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -81.0145263671875, "logits_per_token": -2.194667180379232, "logits_per_char": -0.506461657010592, "num_chars": 104}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 649, "native_id": 36403, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 118.61341857910156, "incorrect_loss_raw": 121.74711354573567, "correct_loss_per_char": 0.6114093741190803, "incorrect_loss_per_char": 0.9009956779143319, "correct_loss_per_token": 2.758451594862827, "incorrect_loss_per_token": 3.8247605170403323, "correct_loss_uncond": -24.787307739257812, "incorrect_loss_uncond": -19.412213643391926}, "model_output": [{"sum_logits": -118.61341857910156, "num_tokens": 43, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -143.40072631835938, "logits_per_token": -2.758451594862827, "logits_per_char": -0.6114093741190803, "num_chars": 194}, {"sum_logits": -73.76042938232422, "num_tokens": 22, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -93.94868469238281, "logits_per_token": -3.3527467901056465, "logits_per_char": -0.7931228965841314, "num_chars": 93}, {"sum_logits": -152.5567169189453, "num_tokens": 39, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -169.4371337890625, "logits_per_token": -3.911710690229367, "logits_per_char": -0.9245861631451231, "num_chars": 165}, {"sum_logits": -138.9241943359375, "num_tokens": 33, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -160.0921630859375, "logits_per_token": -4.209824070785984, "logits_per_char": -0.9852779740137412, "num_chars": 141}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 650, "native_id": 4122, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 19.85125732421875, "incorrect_loss_raw": 41.8978640238444, "correct_loss_per_char": 0.397025146484375, "incorrect_loss_per_char": 0.7482371194607073, "correct_loss_per_token": 1.8046597567471592, "incorrect_loss_per_token": 3.535070696744052, "correct_loss_uncond": -26.18334197998047, "incorrect_loss_uncond": -21.622706095377605}, "model_output": [{"sum_logits": -58.8729362487793, "num_tokens": 14, "num_tokens_all": 400, "is_greedy": false, "sum_logits_uncond": -85.77835083007812, "logits_per_token": -4.205209732055664, "logits_per_char": -0.8920141855875651, "num_chars": 66}, {"sum_logits": -31.026954650878906, "num_tokens": 11, "num_tokens_all": 397, "is_greedy": false, "sum_logits_uncond": -53.306766510009766, "logits_per_token": -2.8206322409889917, "logits_per_char": -0.5745732342755353, "num_chars": 54}, {"sum_logits": -19.85125732421875, "num_tokens": 11, "num_tokens_all": 397, "is_greedy": false, "sum_logits_uncond": -46.03459930419922, "logits_per_token": -1.8046597567471592, "logits_per_char": -0.397025146484375, "num_chars": 50}, {"sum_logits": -35.793701171875, "num_tokens": 10, "num_tokens_all": 396, "is_greedy": false, "sum_logits_uncond": -51.476593017578125, "logits_per_token": -3.5793701171875, "logits_per_char": -0.7781239385190217, "num_chars": 46}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 651, "native_id": 27559, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 69.4500503540039, "incorrect_loss_raw": 153.0204620361328, "correct_loss_per_char": 0.4037793625232785, "incorrect_loss_per_char": 0.6527189508858694, "correct_loss_per_token": 1.8276329040527344, "incorrect_loss_per_token": 3.047899648498229, "correct_loss_uncond": -36.61768341064453, "incorrect_loss_uncond": -28.87835693359375}, "model_output": [{"sum_logits": -116.71493530273438, "num_tokens": 45, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -135.15708923339844, "logits_per_token": -2.5936652289496527, "logits_per_char": -0.6274996521652386, "num_chars": 186}, {"sum_logits": -146.2274627685547, "num_tokens": 49, "num_tokens_all": 493, "is_greedy": false, "sum_logits_uncond": -185.5834503173828, "logits_per_token": -2.9842339340521367, "logits_per_char": -0.5734410304649203, "num_chars": 255}, {"sum_logits": -69.4500503540039, "num_tokens": 38, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -106.06773376464844, "logits_per_token": -1.8276329040527344, "logits_per_char": -0.4037793625232785, "num_chars": 172}, {"sum_logits": -196.11898803710938, "num_tokens": 55, "num_tokens_all": 499, "is_greedy": false, "sum_logits_uncond": -224.95591735839844, "logits_per_token": -3.5657997824928978, "logits_per_char": -0.7572161700274493, "num_chars": 259}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 652, "native_id": 40475, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 70.17686462402344, "incorrect_loss_raw": 41.21406173706055, "correct_loss_per_char": 0.8066306278623384, "incorrect_loss_per_char": 0.6500046782341707, "correct_loss_per_token": 3.6935191907380758, "incorrect_loss_per_token": 2.89950670514788, "correct_loss_uncond": -16.3724365234375, "incorrect_loss_uncond": -16.98053741455078}, "model_output": [{"sum_logits": -20.95620346069336, "num_tokens": 7, "num_tokens_all": 400, "is_greedy": false, "sum_logits_uncond": -29.517276763916016, "logits_per_token": -2.993743351527623, "logits_per_char": -0.5821167627970377, "num_chars": 36}, {"sum_logits": -70.17686462402344, "num_tokens": 19, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -86.54930114746094, "logits_per_token": -3.6935191907380758, "logits_per_char": -0.8066306278623384, "num_chars": 87}, {"sum_logits": -54.675621032714844, "num_tokens": 18, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -74.51980590820312, "logits_per_token": -3.0375345018174915, "logits_per_char": -0.8160540452644006, "num_chars": 67}, {"sum_logits": -48.01036071777344, "num_tokens": 18, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -70.54671478271484, "logits_per_token": -2.667242262098524, "logits_per_char": -0.551843226641074, "num_chars": 87}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 653, "native_id": 28807, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 94.33074188232422, "incorrect_loss_raw": 130.6579386393229, "correct_loss_per_char": 0.5329420445329052, "incorrect_loss_per_char": 0.6139909305141616, "correct_loss_per_token": 2.695164053780692, "incorrect_loss_per_token": 3.09830130376395, "correct_loss_uncond": -18.50450897216797, "incorrect_loss_uncond": -20.491864522298176}, "model_output": [{"sum_logits": -104.35282897949219, "num_tokens": 37, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -121.64969635009766, "logits_per_token": -2.8203467291754647, "logits_per_char": -0.5640693458350929, "num_chars": 185}, {"sum_logits": -128.01527404785156, "num_tokens": 39, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -150.40054321289062, "logits_per_token": -3.2824429243038864, "logits_per_char": -0.6845736580099014, "num_chars": 187}, {"sum_logits": -94.33074188232422, "num_tokens": 35, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -112.83525085449219, "logits_per_token": -2.695164053780692, "logits_per_char": -0.5329420445329052, "num_chars": 177}, {"sum_logits": -159.605712890625, "num_tokens": 50, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -181.399169921875, "logits_per_token": -3.1921142578125, "logits_per_char": -0.5933297876974907, "num_chars": 269}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 654, "native_id": 17240, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 82.90359497070312, "incorrect_loss_raw": 139.81971740722656, "correct_loss_per_char": 0.4409765689931017, "incorrect_loss_per_char": 0.716500690536703, "correct_loss_per_token": 2.763453165690104, "incorrect_loss_per_token": 3.4349161120428557, "correct_loss_uncond": -13.420509338378906, "incorrect_loss_uncond": -7.963155110677083}, "model_output": [{"sum_logits": -162.91355895996094, "num_tokens": 45, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -168.61790466308594, "logits_per_token": -3.6203013102213544, "logits_per_char": -0.7176808764755989, "num_chars": 227}, {"sum_logits": -82.90359497070312, "num_tokens": 30, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -96.32410430908203, "logits_per_token": -2.763453165690104, "logits_per_char": -0.4409765689931017, "num_chars": 188}, {"sum_logits": -77.46885681152344, "num_tokens": 23, "num_tokens_all": 446, "is_greedy": false, "sum_logits_uncond": -85.3797607421875, "logits_per_token": -3.36821116571841, "logits_per_char": -0.7240080075843311, "num_chars": 107}, {"sum_logits": -179.0767364501953, "num_tokens": 54, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -189.3509521484375, "logits_per_token": -3.3162358601888022, "logits_per_char": -0.7078131875501791, "num_chars": 253}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 655, "native_id": 11215, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 64.19873046875, "incorrect_loss_raw": 89.38120778401692, "correct_loss_per_char": 0.34702016469594593, "incorrect_loss_per_char": 0.541464615355821, "correct_loss_per_token": 1.5658226943597562, "incorrect_loss_per_token": 2.4458464628730723, "correct_loss_uncond": -16.348892211914062, "incorrect_loss_uncond": -12.577117919921875}, "model_output": [{"sum_logits": -81.78736114501953, "num_tokens": 30, "num_tokens_all": 445, "is_greedy": false, "sum_logits_uncond": -100.33868408203125, "logits_per_token": -2.726245371500651, "logits_per_char": -0.5841954367501395, "num_chars": 140}, {"sum_logits": -64.19873046875, "num_tokens": 41, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -80.54762268066406, "logits_per_token": -1.5658226943597562, "logits_per_char": -0.34702016469594593, "num_chars": 185}, {"sum_logits": -113.3603515625, "num_tokens": 46, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -124.11725616455078, "logits_per_token": -2.46435546875, "logits_per_char": -0.55029296875, "num_chars": 206}, {"sum_logits": -72.99591064453125, "num_tokens": 34, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -81.41903686523438, "logits_per_token": -2.146938548368566, "logits_per_char": -0.4899054405673238, "num_chars": 149}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 656, "native_id": 22971, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 20.401859283447266, "incorrect_loss_raw": 28.977816899617512, "correct_loss_per_char": 0.728637831551688, "incorrect_loss_per_char": 0.6427712729372262, "correct_loss_per_token": 2.914551326206752, "incorrect_loss_per_token": 2.8880675446753408, "correct_loss_uncond": -20.518024444580078, "incorrect_loss_uncond": -17.949358622233074}, "model_output": [{"sum_logits": -20.401859283447266, "num_tokens": 7, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -40.919883728027344, "logits_per_token": -2.914551326206752, "logits_per_char": -0.728637831551688, "num_chars": 28}, {"sum_logits": -14.933664321899414, "num_tokens": 6, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -26.790842056274414, "logits_per_token": -2.4889440536499023, "logits_per_char": -0.574371704688439, "num_chars": 26}, {"sum_logits": -42.68457794189453, "num_tokens": 17, "num_tokens_all": 422, "is_greedy": false, "sum_logits_uncond": -67.13526153564453, "logits_per_token": -2.5108575259937957, "logits_per_char": -0.5616391834459806, "num_chars": 76}, {"sum_logits": -29.315208435058594, "num_tokens": 8, "num_tokens_all": 413, "is_greedy": false, "sum_logits_uncond": -46.85542297363281, "logits_per_token": -3.664401054382324, "logits_per_char": -0.7923029306772593, "num_chars": 37}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 657, "native_id": 18992, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 29.05364227294922, "incorrect_loss_raw": 28.734636942545574, "correct_loss_per_char": 0.46116892496744794, "incorrect_loss_per_char": 0.5079101319001796, "correct_loss_per_token": 2.0752601623535156, "incorrect_loss_per_token": 2.265531095832285, "correct_loss_uncond": -18.000999450683594, "incorrect_loss_uncond": -42.140846252441406}, "model_output": [{"sum_logits": -26.571699142456055, "num_tokens": 11, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -74.69395446777344, "logits_per_token": -2.4156090129505503, "logits_per_char": -0.5210137086756089, "num_chars": 51}, {"sum_logits": -24.330209732055664, "num_tokens": 12, "num_tokens_all": 417, "is_greedy": false, "sum_logits_uncond": -77.07283020019531, "logits_per_token": -2.027517477671305, "logits_per_char": -0.44236744967373937, "num_chars": 55}, {"sum_logits": -29.05364227294922, "num_tokens": 14, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -47.05464172363281, "logits_per_token": -2.0752601623535156, "logits_per_char": -0.46116892496744794, "num_chars": 63}, {"sum_logits": -35.302001953125, "num_tokens": 15, "num_tokens_all": 420, "is_greedy": false, "sum_logits_uncond": -60.85966491699219, "logits_per_token": -2.353466796875, "logits_per_char": -0.5603492373511905, "num_chars": 63}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 658, "native_id": 45951, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 71.32181549072266, "incorrect_loss_raw": 116.38572947184245, "correct_loss_per_char": 0.5131065862641918, "incorrect_loss_per_char": 0.7148639782851283, "correct_loss_per_token": 2.377393849690755, "incorrect_loss_per_token": 2.766897152761093, "correct_loss_uncond": -27.739051818847656, "incorrect_loss_uncond": -24.45855458577474}, "model_output": [{"sum_logits": -71.32181549072266, "num_tokens": 30, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -99.06086730957031, "logits_per_token": -2.377393849690755, "logits_per_char": -0.5131065862641918, "num_chars": 139}, {"sum_logits": -119.7481460571289, "num_tokens": 54, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -152.45501708984375, "logits_per_token": -2.217558260317202, "logits_per_char": -0.567526758564592, "num_chars": 211}, {"sum_logits": -141.79515075683594, "num_tokens": 48, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -148.8548126220703, "logits_per_token": -2.9540656407674155, "logits_per_char": -0.7877508375379775, "num_chars": 180}, {"sum_logits": -87.6138916015625, "num_tokens": 28, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -121.2230224609375, "logits_per_token": -3.1290675571986606, "logits_per_char": -0.7893143387528153, "num_chars": 111}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 659, "native_id": 26809, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 81.83981323242188, "incorrect_loss_raw": 102.62074279785156, "correct_loss_per_char": 0.41333239006273675, "incorrect_loss_per_char": 0.5896860268971912, "correct_loss_per_token": 2.0984567495492787, "incorrect_loss_per_token": 2.8341739299419046, "correct_loss_uncond": -23.950424194335938, "incorrect_loss_uncond": -22.098241170247395}, "model_output": [{"sum_logits": -121.21623229980469, "num_tokens": 49, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -153.52969360351562, "logits_per_token": -2.4738006591796875, "logits_per_char": -0.561186260647244, "num_chars": 216}, {"sum_logits": -81.83981323242188, "num_tokens": 39, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -105.79023742675781, "logits_per_token": -2.0984567495492787, "logits_per_char": -0.41333239006273675, "num_chars": 198}, {"sum_logits": -67.23080444335938, "num_tokens": 24, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -85.53936767578125, "logits_per_token": -2.801283518473307, "logits_per_char": -0.5293764129398376, "num_chars": 127}, {"sum_logits": -119.41519165039062, "num_tokens": 37, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -135.087890625, "logits_per_token": -3.2274376121727197, "logits_per_char": -0.6784954071044922, "num_chars": 176}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 660, "native_id": 45031, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 76.29443359375, "incorrect_loss_raw": 73.60219065348308, "correct_loss_per_char": 0.6305325090392562, "incorrect_loss_per_char": 0.6382305962152556, "correct_loss_per_token": 2.6308425377155173, "incorrect_loss_per_token": 2.564465422299875, "correct_loss_uncond": -27.304489135742188, "incorrect_loss_uncond": -18.86573028564453}, "model_output": [{"sum_logits": -76.29443359375, "num_tokens": 29, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -103.59892272949219, "logits_per_token": -2.6308425377155173, "logits_per_char": -0.6305325090392562, "num_chars": 121}, {"sum_logits": -56.63438415527344, "num_tokens": 23, "num_tokens_all": 443, "is_greedy": false, "sum_logits_uncond": -69.67864990234375, "logits_per_token": -2.4623645284901494, "logits_per_char": -0.6363413950030723, "num_chars": 89}, {"sum_logits": -100.31869506835938, "num_tokens": 34, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -120.00457763671875, "logits_per_token": -2.9505498549517464, "logits_per_char": -0.6871143497832833, "num_chars": 146}, {"sum_logits": -63.853492736816406, "num_tokens": 28, "num_tokens_all": 448, "is_greedy": false, "sum_logits_uncond": -87.72053527832031, "logits_per_token": -2.2804818834577287, "logits_per_char": -0.5912360438594112, "num_chars": 108}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 661, "native_id": 16887, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 52.58936309814453, "incorrect_loss_raw": 85.16031392415364, "correct_loss_per_char": 0.4275557975458905, "incorrect_loss_per_char": 0.5931224738079313, "correct_loss_per_token": 1.6964310676820817, "incorrect_loss_per_token": 2.4718697039579283, "correct_loss_uncond": -23.85340118408203, "incorrect_loss_uncond": -25.88121287027995}, "model_output": [{"sum_logits": -96.12671661376953, "num_tokens": 35, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -123.81404113769531, "logits_per_token": -2.7464776175362724, "logits_per_char": -0.6866194043840681, "num_chars": 140}, {"sum_logits": -105.20027160644531, "num_tokens": 36, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -131.401611328125, "logits_per_token": -2.922229766845703, "logits_per_char": -0.641465070771008, "num_chars": 164}, {"sum_logits": -54.153953552246094, "num_tokens": 31, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -77.90892791748047, "logits_per_token": -1.7469017274918095, "logits_per_char": -0.45128294626871746, "num_chars": 120}, {"sum_logits": -52.58936309814453, "num_tokens": 31, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -76.44276428222656, "logits_per_token": -1.6964310676820817, "logits_per_char": -0.4275557975458905, "num_chars": 123}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 662, "native_id": 39915, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 69.85971069335938, "incorrect_loss_raw": 73.89771525065105, "correct_loss_per_char": 0.6528944924613026, "incorrect_loss_per_char": 0.5768159633106273, "correct_loss_per_token": 2.253539054624496, "incorrect_loss_per_token": 2.706119294783541, "correct_loss_uncond": -22.251731872558594, "incorrect_loss_uncond": -14.287485758463541}, "model_output": [{"sum_logits": -77.83328247070312, "num_tokens": 27, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -100.11929321289062, "logits_per_token": -2.8827141655815973, "logits_per_char": -0.5852126501556626, "num_chars": 133}, {"sum_logits": -77.20381164550781, "num_tokens": 35, "num_tokens_all": 493, "is_greedy": false, "sum_logits_uncond": -86.44488525390625, "logits_per_token": -2.205823189871652, "logits_per_char": -0.498089107390373, "num_chars": 155}, {"sum_logits": -69.85971069335938, "num_tokens": 31, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -92.11144256591797, "logits_per_token": -2.253539054624496, "logits_per_char": -0.6528944924613026, "num_chars": 107}, {"sum_logits": -66.65605163574219, "num_tokens": 22, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -77.99142456054688, "logits_per_token": -3.029820528897372, "logits_per_char": -0.6471461323858465, "num_chars": 103}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 663, "native_id": 49114, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 71.28311157226562, "incorrect_loss_raw": 103.09156036376953, "correct_loss_per_char": 0.389525199848446, "incorrect_loss_per_char": 0.5778011181272177, "correct_loss_per_token": 1.8758713571648848, "incorrect_loss_per_token": 2.7385688714813767, "correct_loss_uncond": -27.353302001953125, "incorrect_loss_uncond": -23.578684488932293}, "model_output": [{"sum_logits": -105.78282165527344, "num_tokens": 35, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -122.24869537353516, "logits_per_token": -3.0223663330078123, "logits_per_char": -0.6114613968512915, "num_chars": 173}, {"sum_logits": -122.89857482910156, "num_tokens": 40, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -151.9144744873047, "logits_per_token": -3.072464370727539, "logits_per_char": -0.6334978083974308, "num_chars": 194}, {"sum_logits": -71.28311157226562, "num_tokens": 38, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -98.63641357421875, "logits_per_token": -1.8758713571648848, "logits_per_char": -0.389525199848446, "num_chars": 183}, {"sum_logits": -80.5932846069336, "num_tokens": 38, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -105.84756469726562, "logits_per_token": -2.120875910708779, "logits_per_char": -0.4884441491329309, "num_chars": 165}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 664, "native_id": 25436, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 77.19827270507812, "incorrect_loss_raw": 135.99392700195312, "correct_loss_per_char": 0.4595135280064174, "incorrect_loss_per_char": 0.6994028799911547, "correct_loss_per_token": 2.086439802839949, "incorrect_loss_per_token": 3.242330893492087, "correct_loss_uncond": -13.920867919921875, "incorrect_loss_uncond": -13.235321044921875}, "model_output": [{"sum_logits": -213.296630859375, "num_tokens": 66, "num_tokens_all": 494, "is_greedy": false, "sum_logits_uncond": -226.11508178710938, "logits_per_token": -3.2317671342329546, "logits_per_char": -0.7756241122159091, "num_chars": 275}, {"sum_logits": -121.67234802246094, "num_tokens": 33, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -132.7995147705078, "logits_per_token": -3.687040849165483, "logits_per_char": -0.7652348932230247, "num_chars": 159}, {"sum_logits": -77.19827270507812, "num_tokens": 37, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -91.119140625, "logits_per_token": -2.086439802839949, "logits_per_char": -0.4595135280064174, "num_chars": 168}, {"sum_logits": -73.01280212402344, "num_tokens": 26, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -88.77314758300781, "logits_per_token": -2.8081846970778246, "logits_per_char": -0.55734963453453, "num_chars": 131}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 665, "native_id": 29683, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 85.53428649902344, "incorrect_loss_raw": 89.40991719563802, "correct_loss_per_char": 0.6383155708882347, "incorrect_loss_per_char": 0.598601149343377, "correct_loss_per_token": 2.949458155138739, "incorrect_loss_per_token": 2.538934472768428, "correct_loss_uncond": -28.028518676757812, "incorrect_loss_uncond": -23.99853006998698}, "model_output": [{"sum_logits": -90.73912048339844, "num_tokens": 38, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -126.50454711914062, "logits_per_token": -2.38787159166838, "logits_per_char": -0.5566817207570456, "num_chars": 163}, {"sum_logits": -91.11271667480469, "num_tokens": 33, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -105.1194076538086, "logits_per_token": -2.760991414388021, "logits_per_char": -0.6554871703223358, "num_chars": 139}, {"sum_logits": -86.37791442871094, "num_tokens": 35, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -108.60138702392578, "logits_per_token": -2.467940412248884, "logits_per_char": -0.5836345569507496, "num_chars": 148}, {"sum_logits": -85.53428649902344, "num_tokens": 29, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -113.56280517578125, "logits_per_token": -2.949458155138739, "logits_per_char": -0.6383155708882347, "num_chars": 134}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 666, "native_id": 39516, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 120.15271759033203, "incorrect_loss_raw": 146.9769032796224, "correct_loss_per_char": 0.5316491928775754, "incorrect_loss_per_char": 0.5788711018495046, "correct_loss_per_token": 2.6700603908962672, "incorrect_loss_per_token": 2.495790071957441, "correct_loss_uncond": -30.79694366455078, "incorrect_loss_uncond": -33.241902669270836}, "model_output": [{"sum_logits": -115.43516540527344, "num_tokens": 50, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -151.8155059814453, "logits_per_token": -2.3087033081054686, "logits_per_char": -0.544505497194686, "num_chars": 212}, {"sum_logits": -128.00070190429688, "num_tokens": 51, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -157.72430419921875, "logits_per_token": -2.509817684397978, "logits_per_char": -0.6095271519252232, "num_chars": 210}, {"sum_logits": -197.49484252929688, "num_tokens": 74, "num_tokens_all": 505, "is_greedy": false, "sum_logits_uncond": -231.11660766601562, "logits_per_token": -2.6688492233688765, "logits_per_char": -0.5825806564286043, "num_chars": 339}, {"sum_logits": -120.15271759033203, "num_tokens": 45, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -150.9496612548828, "logits_per_token": -2.6700603908962672, "logits_per_char": -0.5316491928775754, "num_chars": 226}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 667, "native_id": 45796, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 11.116776466369629, "incorrect_loss_raw": 43.865492502848305, "correct_loss_per_char": 0.30045341800999, "incorrect_loss_per_char": 0.5426195107949133, "correct_loss_per_token": 1.010616042397239, "incorrect_loss_per_token": 2.323709721284754, "correct_loss_uncond": -24.176928520202637, "incorrect_loss_uncond": -26.79709243774414}, "model_output": [{"sum_logits": -64.15170288085938, "num_tokens": 25, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -93.16140747070312, "logits_per_token": -2.566068115234375, "logits_per_char": -0.661357761658344, "num_chars": 97}, {"sum_logits": -34.725894927978516, "num_tokens": 14, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -60.57027053833008, "logits_per_token": -2.4804210662841797, "logits_per_char": -0.5182969392235599, "num_chars": 67}, {"sum_logits": -11.116776466369629, "num_tokens": 11, "num_tokens_all": 400, "is_greedy": false, "sum_logits_uncond": -35.293704986572266, "logits_per_token": -1.010616042397239, "logits_per_char": -0.30045341800999, "num_chars": 37}, {"sum_logits": -32.71887969970703, "num_tokens": 17, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -58.25607681274414, "logits_per_token": -1.9246399823357077, "logits_per_char": -0.44820383150283605, "num_chars": 73}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 668, "native_id": 25142, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 87.5489501953125, "incorrect_loss_raw": 120.20753733317058, "correct_loss_per_char": 0.5305996981534091, "incorrect_loss_per_char": 0.5826898366546444, "correct_loss_per_token": 2.303919741981908, "incorrect_loss_per_token": 2.7671023468488354, "correct_loss_uncond": -21.10942840576172, "incorrect_loss_uncond": -19.218699137369793}, "model_output": [{"sum_logits": -146.80442810058594, "num_tokens": 51, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -154.42333984375, "logits_per_token": -2.878518198050705, "logits_per_char": -0.643879070616605, "num_chars": 228}, {"sum_logits": -147.6774139404297, "num_tokens": 47, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -165.53823852539062, "logits_per_token": -3.142072637030419, "logits_per_char": -0.6448795368577716, "num_chars": 229}, {"sum_logits": -87.5489501953125, "num_tokens": 38, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -108.65837860107422, "logits_per_token": -2.303919741981908, "logits_per_char": -0.5305996981534091, "num_chars": 165}, {"sum_logits": -66.1407699584961, "num_tokens": 29, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -98.31713104248047, "logits_per_token": -2.2807162054653825, "logits_per_char": -0.4593109024895562, "num_chars": 144}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 669, "native_id": 27000, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 36.5777587890625, "incorrect_loss_raw": 40.43513298034668, "correct_loss_per_char": 0.6531742640904018, "incorrect_loss_per_char": 0.8279317316382823, "correct_loss_per_token": 2.8136737530048075, "incorrect_loss_per_token": 3.7705928568254437, "correct_loss_uncond": -26.62470245361328, "incorrect_loss_uncond": -18.237860997517902}, "model_output": [{"sum_logits": -36.5777587890625, "num_tokens": 13, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -63.20246124267578, "logits_per_token": -2.8136737530048075, "logits_per_char": -0.6531742640904018, "num_chars": 56}, {"sum_logits": -33.01017379760742, "num_tokens": 9, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -56.82838439941406, "logits_per_token": -3.667797088623047, "logits_per_char": -0.6001849781383167, "num_chars": 55}, {"sum_logits": -62.01502990722656, "num_tokens": 19, "num_tokens_all": 422, "is_greedy": false, "sum_logits_uncond": -80.84503173828125, "logits_per_token": -3.2639489424856087, "logits_per_char": -0.6890558878580729, "num_chars": 90}, {"sum_logits": -26.280195236206055, "num_tokens": 6, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -38.34556579589844, "logits_per_token": -4.380032539367676, "logits_per_char": -1.194554328918457, "num_chars": 22}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 670, "native_id": 43311, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 117.20635223388672, "incorrect_loss_raw": 129.48839569091797, "correct_loss_per_char": 0.6584626529993636, "incorrect_loss_per_char": 0.7609607158853624, "correct_loss_per_token": 3.005291082920172, "incorrect_loss_per_token": 3.2419278556608084, "correct_loss_uncond": -12.084205627441406, "incorrect_loss_uncond": -10.955866495768229}, "model_output": [{"sum_logits": -71.2371826171875, "num_tokens": 27, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -86.55116271972656, "logits_per_token": -2.6384141710069446, "logits_per_char": -0.6720488926149765, "num_chars": 106}, {"sum_logits": -206.1853790283203, "num_tokens": 57, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -215.49514770507812, "logits_per_token": -3.6172873513740407, "logits_per_char": -0.845022045198034, "num_chars": 244}, {"sum_logits": -117.20635223388672, "num_tokens": 39, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -129.29055786132812, "logits_per_token": -3.005291082920172, "logits_per_char": -0.6584626529993636, "num_chars": 178}, {"sum_logits": -111.0426254272461, "num_tokens": 32, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -119.2864761352539, "logits_per_token": -3.4700820446014404, "logits_per_char": -0.7658112098430765, "num_chars": 145}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 671, "native_id": 38608, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 54.42659378051758, "incorrect_loss_raw": 83.76083628336589, "correct_loss_per_char": 0.4535549481709798, "incorrect_loss_per_char": 0.5534898690721745, "correct_loss_per_token": 1.8142197926839192, "incorrect_loss_per_token": 2.2795003599688592, "correct_loss_uncond": -24.48678970336914, "incorrect_loss_uncond": -19.760223388671875}, "model_output": [{"sum_logits": -79.64569091796875, "num_tokens": 34, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -102.87307739257812, "logits_per_token": -2.342520321116728, "logits_per_char": -0.5418074212106717, "num_chars": 147}, {"sum_logits": -49.990211486816406, "num_tokens": 27, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -68.68408966064453, "logits_per_token": -1.8514893143265336, "logits_per_char": -0.471605768743551, "num_chars": 106}, {"sum_logits": -54.42659378051758, "num_tokens": 30, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -78.91338348388672, "logits_per_token": -1.8142197926839192, "logits_per_char": -0.4535549481709798, "num_chars": 120}, {"sum_logits": -121.6466064453125, "num_tokens": 46, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -139.00601196289062, "logits_per_token": -2.6444914444633154, "logits_per_char": -0.6470564172623006, "num_chars": 188}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 672, "native_id": 16407, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 121.49012756347656, "incorrect_loss_raw": 109.0128657023112, "correct_loss_per_char": 0.499959372689204, "incorrect_loss_per_char": 0.7652850522398316, "correct_loss_per_token": 2.3821593639897367, "incorrect_loss_per_token": 3.4573786323453177, "correct_loss_uncond": -37.4013671875, "incorrect_loss_uncond": -14.083704630533854}, "model_output": [{"sum_logits": -121.49012756347656, "num_tokens": 51, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -158.89149475097656, "logits_per_token": -2.3821593639897367, "logits_per_char": -0.499959372689204, "num_chars": 243}, {"sum_logits": -148.95712280273438, "num_tokens": 36, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -160.7446746826172, "logits_per_token": -4.137697855631511, "logits_per_char": -0.8368377685546875, "num_chars": 178}, {"sum_logits": -80.56500244140625, "num_tokens": 27, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -88.66028594970703, "logits_per_token": -2.9838889793113426, "logits_per_char": -0.7258108328054617, "num_chars": 111}, {"sum_logits": -97.51647186279297, "num_tokens": 30, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -119.88475036621094, "logits_per_token": -3.250549062093099, "logits_per_char": -0.7332065553593456, "num_chars": 133}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 673, "native_id": 47215, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 115.82611846923828, "incorrect_loss_raw": 78.23800150553386, "correct_loss_per_char": 0.4370796923367482, "incorrect_loss_per_char": 0.5698574044978046, "correct_loss_per_token": 1.9631545503260726, "incorrect_loss_per_token": 2.171619368379836, "correct_loss_uncond": -20.67064666748047, "incorrect_loss_uncond": -26.604700724283855}, "model_output": [{"sum_logits": -78.97433471679688, "num_tokens": 33, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -102.71963500976562, "logits_per_token": -2.393161658084754, "logits_per_char": -0.6267804342602926, "num_chars": 126}, {"sum_logits": -115.82611846923828, "num_tokens": 59, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -136.49676513671875, "logits_per_token": -1.9631545503260726, "logits_per_char": -0.4370796923367482, "num_chars": 265}, {"sum_logits": -79.60706329345703, "num_tokens": 31, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -109.22303771972656, "logits_per_token": -2.5679697836599042, "logits_per_char": -0.6862677870125606, "num_chars": 116}, {"sum_logits": -76.13260650634766, "num_tokens": 49, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -102.58543395996094, "logits_per_token": -1.55372666339485, "logits_per_char": -0.39652399222056073, "num_chars": 192}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 674, "native_id": 38696, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 52.166786193847656, "incorrect_loss_raw": 106.24655151367188, "correct_loss_per_char": 0.5269372342812895, "incorrect_loss_per_char": 0.7268758706656372, "correct_loss_per_token": 2.4841326758975075, "incorrect_loss_per_token": 3.2371797648909824, "correct_loss_uncond": -9.456687927246094, "incorrect_loss_uncond": -18.36615498860677}, "model_output": [{"sum_logits": -106.01618957519531, "num_tokens": 34, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -124.81292724609375, "logits_per_token": -3.1181232227998623, "logits_per_char": -0.7020939706966577, "num_chars": 151}, {"sum_logits": -85.89511108398438, "num_tokens": 30, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -97.07301330566406, "logits_per_token": -2.863170369466146, "logits_per_char": -0.7098769511073089, "num_chars": 121}, {"sum_logits": -126.82835388183594, "num_tokens": 34, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -151.95217895507812, "logits_per_token": -3.7302457024069393, "logits_per_char": -0.768656690192945, "num_chars": 165}, {"sum_logits": -52.166786193847656, "num_tokens": 21, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -61.62347412109375, "logits_per_token": -2.4841326758975075, "logits_per_char": -0.5269372342812895, "num_chars": 99}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 675, "native_id": 41028, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 114.2582778930664, "incorrect_loss_raw": 123.56708272298177, "correct_loss_per_char": 0.42161726159803103, "incorrect_loss_per_char": 0.5453458835640531, "correct_loss_per_token": 2.483875606371009, "incorrect_loss_per_token": 2.846673401844719, "correct_loss_uncond": -24.585716247558594, "incorrect_loss_uncond": -22.81537373860677}, "model_output": [{"sum_logits": -170.5335693359375, "num_tokens": 57, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -189.51226806640625, "logits_per_token": -2.9918170058936404, "logits_per_char": -0.5880467908135776, "num_chars": 290}, {"sum_logits": -109.5233154296875, "num_tokens": 38, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -138.07302856445312, "logits_per_token": -2.882192511307566, "logits_per_char": -0.524035002055921, "num_chars": 209}, {"sum_logits": -90.64436340332031, "num_tokens": 34, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -111.56207275390625, "logits_per_token": -2.66601068833295, "logits_per_char": -0.5239558578226607, "num_chars": 173}, {"sum_logits": -114.2582778930664, "num_tokens": 46, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -138.843994140625, "logits_per_token": -2.483875606371009, "logits_per_char": -0.42161726159803103, "num_chars": 271}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 676, "native_id": 47953, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 26.344505310058594, "incorrect_loss_raw": 48.83623186747233, "correct_loss_per_char": 0.9408751896449498, "incorrect_loss_per_char": 0.8385794215732151, "correct_loss_per_token": 4.390750885009766, "incorrect_loss_per_token": 3.756233395952167, "correct_loss_uncond": -13.834171295166016, "incorrect_loss_uncond": -18.50489616394043}, "model_output": [{"sum_logits": -45.64545440673828, "num_tokens": 11, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -64.06192016601562, "logits_per_token": -4.149586764248935, "logits_per_char": -0.9509469668070475, "num_chars": 48}, {"sum_logits": -69.001953125, "num_tokens": 22, "num_tokens_all": 430, "is_greedy": false, "sum_logits_uncond": -92.42410278320312, "logits_per_token": -3.136452414772727, "logits_per_char": -0.7263363486842105, "num_chars": 95}, {"sum_logits": -26.344505310058594, "num_tokens": 6, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -40.17867660522461, "logits_per_token": -4.390750885009766, "logits_per_char": -0.9408751896449498, "num_chars": 28}, {"sum_logits": -31.86128807067871, "num_tokens": 8, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -45.53736114501953, "logits_per_token": -3.982661008834839, "logits_per_char": -0.8384549492283871, "num_chars": 38}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 677, "native_id": 24980, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 27.150314331054688, "incorrect_loss_raw": 30.89841651916504, "correct_loss_per_char": 1.0442428588867188, "incorrect_loss_per_char": 0.7906263739330405, "correct_loss_per_token": 5.430062866210937, "incorrect_loss_per_token": 3.5520229460701107, "correct_loss_uncond": -16.108455657958984, "incorrect_loss_uncond": -16.436553319295246}, "model_output": [{"sum_logits": -27.150314331054688, "num_tokens": 5, "num_tokens_all": 396, "is_greedy": false, "sum_logits_uncond": -43.25876998901367, "logits_per_token": -5.430062866210937, "logits_per_char": -1.0442428588867188, "num_chars": 26}, {"sum_logits": -24.36263084411621, "num_tokens": 7, "num_tokens_all": 398, "is_greedy": false, "sum_logits_uncond": -36.605926513671875, "logits_per_token": -3.4803758348737444, "logits_per_char": -0.7858913175521358, "num_chars": 31}, {"sum_logits": -37.513816833496094, "num_tokens": 10, "num_tokens_all": 401, "is_greedy": false, "sum_logits_uncond": -58.31543731689453, "logits_per_token": -3.751381683349609, "logits_per_char": -0.815517757249915, "num_chars": 46}, {"sum_logits": -30.818801879882812, "num_tokens": 9, "num_tokens_all": 400, "is_greedy": false, "sum_logits_uncond": -47.08354568481445, "logits_per_token": -3.424311319986979, "logits_per_char": -0.7704700469970703, "num_chars": 40}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 678, "native_id": 24049, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 82.62274932861328, "incorrect_loss_raw": 154.37762451171875, "correct_loss_per_char": 0.5901624952043806, "incorrect_loss_per_char": 0.7873295164531114, "correct_loss_per_token": 2.23304727915171, "incorrect_loss_per_token": 3.3679262272049875, "correct_loss_uncond": -16.719573974609375, "incorrect_loss_uncond": -17.324330647786457}, "model_output": [{"sum_logits": -177.92933654785156, "num_tokens": 48, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -204.48348999023438, "logits_per_token": -3.7068611780802407, "logits_per_char": -0.827578309524891, "num_chars": 215}, {"sum_logits": -155.41021728515625, "num_tokens": 47, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -168.79879760742188, "logits_per_token": -3.306600367769282, "logits_per_char": -0.8052342864515868, "num_chars": 193}, {"sum_logits": -82.62274932861328, "num_tokens": 37, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -99.34232330322266, "logits_per_token": -2.23304727915171, "logits_per_char": -0.5901624952043806, "num_chars": 140}, {"sum_logits": -129.79331970214844, "num_tokens": 42, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -141.82357788085938, "logits_per_token": -3.090317135765439, "logits_per_char": -0.7291759533828563, "num_chars": 178}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 679, "native_id": 21851, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 28.669034957885742, "incorrect_loss_raw": 45.948506673177086, "correct_loss_per_char": 0.7748387826455606, "incorrect_loss_per_char": 0.7886967996999176, "correct_loss_per_token": 3.5836293697357178, "incorrect_loss_per_token": 3.525093347598345, "correct_loss_uncond": -22.24223518371582, "incorrect_loss_uncond": -21.294817606608074}, "model_output": [{"sum_logits": -36.13701629638672, "num_tokens": 8, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -48.814727783203125, "logits_per_token": -4.51712703704834, "logits_per_char": -0.9509741130628084, "num_chars": 38}, {"sum_logits": -36.12989044189453, "num_tokens": 13, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -62.09137725830078, "logits_per_token": -2.7792223416841946, "logits_per_char": -0.7527060508728027, "num_chars": 48}, {"sum_logits": -28.669034957885742, "num_tokens": 8, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -50.91127014160156, "logits_per_token": -3.5836293697357178, "logits_per_char": -0.7748387826455606, "num_chars": 37}, {"sum_logits": -65.57861328125, "num_tokens": 20, "num_tokens_all": 422, "is_greedy": false, "sum_logits_uncond": -90.82386779785156, "logits_per_token": -3.2789306640625, "logits_per_char": -0.6624102351641414, "num_chars": 99}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 680, "native_id": 14644, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 91.4154052734375, "incorrect_loss_raw": 98.0162862141927, "correct_loss_per_char": 0.4862521557097739, "incorrect_loss_per_char": 0.5565099454696082, "correct_loss_per_token": 2.2853851318359375, "incorrect_loss_per_token": 2.7380889589630306, "correct_loss_uncond": -20.889999389648438, "incorrect_loss_uncond": -14.100369771321615}, "model_output": [{"sum_logits": -111.36934661865234, "num_tokens": 39, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -117.17333221435547, "logits_per_token": -2.855624272273137, "logits_per_char": -0.6512827287640488, "num_chars": 171}, {"sum_logits": -91.4154052734375, "num_tokens": 40, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -112.30540466308594, "logits_per_token": -2.2853851318359375, "logits_per_char": -0.4862521557097739, "num_chars": 188}, {"sum_logits": -81.85297393798828, "num_tokens": 32, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -109.80044555664062, "logits_per_token": -2.557905435562134, "logits_per_char": -0.5115810871124268, "num_chars": 160}, {"sum_logits": -100.8265380859375, "num_tokens": 36, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -109.37619018554688, "logits_per_token": -2.8007371690538196, "logits_per_char": -0.5066660205323492, "num_chars": 199}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 681, "native_id": 38002, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 84.16825866699219, "incorrect_loss_raw": 81.6375020345052, "correct_loss_per_char": 0.6376383232347893, "incorrect_loss_per_char": 0.7201113026292902, "correct_loss_per_token": 2.475537019617417, "incorrect_loss_per_token": 3.053610197549473, "correct_loss_uncond": -16.519027709960938, "incorrect_loss_uncond": -19.68731943766276}, "model_output": [{"sum_logits": -61.08454895019531, "num_tokens": 23, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -81.57685852050781, "logits_per_token": -2.655849954356318, "logits_per_char": -0.6047975143583695, "num_chars": 101}, {"sum_logits": -101.89315795898438, "num_tokens": 27, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -120.62183380126953, "logits_per_token": -3.7738206651475696, "logits_per_char": -0.8783892927498653, "num_chars": 116}, {"sum_logits": -84.16825866699219, "num_tokens": 34, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -100.68728637695312, "logits_per_token": -2.475537019617417, "logits_per_char": -0.6376383232347893, "num_chars": 132}, {"sum_logits": -81.93479919433594, "num_tokens": 30, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -101.77577209472656, "logits_per_token": -2.731159973144531, "logits_per_char": -0.6771471007796358, "num_chars": 121}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 682, "native_id": 31651, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 40.39508056640625, "incorrect_loss_raw": 60.87115287780762, "correct_loss_per_char": 0.5689447967099471, "incorrect_loss_per_char": 0.7861646278421003, "correct_loss_per_token": 2.69300537109375, "incorrect_loss_per_token": 3.6679967494926067, "correct_loss_uncond": -29.310348510742188, "incorrect_loss_uncond": -24.462683995564777}, "model_output": [{"sum_logits": -101.02747344970703, "num_tokens": 22, "num_tokens_all": 421, "is_greedy": false, "sum_logits_uncond": -130.50765991210938, "logits_per_token": -4.592157884077593, "logits_per_char": -0.9808492567932722, "num_chars": 103}, {"sum_logits": -29.183000564575195, "num_tokens": 10, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -49.25183868408203, "logits_per_token": -2.9183000564575194, "logits_per_char": -0.5836600112915039, "num_chars": 50}, {"sum_logits": -40.39508056640625, "num_tokens": 15, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -69.70542907714844, "logits_per_token": -2.69300537109375, "logits_per_char": -0.5689447967099471, "num_chars": 71}, {"sum_logits": -52.402984619140625, "num_tokens": 15, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -76.24201202392578, "logits_per_token": -3.4935323079427083, "logits_per_char": -0.7939846154415247, "num_chars": 66}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 683, "native_id": 16318, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 126.07923889160156, "incorrect_loss_raw": 111.02603658040364, "correct_loss_per_char": 0.7595134872988046, "incorrect_loss_per_char": 0.7226906189668597, "correct_loss_per_token": 2.865437247536399, "incorrect_loss_per_token": 3.3023887478361815, "correct_loss_uncond": -18.010055541992188, "incorrect_loss_uncond": -11.527923583984375}, "model_output": [{"sum_logits": -126.07923889160156, "num_tokens": 44, "num_tokens_all": 494, "is_greedy": false, "sum_logits_uncond": -144.08929443359375, "logits_per_token": -2.865437247536399, "logits_per_char": -0.7595134872988046, "num_chars": 166}, {"sum_logits": -158.62257385253906, "num_tokens": 41, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -160.86990356445312, "logits_per_token": -3.868843264696075, "logits_per_char": -0.8392728775266617, "num_chars": 189}, {"sum_logits": -66.20887756347656, "num_tokens": 26, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -80.76215362548828, "logits_per_token": -2.5464952909029446, "logits_per_char": -0.5610921827413268, "num_chars": 118}, {"sum_logits": -108.24665832519531, "num_tokens": 31, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -126.02982330322266, "logits_per_token": -3.491827687909526, "logits_per_char": -0.7677067966325909, "num_chars": 141}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 684, "native_id": 34380, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 101.08832550048828, "incorrect_loss_raw": 109.65937042236328, "correct_loss_per_char": 0.5054416275024414, "incorrect_loss_per_char": 0.7458166724188944, "correct_loss_per_token": 2.660219092118113, "incorrect_loss_per_token": 2.923272923455722, "correct_loss_uncond": -29.190223693847656, "incorrect_loss_uncond": -20.078656514485676}, "model_output": [{"sum_logits": -123.6059341430664, "num_tokens": 40, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -129.4959716796875, "logits_per_token": -3.09014835357666, "logits_per_char": -0.7446140611028097, "num_chars": 166}, {"sum_logits": -101.08832550048828, "num_tokens": 38, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -130.27854919433594, "logits_per_token": -2.660219092118113, "logits_per_char": -0.5054416275024414, "num_chars": 200}, {"sum_logits": -110.02587127685547, "num_tokens": 46, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -140.19100952148438, "logits_per_token": -2.3918667668881626, "logits_per_char": -0.5671436663755437, "num_chars": 194}, {"sum_logits": -95.34630584716797, "num_tokens": 29, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -119.527099609375, "logits_per_token": -3.2878036499023438, "logits_per_char": -0.9256922897783298, "num_chars": 103}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 685, "native_id": 7915, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 116.84147644042969, "incorrect_loss_raw": 69.94636535644531, "correct_loss_per_char": 0.6491193135579427, "incorrect_loss_per_char": 0.7329083006841203, "correct_loss_per_token": 3.157877741633235, "incorrect_loss_per_token": 2.6831685219675756, "correct_loss_uncond": -15.617950439453125, "incorrect_loss_uncond": -18.95459493001302}, "model_output": [{"sum_logits": -116.84147644042969, "num_tokens": 37, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -132.4594268798828, "logits_per_token": -3.157877741633235, "logits_per_char": -0.6491193135579427, "num_chars": 180}, {"sum_logits": -64.64483642578125, "num_tokens": 26, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -94.22880554199219, "logits_per_token": -2.486339862530048, "logits_per_char": -0.7026612654976223, "num_chars": 92}, {"sum_logits": -73.89192199707031, "num_tokens": 30, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -94.5063705444336, "logits_per_token": -2.4630640665690104, "logits_per_char": -0.636999327560951, "num_chars": 116}, {"sum_logits": -71.30233764648438, "num_tokens": 23, "num_tokens_all": 447, "is_greedy": false, "sum_logits_uncond": -77.96770477294922, "logits_per_token": -3.1001016368036685, "logits_per_char": -0.8590643089937876, "num_chars": 83}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 686, "native_id": 18269, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 15.664839744567871, "incorrect_loss_raw": 27.09783681233724, "correct_loss_per_char": 0.46073058072258444, "incorrect_loss_per_char": 0.6884650511619372, "correct_loss_per_token": 2.2378342492239818, "incorrect_loss_per_token": 2.81256560123328, "correct_loss_uncond": -25.56239414215088, "incorrect_loss_uncond": -21.964251200358074}, "model_output": [{"sum_logits": -36.51020812988281, "num_tokens": 12, "num_tokens_all": 407, "is_greedy": false, "sum_logits_uncond": -65.38906860351562, "logits_per_token": -3.042517344156901, "logits_per_char": -0.702119387113131, "num_chars": 52}, {"sum_logits": -15.664839744567871, "num_tokens": 7, "num_tokens_all": 402, "is_greedy": false, "sum_logits_uncond": -41.22723388671875, "logits_per_token": -2.2378342492239818, "logits_per_char": -0.46073058072258444, "num_chars": 34}, {"sum_logits": -17.47640609741211, "num_tokens": 6, "num_tokens_all": 401, "is_greedy": false, "sum_logits_uncond": -32.351104736328125, "logits_per_token": -2.912734349568685, "logits_per_char": -0.7943820953369141, "num_chars": 22}, {"sum_logits": -27.306896209716797, "num_tokens": 11, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -49.44609069824219, "logits_per_token": -2.4824451099742544, "logits_per_char": -0.5688936710357666, "num_chars": 48}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 687, "native_id": 15034, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 44.730934143066406, "incorrect_loss_raw": 52.68310038248698, "correct_loss_per_char": 0.5734735146546975, "incorrect_loss_per_char": 0.6220783092936532, "correct_loss_per_token": 2.982062276204427, "incorrect_loss_per_token": 2.86642972263151, "correct_loss_uncond": -13.525779724121094, "incorrect_loss_uncond": -19.51712417602539}, "model_output": [{"sum_logits": -44.730934143066406, "num_tokens": 15, "num_tokens_all": 423, "is_greedy": false, "sum_logits_uncond": -58.2567138671875, "logits_per_token": -2.982062276204427, "logits_per_char": -0.5734735146546975, "num_chars": 78}, {"sum_logits": -37.25801086425781, "num_tokens": 12, "num_tokens_all": 420, "is_greedy": false, "sum_logits_uncond": -57.664703369140625, "logits_per_token": -3.104834238688151, "logits_per_char": -0.6107870633484888, "num_chars": 61}, {"sum_logits": -27.24524688720703, "num_tokens": 11, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -43.02249526977539, "logits_per_token": -2.47684062610973, "logits_per_char": -0.5676093101501465, "num_chars": 48}, {"sum_logits": -93.5460433959961, "num_tokens": 31, "num_tokens_all": 439, "is_greedy": false, "sum_logits_uncond": -115.9134750366211, "logits_per_token": -3.0176143030966482, "logits_per_char": -0.6878385543823242, "num_chars": 136}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 688, "native_id": 49271, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 108.28878021240234, "incorrect_loss_raw": 130.6818822224935, "correct_loss_per_char": 0.5308273539823645, "incorrect_loss_per_char": 0.648989430547911, "correct_loss_per_token": 2.5783042907714844, "incorrect_loss_per_token": 2.9281808468611055, "correct_loss_uncond": -24.74266815185547, "incorrect_loss_uncond": -21.830853780110676}, "model_output": [{"sum_logits": -128.11886596679688, "num_tokens": 39, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -144.69308471679688, "logits_per_token": -3.285099127353766, "logits_per_char": -0.8428872760973478, "num_chars": 152}, {"sum_logits": -124.08625030517578, "num_tokens": 45, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -154.43121337890625, "logits_per_token": -2.7574722290039064, "logits_per_char": -0.5937141162927071, "num_chars": 209}, {"sum_logits": -139.8405303955078, "num_tokens": 51, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -158.41390991210938, "logits_per_token": -2.7419711842256436, "logits_per_char": -0.5103668992536782, "num_chars": 274}, {"sum_logits": -108.28878021240234, "num_tokens": 42, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -133.0314483642578, "logits_per_token": -2.5783042907714844, "logits_per_char": -0.5308273539823645, "num_chars": 204}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 689, "native_id": 30763, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 96.84318542480469, "incorrect_loss_raw": 160.72987365722656, "correct_loss_per_char": 0.5565700311770384, "incorrect_loss_per_char": 0.6864428397873176, "correct_loss_per_token": 2.362028912800114, "incorrect_loss_per_token": 3.104315668708356, "correct_loss_uncond": -31.059341430664062, "incorrect_loss_uncond": -19.59002176920573}, "model_output": [{"sum_logits": -249.62579345703125, "num_tokens": 60, "num_tokens_all": 503, "is_greedy": false, "sum_logits_uncond": -267.01751708984375, "logits_per_token": -4.160429890950521, "logits_per_char": -0.8820699415442801, "num_chars": 283}, {"sum_logits": -96.84318542480469, "num_tokens": 41, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -127.90252685546875, "logits_per_token": -2.362028912800114, "logits_per_char": -0.5565700311770384, "num_chars": 174}, {"sum_logits": -134.62071228027344, "num_tokens": 46, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -157.54776000976562, "logits_per_token": -2.926537223484205, "logits_per_char": -0.6534986033022983, "num_chars": 206}, {"sum_logits": -97.943115234375, "num_tokens": 44, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -116.3944091796875, "logits_per_token": -2.225979891690341, "logits_per_char": -0.5237599745153744, "num_chars": 187}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 690, "native_id": 50475, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 103.12294006347656, "incorrect_loss_raw": 121.79788970947266, "correct_loss_per_char": 0.5697400003506993, "incorrect_loss_per_char": 0.7365444138854814, "correct_loss_per_token": 2.7137615806178044, "incorrect_loss_per_token": 3.441061531995713, "correct_loss_uncond": -26.105697631835938, "incorrect_loss_uncond": -16.52100880940755}, "model_output": [{"sum_logits": -95.85198211669922, "num_tokens": 29, "num_tokens_all": 444, "is_greedy": false, "sum_logits_uncond": -109.59426879882812, "logits_per_token": -3.3052407626448006, "logits_per_char": -0.7373229393592248, "num_chars": 130}, {"sum_logits": -103.12294006347656, "num_tokens": 38, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -129.2286376953125, "logits_per_token": -2.7137615806178044, "logits_per_char": -0.5697400003506993, "num_chars": 181}, {"sum_logits": -134.28298950195312, "num_tokens": 46, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -158.695068359375, "logits_per_token": -2.919195423955503, "logits_per_char": -0.6425023421146082, "num_chars": 209}, {"sum_logits": -135.25869750976562, "num_tokens": 33, "num_tokens_all": 448, "is_greedy": false, "sum_logits_uncond": -146.6673583984375, "logits_per_token": -4.098748409386837, "logits_per_char": -0.8298079601826112, "num_chars": 163}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 691, "native_id": 2364, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 19.310611724853516, "incorrect_loss_raw": 33.76866467793783, "correct_loss_per_char": 0.9195529392787388, "incorrect_loss_per_char": 0.8277250848728577, "correct_loss_per_token": 3.218435287475586, "incorrect_loss_per_token": 3.688207491850241, "correct_loss_uncond": -14.95395278930664, "incorrect_loss_uncond": -20.624256769816082}, "model_output": [{"sum_logits": -47.769325256347656, "num_tokens": 13, "num_tokens_all": 413, "is_greedy": false, "sum_logits_uncond": -73.12115478515625, "logits_per_token": -3.674563481257512, "logits_per_char": -0.809649580616062, "num_chars": 59}, {"sum_logits": -19.310611724853516, "num_tokens": 6, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -34.264564514160156, "logits_per_token": -3.218435287475586, "logits_per_char": -0.9195529392787388, "num_chars": 21}, {"sum_logits": -36.78525924682617, "num_tokens": 8, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -57.1364631652832, "logits_per_token": -4.5981574058532715, "logits_per_char": -0.8758395058768136, "num_chars": 42}, {"sum_logits": -16.75140953063965, "num_tokens": 6, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -32.921146392822266, "logits_per_token": -2.7919015884399414, "logits_per_char": -0.7976861681256976, "num_chars": 21}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 692, "native_id": 18901, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 73.42930603027344, "incorrect_loss_raw": 123.84584045410156, "correct_loss_per_char": 0.4768136755212561, "incorrect_loss_per_char": 0.7952191238698404, "correct_loss_per_token": 1.9845758386560388, "incorrect_loss_per_token": 3.287190067066866, "correct_loss_uncond": -16.422760009765625, "incorrect_loss_uncond": -21.19549814860026}, "model_output": [{"sum_logits": -84.84634399414062, "num_tokens": 30, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -105.39791107177734, "logits_per_token": -2.828211466471354, "logits_per_char": -0.757556642804827, "num_chars": 112}, {"sum_logits": -183.819580078125, "num_tokens": 51, "num_tokens_all": 493, "is_greedy": false, "sum_logits_uncond": -203.27902221679688, "logits_per_token": -3.604305491727941, "logits_per_char": -0.8880172950634058, "num_chars": 207}, {"sum_logits": -102.87159729003906, "num_tokens": 30, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -126.44708251953125, "logits_per_token": -3.429053243001302, "logits_per_char": -0.7400834337412883, "num_chars": 139}, {"sum_logits": -73.42930603027344, "num_tokens": 37, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -89.85206604003906, "logits_per_token": -1.9845758386560388, "logits_per_char": -0.4768136755212561, "num_chars": 154}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 693, "native_id": 8351, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 12.983037948608398, "incorrect_loss_raw": 56.55585479736328, "correct_loss_per_char": 0.4476909637451172, "incorrect_loss_per_char": 0.8597772050735112, "correct_loss_per_token": 1.8547197069440569, "incorrect_loss_per_token": 3.541005329702092, "correct_loss_uncond": -18.10553550720215, "incorrect_loss_uncond": -23.340104420979817}, "model_output": [{"sum_logits": -40.949951171875, "num_tokens": 8, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -54.43762969970703, "logits_per_token": -5.118743896484375, "logits_per_char": -1.3209661668346775, "num_chars": 31}, {"sum_logits": -33.113868713378906, "num_tokens": 15, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -53.50772476196289, "logits_per_token": -2.207591247558594, "logits_per_char": -0.5340946566674017, "num_chars": 62}, {"sum_logits": -12.983037948608398, "num_tokens": 7, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -31.088573455810547, "logits_per_token": -1.8547197069440569, "logits_per_char": -0.4476909637451172, "num_chars": 29}, {"sum_logits": -95.60374450683594, "num_tokens": 29, "num_tokens_all": 432, "is_greedy": false, "sum_logits_uncond": -131.74252319335938, "logits_per_token": -3.296680845063308, "logits_per_char": -0.7242707917184541, "num_chars": 132}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 694, "native_id": 47950, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 89.05667877197266, "incorrect_loss_raw": 85.64391326904297, "correct_loss_per_char": 0.43442282327791537, "incorrect_loss_per_char": 0.5392334826248287, "correct_loss_per_token": 2.406937264107369, "incorrect_loss_per_token": 2.6292705698164975, "correct_loss_uncond": -28.01044464111328, "incorrect_loss_uncond": -30.133926391601562}, "model_output": [{"sum_logits": -105.08131408691406, "num_tokens": 37, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -145.64373779296875, "logits_per_token": -2.8400355158625423, "logits_per_char": -0.571094098298446, "num_chars": 184}, {"sum_logits": -84.7061996459961, "num_tokens": 31, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -103.85810089111328, "logits_per_token": -2.7324580530966482, "logits_per_char": -0.6321358182537022, "num_chars": 134}, {"sum_logits": -67.14422607421875, "num_tokens": 29, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -97.83168029785156, "logits_per_token": -2.3153181404903016, "logits_per_char": -0.41447053132233797, "num_chars": 162}, {"sum_logits": -89.05667877197266, "num_tokens": 37, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -117.06712341308594, "logits_per_token": -2.406937264107369, "logits_per_char": -0.43442282327791537, "num_chars": 205}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 695, "native_id": 2328, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 85.5448989868164, "incorrect_loss_raw": 90.39463806152344, "correct_loss_per_char": 0.4726237513083779, "incorrect_loss_per_char": 0.562791353270757, "correct_loss_per_token": 2.193458948379908, "incorrect_loss_per_token": 2.685103592485273, "correct_loss_uncond": -45.000083923339844, "incorrect_loss_uncond": -20.616106669108074}, "model_output": [{"sum_logits": -83.92082214355469, "num_tokens": 32, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -99.61840057373047, "logits_per_token": -2.622525691986084, "logits_per_char": -0.6309836251395089, "num_chars": 133}, {"sum_logits": -98.06912231445312, "num_tokens": 34, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -124.25550842285156, "logits_per_token": -2.884385950425092, "logits_per_char": -0.5418183553284703, "num_chars": 181}, {"sum_logits": -89.1939697265625, "num_tokens": 35, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -109.1583251953125, "logits_per_token": -2.548399135044643, "logits_per_char": -0.515572079344292, "num_chars": 173}, {"sum_logits": -85.5448989868164, "num_tokens": 39, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -130.54498291015625, "logits_per_token": -2.193458948379908, "logits_per_char": -0.4726237513083779, "num_chars": 181}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 696, "native_id": 27333, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 55.02629470825195, "incorrect_loss_raw": 61.71623738606771, "correct_loss_per_char": 0.5142644365257192, "incorrect_loss_per_char": 0.6608118330376459, "correct_loss_per_token": 2.3924475960109546, "incorrect_loss_per_token": 2.8448024827234772, "correct_loss_uncond": -35.638248443603516, "incorrect_loss_uncond": -27.679051717122395}, "model_output": [{"sum_logits": -75.18544006347656, "num_tokens": 31, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -101.06902313232422, "logits_per_token": -2.4253367762411795, "logits_per_char": -0.5739346569731035, "num_chars": 131}, {"sum_logits": -69.5899887084961, "num_tokens": 18, "num_tokens_all": 444, "is_greedy": false, "sum_logits_uncond": -97.41255950927734, "logits_per_token": -3.8661104838053384, "logits_per_char": -0.9278665161132813, "num_chars": 75}, {"sum_logits": -40.37328338623047, "num_tokens": 18, "num_tokens_all": 444, "is_greedy": false, "sum_logits_uncond": -69.70428466796875, "logits_per_token": -2.2429601881239147, "logits_per_char": -0.4806343260265532, "num_chars": 84}, {"sum_logits": -55.02629470825195, "num_tokens": 23, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -90.66454315185547, "logits_per_token": -2.3924475960109546, "logits_per_char": -0.5142644365257192, "num_chars": 107}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 697, "native_id": 30169, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 108.44827270507812, "incorrect_loss_raw": 161.7796147664388, "correct_loss_per_char": 0.589392786440642, "incorrect_loss_per_char": 0.7584267580323365, "correct_loss_per_token": 2.4647334705699575, "incorrect_loss_per_token": 3.4488742898008646, "correct_loss_uncond": -30.2186279296875, "incorrect_loss_uncond": -18.533490498860676}, "model_output": [{"sum_logits": -178.8369140625, "num_tokens": 58, "num_tokens_all": 493, "is_greedy": false, "sum_logits_uncond": -199.87606811523438, "logits_per_token": -3.0833950700431036, "logits_per_char": -0.7068652729743083, "num_chars": 253}, {"sum_logits": -108.44827270507812, "num_tokens": 44, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -138.66690063476562, "logits_per_token": -2.4647334705699575, "logits_per_char": -0.589392786440642, "num_chars": 184}, {"sum_logits": -178.65655517578125, "num_tokens": 51, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -192.71461486816406, "logits_per_token": -3.503069709329044, "logits_per_char": -0.7382502279990961, "num_chars": 242}, {"sum_logits": -127.84537506103516, "num_tokens": 34, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -148.3486328125, "logits_per_token": -3.760158090030446, "logits_per_char": -0.8301647731236049, "num_chars": 154}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 698, "native_id": 42852, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 87.853271484375, "incorrect_loss_raw": 133.87281799316406, "correct_loss_per_char": 0.6655550870028409, "incorrect_loss_per_char": 0.7125920518905079, "correct_loss_per_token": 2.6622203480113638, "incorrect_loss_per_token": 3.0736731200741048, "correct_loss_uncond": -34.27192687988281, "incorrect_loss_uncond": -14.345001220703125}, "model_output": [{"sum_logits": -166.02297973632812, "num_tokens": 48, "num_tokens_all": 498, "is_greedy": false, "sum_logits_uncond": -176.93701171875, "logits_per_token": -3.4588120778401694, "logits_per_char": -0.7650828559277794, "num_chars": 217}, {"sum_logits": -155.07696533203125, "num_tokens": 49, "num_tokens_all": 499, "is_greedy": false, "sum_logits_uncond": -168.95880126953125, "logits_per_token": -3.164836027184311, "logits_per_char": -0.7017057254843043, "num_chars": 221}, {"sum_logits": -87.853271484375, "num_tokens": 33, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -122.12519836425781, "logits_per_token": -2.6622203480113638, "logits_per_char": -0.6655550870028409, "num_chars": 132}, {"sum_logits": -80.51850891113281, "num_tokens": 31, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -98.75764465332031, "logits_per_token": -2.5973712551978325, "logits_per_char": -0.6709875742594401, "num_chars": 120}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 699, "native_id": 44926, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 37.90107345581055, "incorrect_loss_raw": 35.106615702311196, "correct_loss_per_char": 0.5121766683217641, "incorrect_loss_per_char": 0.4557917514852974, "correct_loss_per_token": 3.1584227879842124, "incorrect_loss_per_token": 2.455451816982693, "correct_loss_uncond": -29.217830657958984, "incorrect_loss_uncond": -29.468001047770183}, "model_output": [{"sum_logits": -37.90107345581055, "num_tokens": 12, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -67.11890411376953, "logits_per_token": -3.1584227879842124, "logits_per_char": -0.5121766683217641, "num_chars": 74}, {"sum_logits": -22.39168930053711, "num_tokens": 10, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -49.72545623779297, "logits_per_token": -2.2391689300537108, "logits_per_char": -0.42248470378371905, "num_chars": 53}, {"sum_logits": -53.504798889160156, "num_tokens": 20, "num_tokens_all": 426, "is_greedy": false, "sum_logits_uncond": -81.51204681396484, "logits_per_token": -2.6752399444580077, "logits_per_char": -0.5245568518545113, "num_chars": 102}, {"sum_logits": -29.423358917236328, "num_tokens": 12, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -62.48634719848633, "logits_per_token": -2.451946576436361, "logits_per_char": -0.4203336988176618, "num_chars": 70}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 700, "native_id": 45501, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 40.49005889892578, "incorrect_loss_raw": 72.4429219563802, "correct_loss_per_char": 0.39696136175417435, "incorrect_loss_per_char": 0.47130253205671613, "correct_loss_per_token": 1.4996318110713251, "incorrect_loss_per_token": 2.122624560976081, "correct_loss_uncond": -18.80553436279297, "incorrect_loss_uncond": -20.10875193277995}, "model_output": [{"sum_logits": -73.75929260253906, "num_tokens": 43, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -104.00599670410156, "logits_per_token": -1.7153323861055596, "logits_per_char": -0.3633462689780249, "num_chars": 203}, {"sum_logits": -40.49005889892578, "num_tokens": 27, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -59.29559326171875, "logits_per_token": -1.4996318110713251, "logits_per_char": -0.39696136175417435, "num_chars": 102}, {"sum_logits": -69.11634826660156, "num_tokens": 24, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -79.43697357177734, "logits_per_token": -2.879847844441732, "logits_per_char": -0.6226698042036177, "num_chars": 111}, {"sum_logits": -74.453125, "num_tokens": 42, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -94.21205139160156, "logits_per_token": -1.7726934523809523, "logits_per_char": -0.42789152298850575, "num_chars": 174}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 701, "native_id": 9677, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 52.52185821533203, "incorrect_loss_raw": 106.59538269042969, "correct_loss_per_char": 0.6484180026584201, "incorrect_loss_per_char": 0.7336210430857729, "correct_loss_per_token": 2.283559052840523, "incorrect_loss_per_token": 3.4473383759501317, "correct_loss_uncond": -47.04627227783203, "incorrect_loss_uncond": -29.802711486816406}, "model_output": [{"sum_logits": -83.49714660644531, "num_tokens": 26, "num_tokens_all": 443, "is_greedy": false, "sum_logits_uncond": -102.85743713378906, "logits_per_token": -3.211428715632512, "logits_per_char": -0.7389128018269496, "num_chars": 113}, {"sum_logits": -85.10714721679688, "num_tokens": 27, "num_tokens_all": 444, "is_greedy": false, "sum_logits_uncond": -112.58499908447266, "logits_per_token": -3.1521165635850696, "logits_per_char": -0.6447511152787642, "num_chars": 132}, {"sum_logits": -151.18185424804688, "num_tokens": 38, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -193.75184631347656, "logits_per_token": -3.9784698486328125, "logits_per_char": -0.8171992121516047, "num_chars": 185}, {"sum_logits": -52.52185821533203, "num_tokens": 23, "num_tokens_all": 440, "is_greedy": false, "sum_logits_uncond": -99.56813049316406, "logits_per_token": -2.283559052840523, "logits_per_char": -0.6484180026584201, "num_chars": 81}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 702, "native_id": 8539, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 55.955406188964844, "incorrect_loss_raw": 94.24581909179688, "correct_loss_per_char": 0.3564038610762092, "incorrect_loss_per_char": 0.5748008880862174, "correct_loss_per_token": 1.5987258911132813, "incorrect_loss_per_token": 2.7355374719342613, "correct_loss_uncond": -27.58631134033203, "incorrect_loss_uncond": -14.753262837727865}, "model_output": [{"sum_logits": -100.54798126220703, "num_tokens": 39, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -108.85554504394531, "logits_per_token": -2.578153365697616, "logits_per_char": -0.529199901380037, "num_chars": 190}, {"sum_logits": -74.64085388183594, "num_tokens": 26, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -87.12442779541016, "logits_per_token": -2.8708020723783054, "logits_per_char": -0.6379560160840678, "num_chars": 117}, {"sum_logits": -55.955406188964844, "num_tokens": 35, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -83.54171752929688, "logits_per_token": -1.5987258911132813, "logits_per_char": -0.3564038610762092, "num_chars": 157}, {"sum_logits": -107.54862213134766, "num_tokens": 39, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -131.01727294921875, "logits_per_token": -2.757656977726863, "logits_per_char": -0.5572467467945474, "num_chars": 193}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 703, "native_id": 36703, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 94.37821197509766, "incorrect_loss_raw": 118.39262390136719, "correct_loss_per_char": 0.59357366022074, "incorrect_loss_per_char": 0.6289063954210641, "correct_loss_per_token": 2.6965203421456474, "incorrect_loss_per_token": 2.8246838568936252, "correct_loss_uncond": -32.56517028808594, "incorrect_loss_uncond": -29.68708038330078}, "model_output": [{"sum_logits": -82.69493103027344, "num_tokens": 40, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -118.7256088256836, "logits_per_token": -2.067373275756836, "logits_per_char": -0.49223173232305617, "num_chars": 168}, {"sum_logits": -128.61569213867188, "num_tokens": 41, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -152.40379333496094, "logits_per_token": -3.1369681009432164, "logits_per_char": -0.702817989828808, "num_chars": 183}, {"sum_logits": -94.37821197509766, "num_tokens": 35, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -126.9433822631836, "logits_per_token": -2.6965203421456474, "logits_per_char": -0.59357366022074, "num_chars": 159}, {"sum_logits": -143.86724853515625, "num_tokens": 44, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -173.10971069335938, "logits_per_token": -3.2697101939808237, "logits_per_char": -0.6916694641113281, "num_chars": 208}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 704, "native_id": 46204, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 116.41114807128906, "incorrect_loss_raw": 73.52111053466797, "correct_loss_per_char": 0.5734539313856604, "incorrect_loss_per_char": 0.4932773001278761, "correct_loss_per_token": 2.375737715740593, "incorrect_loss_per_token": 2.061389858508236, "correct_loss_uncond": -32.11494445800781, "incorrect_loss_uncond": -32.37560780843099}, "model_output": [{"sum_logits": -116.41114807128906, "num_tokens": 49, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -148.52609252929688, "logits_per_token": -2.375737715740593, "logits_per_char": -0.5734539313856604, "num_chars": 203}, {"sum_logits": -81.10635375976562, "num_tokens": 36, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -117.6274185180664, "logits_per_token": -2.252954271104601, "logits_per_char": -0.5301068873187296, "num_chars": 153}, {"sum_logits": -72.3370590209961, "num_tokens": 35, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -101.28144073486328, "logits_per_token": -2.066773114885603, "logits_per_char": -0.5166932787214007, "num_chars": 140}, {"sum_logits": -67.11991882324219, "num_tokens": 36, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -98.78129577636719, "logits_per_token": -1.8644421895345051, "logits_per_char": -0.433031734343498, "num_chars": 155}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 705, "native_id": 13733, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 59.52652359008789, "incorrect_loss_raw": 111.81455739339192, "correct_loss_per_char": 0.5615709772649801, "incorrect_loss_per_char": 0.6985823562996626, "correct_loss_per_token": 2.3810609436035155, "incorrect_loss_per_token": 3.1450944235754026, "correct_loss_uncond": -18.458683013916016, "incorrect_loss_uncond": -18.66620635986328}, "model_output": [{"sum_logits": -124.32563781738281, "num_tokens": 34, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -142.77993774414062, "logits_per_token": -3.6566364063936123, "logits_per_char": -0.7489496254059206, "num_chars": 166}, {"sum_logits": -114.20964813232422, "num_tokens": 37, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -125.37408447265625, "logits_per_token": -3.0867472468195736, "logits_per_char": -0.7665077055860686, "num_chars": 149}, {"sum_logits": -59.52652359008789, "num_tokens": 25, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -77.9852066040039, "logits_per_token": -2.3810609436035155, "logits_per_char": -0.5615709772649801, "num_chars": 106}, {"sum_logits": -96.90838623046875, "num_tokens": 36, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -123.28826904296875, "logits_per_token": -2.691899617513021, "logits_per_char": -0.5802897379069984, "num_chars": 167}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 706, "native_id": 46050, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 68.93994140625, "incorrect_loss_raw": 79.53482437133789, "correct_loss_per_char": 0.4924281529017857, "incorrect_loss_per_char": 0.5393707095339355, "correct_loss_per_token": 1.9697126116071428, "incorrect_loss_per_token": 2.2821983716384837, "correct_loss_uncond": -41.759521484375, "incorrect_loss_uncond": -29.663265228271484}, "model_output": [{"sum_logits": -115.36305236816406, "num_tokens": 36, "num_tokens_all": 498, "is_greedy": false, "sum_logits_uncond": -144.46951293945312, "logits_per_token": -3.204529232449002, "logits_per_char": -0.6707154207451399, "num_chars": 172}, {"sum_logits": -48.8394889831543, "num_tokens": 29, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -82.76911163330078, "logits_per_token": -1.6841203097639412, "logits_per_char": -0.4003236801897893, "num_chars": 122}, {"sum_logits": -68.93994140625, "num_tokens": 35, "num_tokens_all": 497, "is_greedy": false, "sum_logits_uncond": -110.699462890625, "logits_per_token": -1.9697126116071428, "logits_per_char": -0.4924281529017857, "num_chars": 140}, {"sum_logits": -74.40193176269531, "num_tokens": 38, "num_tokens_all": 500, "is_greedy": false, "sum_logits_uncond": -100.35564422607422, "logits_per_token": -1.9579455727025081, "logits_per_char": -0.5470730276668773, "num_chars": 136}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 707, "native_id": 7453, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 17.564586639404297, "incorrect_loss_raw": 29.23592185974121, "correct_loss_per_char": 0.5665995690130419, "incorrect_loss_per_char": 0.6024923458787462, "correct_loss_per_token": 2.195573329925537, "incorrect_loss_per_token": 2.7904596964518227, "correct_loss_uncond": -26.388851165771484, "incorrect_loss_uncond": -25.933437983194988}, "model_output": [{"sum_logits": -17.564586639404297, "num_tokens": 8, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -43.95343780517578, "logits_per_token": -2.195573329925537, "logits_per_char": -0.5665995690130419, "num_chars": 31}, {"sum_logits": -28.248069763183594, "num_tokens": 8, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -47.47117614746094, "logits_per_token": -3.531008720397949, "logits_per_char": -0.7433702569258841, "num_chars": 38}, {"sum_logits": -20.763723373413086, "num_tokens": 10, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -42.78160858154297, "logits_per_token": -2.0763723373413088, "logits_per_char": -0.5190930843353272, "num_chars": 40}, {"sum_logits": -38.69597244262695, "num_tokens": 14, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -75.25529479980469, "logits_per_token": -2.763998031616211, "logits_per_char": -0.5450136963750275, "num_chars": 71}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 708, "native_id": 45461, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 60.121665954589844, "incorrect_loss_raw": 115.31138865152995, "correct_loss_per_char": 0.48097332763671874, "incorrect_loss_per_char": 0.609610605152834, "correct_loss_per_token": 2.0731608949858567, "incorrect_loss_per_token": 2.595504044362439, "correct_loss_uncond": -11.795867919921875, "incorrect_loss_uncond": -32.96763356526693}, "model_output": [{"sum_logits": -108.24385070800781, "num_tokens": 43, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -136.1593017578125, "logits_per_token": -2.5172988536746, "logits_per_char": -0.5385266204378498, "num_chars": 201}, {"sum_logits": -60.121665954589844, "num_tokens": 29, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -71.91753387451172, "logits_per_token": -2.0731608949858567, "logits_per_char": -0.48097332763671874, "num_chars": 125}, {"sum_logits": -146.57415771484375, "num_tokens": 49, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -187.60586547851562, "logits_per_token": -2.99130934111926, "logits_per_char": -0.7755246439938823, "num_chars": 189}, {"sum_logits": -91.11615753173828, "num_tokens": 40, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -121.0718994140625, "logits_per_token": -2.277903938293457, "logits_per_char": -0.51478055102677, "num_chars": 177}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 709, "native_id": 17102, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 112.02205657958984, "incorrect_loss_raw": 81.1635233561198, "correct_loss_per_char": 0.42432597189238574, "incorrect_loss_per_char": 0.5363722913387196, "correct_loss_per_token": 1.9652992382384182, "incorrect_loss_per_token": 2.472834742292807, "correct_loss_uncond": -26.774879455566406, "incorrect_loss_uncond": -20.5698979695638}, "model_output": [{"sum_logits": -119.71666717529297, "num_tokens": 38, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -146.0871124267578, "logits_per_token": -3.150438609876131, "logits_per_char": -0.6436379955660912, "num_chars": 186}, {"sum_logits": -112.02205657958984, "num_tokens": 57, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -138.79693603515625, "logits_per_token": -1.9652992382384182, "logits_per_char": -0.42432597189238574, "num_chars": 264}, {"sum_logits": -56.874290466308594, "num_tokens": 29, "num_tokens_all": 443, "is_greedy": false, "sum_logits_uncond": -75.19503784179688, "logits_per_token": -1.96118242987271, "logits_per_char": -0.45866363279281125, "num_chars": 124}, {"sum_logits": -66.89961242675781, "num_tokens": 29, "num_tokens_all": 443, "is_greedy": false, "sum_logits_uncond": -83.9181137084961, "logits_per_token": -2.30688318712958, "logits_per_char": -0.5068152456572561, "num_chars": 132}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 710, "native_id": 41761, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 133.8782501220703, "incorrect_loss_raw": 85.85344950358073, "correct_loss_per_char": 0.5270797248900406, "incorrect_loss_per_char": 0.5536209675973981, "correct_loss_per_token": 2.4792268541124134, "incorrect_loss_per_token": 2.5803710158920965, "correct_loss_uncond": -10.792343139648438, "incorrect_loss_uncond": -15.977045694986979}, "model_output": [{"sum_logits": -140.33204650878906, "num_tokens": 44, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -154.9857177734375, "logits_per_token": -3.18936469338157, "logits_per_char": -0.7123454137501983, "num_chars": 197}, {"sum_logits": -53.61591720581055, "num_tokens": 20, "num_tokens_all": 429, "is_greedy": false, "sum_logits_uncond": -70.21464538574219, "logits_per_token": -2.6807958602905275, "logits_per_char": -0.5765152387721564, "num_chars": 93}, {"sum_logits": -133.8782501220703, "num_tokens": 54, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -144.67059326171875, "logits_per_token": -2.4792268541124134, "logits_per_char": -0.5270797248900406, "num_chars": 254}, {"sum_logits": -63.61238479614258, "num_tokens": 34, "num_tokens_all": 443, "is_greedy": false, "sum_logits_uncond": -80.29112243652344, "logits_per_token": -1.8709524940041935, "logits_per_char": -0.37200225026983963, "num_chars": 171}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 711, "native_id": 17656, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 77.56657409667969, "incorrect_loss_raw": 117.40328470865886, "correct_loss_per_char": 0.7317601329875443, "incorrect_loss_per_char": 0.7035951795673138, "correct_loss_per_token": 3.3724597433338994, "incorrect_loss_per_token": 3.268182020216017, "correct_loss_uncond": -14.310646057128906, "incorrect_loss_uncond": -21.15485127766927}, "model_output": [{"sum_logits": -77.56657409667969, "num_tokens": 23, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -91.8772201538086, "logits_per_token": -3.3724597433338994, "logits_per_char": -0.7317601329875443, "num_chars": 106}, {"sum_logits": -118.82856750488281, "num_tokens": 42, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -137.51803588867188, "logits_per_token": -2.8292516072591147, "logits_per_char": -0.6458074320917544, "num_chars": 184}, {"sum_logits": -105.87689208984375, "num_tokens": 30, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -129.169921875, "logits_per_token": -3.529229736328125, "logits_per_char": -0.6875122862976867, "num_chars": 154}, {"sum_logits": -127.50439453125, "num_tokens": 37, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -148.9864501953125, "logits_per_token": -3.446064717060811, "logits_per_char": -0.7774658203125, "num_chars": 164}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 712, "native_id": 1860, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 66.63391876220703, "incorrect_loss_raw": 104.64933522542317, "correct_loss_per_char": 0.469252949029627, "incorrect_loss_per_char": 0.7280202863259451, "correct_loss_per_token": 1.8509421878390842, "incorrect_loss_per_token": 3.314352779124739, "correct_loss_uncond": -20.57318115234375, "incorrect_loss_uncond": -15.705879211425781}, "model_output": [{"sum_logits": -99.2989730834961, "num_tokens": 31, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -111.58255004882812, "logits_per_token": -3.2031926801127772, "logits_per_char": -0.6576090932681861, "num_chars": 151}, {"sum_logits": -84.98506164550781, "num_tokens": 28, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -103.42648315429688, "logits_per_token": -3.0351807730538503, "logits_per_char": -0.7725914695046164, "num_chars": 110}, {"sum_logits": -66.63391876220703, "num_tokens": 36, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -87.20709991455078, "logits_per_token": -1.8509421878390842, "logits_per_char": -0.469252949029627, "num_chars": 142}, {"sum_logits": -129.66397094726562, "num_tokens": 35, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -146.05661010742188, "logits_per_token": -3.7046848842075892, "logits_per_char": -0.7538602962050327, "num_chars": 172}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 713, "native_id": 21287, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 14.233417510986328, "incorrect_loss_raw": 28.944085439046223, "correct_loss_per_char": 0.5474391350379357, "incorrect_loss_per_char": 0.9108540839567163, "correct_loss_per_token": 2.0333453587123325, "incorrect_loss_per_token": 4.045324181065415, "correct_loss_uncond": -21.934436798095703, "incorrect_loss_uncond": -11.483771006266275}, "model_output": [{"sum_logits": -37.77059555053711, "num_tokens": 6, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -45.40684127807617, "logits_per_token": -6.295099258422852, "logits_per_char": -1.3989109463161893, "num_chars": 27}, {"sum_logits": -14.233417510986328, "num_tokens": 7, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -36.16785430908203, "logits_per_token": -2.0333453587123325, "logits_per_char": -0.5474391350379357, "num_chars": 26}, {"sum_logits": -30.83612632751465, "num_tokens": 11, "num_tokens_all": 423, "is_greedy": false, "sum_logits_uncond": -43.96864700317383, "logits_per_token": -2.8032842115922407, "logits_per_char": -0.6046299279904833, "num_chars": 51}, {"sum_logits": -18.225534439086914, "num_tokens": 6, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -31.9080810546875, "logits_per_token": -3.0375890731811523, "logits_per_char": -0.7290213775634765, "num_chars": 25}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 714, "native_id": 38745, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 106.96429443359375, "incorrect_loss_raw": 100.02819315592448, "correct_loss_per_char": 0.4383782558753842, "incorrect_loss_per_char": 0.6049610646642083, "correct_loss_per_token": 2.487541731013808, "incorrect_loss_per_token": 2.7119229851964533, "correct_loss_uncond": -29.359054565429688, "incorrect_loss_uncond": -22.352620442708332}, "model_output": [{"sum_logits": -106.96429443359375, "num_tokens": 43, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -136.32334899902344, "logits_per_token": -2.487541731013808, "logits_per_char": -0.4383782558753842, "num_chars": 244}, {"sum_logits": -134.61204528808594, "num_tokens": 45, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -157.21026611328125, "logits_per_token": -2.9913787841796875, "logits_per_char": -0.6290282490097474, "num_chars": 214}, {"sum_logits": -51.28633117675781, "num_tokens": 26, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -80.51116943359375, "logits_per_token": -1.9725511991060698, "logits_per_char": -0.43462992522676114, "num_chars": 118}, {"sum_logits": -114.18620300292969, "num_tokens": 36, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -129.42100524902344, "logits_per_token": -3.1718389723036022, "logits_per_char": -0.7512250197561163, "num_chars": 152}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 715, "native_id": 5749, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 66.94741821289062, "incorrect_loss_raw": 67.03341801961263, "correct_loss_per_char": 0.4714606916400748, "incorrect_loss_per_char": 0.45472020197012647, "correct_loss_per_token": 2.231580607096354, "incorrect_loss_per_token": 2.1460091711613636, "correct_loss_uncond": -17.297279357910156, "incorrect_loss_uncond": -16.970015207926433}, "model_output": [{"sum_logits": -66.94741821289062, "num_tokens": 30, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -84.24469757080078, "logits_per_token": -2.231580607096354, "logits_per_char": -0.4714606916400748, "num_chars": 142}, {"sum_logits": -74.79721069335938, "num_tokens": 35, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -88.21611785888672, "logits_per_token": -2.1370631626674106, "logits_per_char": -0.43741058885005485, "num_chars": 171}, {"sum_logits": -78.64923858642578, "num_tokens": 41, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -96.9820785522461, "logits_per_token": -1.9182741118640434, "logits_per_char": -0.42513101938608533, "num_chars": 185}, {"sum_logits": -47.653804779052734, "num_tokens": 20, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -66.81210327148438, "logits_per_token": -2.3826902389526365, "logits_per_char": -0.5016189976742393, "num_chars": 95}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 716, "native_id": 12611, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 76.7614974975586, "incorrect_loss_raw": 90.2371597290039, "correct_loss_per_char": 0.48892673565323946, "incorrect_loss_per_char": 0.5668233374084058, "correct_loss_per_token": 2.326105984774503, "incorrect_loss_per_token": 3.0238372017355526, "correct_loss_uncond": -29.955703735351562, "incorrect_loss_uncond": -21.983540852864582}, "model_output": [{"sum_logits": -94.34195709228516, "num_tokens": 34, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -111.00137329101562, "logits_per_token": -2.77476344389074, "logits_per_char": -0.5717694369229404, "num_chars": 165}, {"sum_logits": -100.99026489257812, "num_tokens": 32, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -129.16534423828125, "logits_per_token": -3.1559457778930664, "logits_per_char": -0.5488601352857507, "num_chars": 184}, {"sum_logits": -76.7614974975586, "num_tokens": 33, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -106.71720123291016, "logits_per_token": -2.326105984774503, "logits_per_char": -0.48892673565323946, "num_chars": 157}, {"sum_logits": -75.37925720214844, "num_tokens": 24, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -96.4953842163086, "logits_per_token": -3.1408023834228516, "logits_per_char": -0.5798404400165265, "num_chars": 130}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 717, "native_id": 3890, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 38.39728546142578, "incorrect_loss_raw": 73.40569814046223, "correct_loss_per_char": 0.6508014484987421, "incorrect_loss_per_char": 0.7793772197905041, "correct_loss_per_token": 2.5598190307617186, "incorrect_loss_per_token": 3.367617726122212, "correct_loss_uncond": -29.680213928222656, "incorrect_loss_uncond": -26.10047658284505}, "model_output": [{"sum_logits": -33.52460479736328, "num_tokens": 13, "num_tokens_all": 401, "is_greedy": false, "sum_logits_uncond": -70.503662109375, "logits_per_token": -2.578815753643329, "logits_per_char": -0.6447039384108323, "num_chars": 52}, {"sum_logits": -38.39728546142578, "num_tokens": 15, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -68.07749938964844, "logits_per_token": -2.5598190307617186, "logits_per_char": -0.6508014484987421, "num_chars": 59}, {"sum_logits": -78.05726623535156, "num_tokens": 20, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -92.44413757324219, "logits_per_token": -3.902863311767578, "logits_per_char": -0.8577721564324348, "num_chars": 91}, {"sum_logits": -108.63522338867188, "num_tokens": 30, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -135.5707244873047, "logits_per_token": -3.621174112955729, "logits_per_char": -0.8356555645282452, "num_chars": 130}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 718, "native_id": 16015, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 33.35548400878906, "incorrect_loss_raw": 51.890052795410156, "correct_loss_per_char": 0.6176941483109085, "incorrect_loss_per_char": 0.7424537245478712, "correct_loss_per_token": 2.7796236673990884, "incorrect_loss_per_token": 3.0473511585822473, "correct_loss_uncond": -18.984886169433594, "incorrect_loss_uncond": -34.45366541544596}, "model_output": [{"sum_logits": -34.16923904418945, "num_tokens": 16, "num_tokens_all": 413, "is_greedy": false, "sum_logits_uncond": -68.79444885253906, "logits_per_token": -2.135577440261841, "logits_per_char": -0.5601514597408107, "num_chars": 61}, {"sum_logits": -86.90494537353516, "num_tokens": 20, "num_tokens_all": 417, "is_greedy": false, "sum_logits_uncond": -126.3201904296875, "logits_per_token": -4.345247268676758, "logits_per_char": -1.100062599665002, "num_chars": 79}, {"sum_logits": -33.35548400878906, "num_tokens": 12, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -52.340370178222656, "logits_per_token": -2.7796236673990884, "logits_per_char": -0.6176941483109085, "num_chars": 54}, {"sum_logits": -34.59597396850586, "num_tokens": 13, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -63.9165153503418, "logits_per_token": -2.661228766808143, "logits_per_char": -0.567147114237801, "num_chars": 61}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 719, "native_id": 20428, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 79.9239501953125, "incorrect_loss_raw": 61.81160418192545, "correct_loss_per_char": 0.48438757694128787, "incorrect_loss_per_char": 0.7218355550301907, "correct_loss_per_token": 2.2201097276475696, "incorrect_loss_per_token": 3.5837941760108585, "correct_loss_uncond": -38.12464904785156, "incorrect_loss_uncond": -29.188995997111004}, "model_output": [{"sum_logits": -79.9239501953125, "num_tokens": 36, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -118.04859924316406, "logits_per_token": -2.2201097276475696, "logits_per_char": -0.48438757694128787, "num_chars": 165}, {"sum_logits": -89.28742218017578, "num_tokens": 20, "num_tokens_all": 440, "is_greedy": false, "sum_logits_uncond": -112.54840087890625, "logits_per_token": -4.464371109008789, "logits_per_char": -0.8043911908123944, "num_chars": 111}, {"sum_logits": -65.03384399414062, "num_tokens": 16, "num_tokens_all": 436, "is_greedy": false, "sum_logits_uncond": -91.91152954101562, "logits_per_token": -4.064615249633789, "logits_per_char": -0.8337672306941106, "num_chars": 78}, {"sum_logits": -31.11354637145996, "num_tokens": 14, "num_tokens_all": 434, "is_greedy": false, "sum_logits_uncond": -68.5418701171875, "logits_per_token": -2.2223961693899974, "logits_per_char": -0.5273482435840672, "num_chars": 59}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 720, "native_id": 6514, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 79.22784423828125, "incorrect_loss_raw": 68.03417714436848, "correct_loss_per_char": 0.5741148133208787, "incorrect_loss_per_char": 0.7949160665638321, "correct_loss_per_token": 2.5557369109122985, "incorrect_loss_per_token": 3.6339653079249277, "correct_loss_uncond": -34.19172668457031, "incorrect_loss_uncond": -20.647987365722656}, "model_output": [{"sum_logits": -70.94540405273438, "num_tokens": 18, "num_tokens_all": 423, "is_greedy": false, "sum_logits_uncond": -101.28805541992188, "logits_per_token": -3.941411336263021, "logits_per_char": -0.8651878543016387, "num_chars": 82}, {"sum_logits": -55.305458068847656, "num_tokens": 17, "num_tokens_all": 422, "is_greedy": false, "sum_logits_uncond": -77.16851806640625, "logits_per_token": -3.2532622393439796, "logits_per_char": -0.7000690894790843, "num_chars": 79}, {"sum_logits": -77.85166931152344, "num_tokens": 21, "num_tokens_all": 426, "is_greedy": false, "sum_logits_uncond": -87.58992004394531, "logits_per_token": -3.707222348167783, "logits_per_char": -0.819491255910773, "num_chars": 95}, {"sum_logits": -79.22784423828125, "num_tokens": 31, "num_tokens_all": 436, "is_greedy": false, "sum_logits_uncond": -113.41957092285156, "logits_per_token": -2.5557369109122985, "logits_per_char": -0.5741148133208787, "num_chars": 138}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 721, "native_id": 12408, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 75.96792602539062, "incorrect_loss_raw": 108.9386469523112, "correct_loss_per_char": 0.5064528401692708, "incorrect_loss_per_char": 0.5800553495858183, "correct_loss_per_token": 2.373997688293457, "incorrect_loss_per_token": 2.6680422263435433, "correct_loss_uncond": -20.994583129882812, "incorrect_loss_uncond": -20.76848602294922}, "model_output": [{"sum_logits": -91.25750732421875, "num_tokens": 34, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -116.88772583007812, "logits_per_token": -2.6840443330652572, "logits_per_char": -0.6207993695525085, "num_chars": 147}, {"sum_logits": -75.96792602539062, "num_tokens": 32, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -96.96250915527344, "logits_per_token": -2.373997688293457, "logits_per_char": -0.5064528401692708, "num_chars": 150}, {"sum_logits": -106.78972625732422, "num_tokens": 49, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -134.79620361328125, "logits_per_token": -2.179382168516821, "logits_per_char": -0.44869632881228666, "num_chars": 238}, {"sum_logits": -128.76870727539062, "num_tokens": 41, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -137.43746948242188, "logits_per_token": -3.1407001774485517, "logits_per_char": -0.6706703503926595, "num_chars": 192}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 722, "native_id": 43575, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 100.0400390625, "incorrect_loss_raw": 84.5897928873698, "correct_loss_per_char": 0.44660731724330355, "incorrect_loss_per_char": 0.5333741326504051, "correct_loss_per_token": 1.7248282596982758, "incorrect_loss_per_token": 2.3704942355449465, "correct_loss_uncond": -36.6741943359375, "incorrect_loss_uncond": -25.704727172851562}, "model_output": [{"sum_logits": -78.837158203125, "num_tokens": 41, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -104.6717529296875, "logits_per_token": -1.9228575171493902, "logits_per_char": -0.5086268271169355, "num_chars": 155}, {"sum_logits": -93.5137939453125, "num_tokens": 41, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -118.47615051269531, "logits_per_token": -2.2808242425685976, "logits_per_char": -0.47468930936706855, "num_chars": 197}, {"sum_logits": -81.41842651367188, "num_tokens": 28, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -107.73565673828125, "logits_per_token": -2.907800946916853, "logits_per_char": -0.6168062614672112, "num_chars": 132}, {"sum_logits": -100.0400390625, "num_tokens": 58, "num_tokens_all": 498, "is_greedy": false, "sum_logits_uncond": -136.7142333984375, "logits_per_token": -1.7248282596982758, "logits_per_char": -0.44660731724330355, "num_chars": 224}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 723, "native_id": 50427, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 32.576995849609375, "incorrect_loss_raw": 94.92268880208333, "correct_loss_per_char": 0.3016388504593461, "incorrect_loss_per_char": 0.6181614266459142, "correct_loss_per_token": 1.2529613788311298, "incorrect_loss_per_token": 2.844822901950138, "correct_loss_uncond": -29.434871673583984, "incorrect_loss_uncond": -22.4639409383138}, "model_output": [{"sum_logits": -109.65487670898438, "num_tokens": 38, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -127.58654022216797, "logits_per_token": -2.885654650236431, "logits_per_char": -0.6768819549937307, "num_chars": 162}, {"sum_logits": -80.69742584228516, "num_tokens": 25, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -109.12570190429688, "logits_per_token": -3.227897033691406, "logits_per_char": -0.7078721565112733, "num_chars": 114}, {"sum_logits": -32.576995849609375, "num_tokens": 26, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -62.01186752319336, "logits_per_token": -1.2529613788311298, "logits_per_char": -0.3016388504593461, "num_chars": 108}, {"sum_logits": -94.41576385498047, "num_tokens": 39, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -115.44764709472656, "logits_per_token": -2.420917021922576, "logits_per_char": -0.4697301684327386, "num_chars": 201}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 724, "native_id": 18007, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 86.13302612304688, "incorrect_loss_raw": 130.33777364095053, "correct_loss_per_char": 0.43945421491350445, "incorrect_loss_per_char": 0.6800491332770346, "correct_loss_per_token": 2.1008055151962655, "incorrect_loss_per_token": 3.0520248177903313, "correct_loss_uncond": -25.947784423828125, "incorrect_loss_uncond": -10.685829162597656}, "model_output": [{"sum_logits": -86.13302612304688, "num_tokens": 41, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -112.080810546875, "logits_per_token": -2.1008055151962655, "logits_per_char": -0.43945421491350445, "num_chars": 196}, {"sum_logits": -146.63949584960938, "num_tokens": 45, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -163.2576141357422, "logits_per_token": -3.258655463324653, "logits_per_char": -0.7758703484106316, "num_chars": 189}, {"sum_logits": -85.61460876464844, "num_tokens": 35, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -92.81969451904297, "logits_per_token": -2.446131678989955, "logits_per_char": -0.552352314610635, "num_chars": 155}, {"sum_logits": -158.75921630859375, "num_tokens": 46, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -166.99349975585938, "logits_per_token": -3.4512873110563858, "logits_per_char": -0.7119247368098375, "num_chars": 223}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 725, "native_id": 28372, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 130.2637939453125, "incorrect_loss_raw": 127.36747233072917, "correct_loss_per_char": 0.4736865234375, "incorrect_loss_per_char": 0.6226237896780344, "correct_loss_per_token": 2.135472031890369, "incorrect_loss_per_token": 2.735709597544925, "correct_loss_uncond": -20.771591186523438, "incorrect_loss_uncond": -13.192220052083334}, "model_output": [{"sum_logits": -85.70048522949219, "num_tokens": 33, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -97.41360473632812, "logits_per_token": -2.5969844008937026, "logits_per_char": -0.5564966573343648, "num_chars": 154}, {"sum_logits": -130.2637939453125, "num_tokens": 61, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -151.03538513183594, "logits_per_token": -2.135472031890369, "logits_per_char": -0.4736865234375, "num_chars": 275}, {"sum_logits": -148.35064697265625, "num_tokens": 56, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -167.50526428222656, "logits_per_token": -2.6491186959402904, "logits_per_char": -0.625952096931039, "num_chars": 237}, {"sum_logits": -148.05128479003906, "num_tokens": 50, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -156.7602081298828, "logits_per_token": -2.9610256958007812, "logits_per_char": -0.6854226147686994, "num_chars": 216}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 726, "native_id": 50240, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 91.02156829833984, "incorrect_loss_raw": 98.36971791585286, "correct_loss_per_char": 0.5450393311277835, "incorrect_loss_per_char": 0.6686140755371444, "correct_loss_per_token": 2.528376897176107, "incorrect_loss_per_token": 3.2192210417786185, "correct_loss_uncond": -26.395530700683594, "incorrect_loss_uncond": -32.10562388102213}, "model_output": [{"sum_logits": -91.02156829833984, "num_tokens": 36, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -117.41709899902344, "logits_per_token": -2.528376897176107, "logits_per_char": -0.5450393311277835, "num_chars": 167}, {"sum_logits": -109.45533752441406, "num_tokens": 30, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -140.80291748046875, "logits_per_token": -3.6485112508138022, "logits_per_char": -0.7297022501627605, "num_chars": 150}, {"sum_logits": -83.24943542480469, "num_tokens": 27, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -113.53590393066406, "logits_per_token": -3.083312423140914, "logits_per_char": -0.6555073655496432, "num_chars": 127}, {"sum_logits": -102.40438079833984, "num_tokens": 35, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -137.0872039794922, "logits_per_token": -2.9258394513811385, "logits_per_char": -0.6206326108990293, "num_chars": 165}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 727, "native_id": 23457, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 87.47529602050781, "incorrect_loss_raw": 61.63778813680013, "correct_loss_per_char": 0.520686285836356, "incorrect_loss_per_char": 0.5782552286469866, "correct_loss_per_token": 2.242956308218149, "incorrect_loss_per_token": 2.546954275060583, "correct_loss_uncond": -14.641624450683594, "incorrect_loss_uncond": -21.652838389078777}, "model_output": [{"sum_logits": -83.75689697265625, "num_tokens": 28, "num_tokens_all": 445, "is_greedy": false, "sum_logits_uncond": -98.142822265625, "logits_per_token": -2.9913177490234375, "logits_per_char": -0.6492782711058624, "num_chars": 129}, {"sum_logits": -38.78269577026367, "num_tokens": 18, "num_tokens_all": 435, "is_greedy": false, "sum_logits_uncond": -60.349876403808594, "logits_per_token": -2.154594209459093, "logits_per_char": -0.4357606266321761, "num_chars": 89}, {"sum_logits": -62.37377166748047, "num_tokens": 25, "num_tokens_all": 442, "is_greedy": false, "sum_logits_uncond": -91.37918090820312, "logits_per_token": -2.494950866699219, "logits_per_char": -0.6497267882029215, "num_chars": 96}, {"sum_logits": -87.47529602050781, "num_tokens": 39, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -102.1169204711914, "logits_per_token": -2.242956308218149, "logits_per_char": -0.520686285836356, "num_chars": 168}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 728, "native_id": 31407, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 80.81767272949219, "incorrect_loss_raw": 68.26112238566081, "correct_loss_per_char": 0.45403186926681005, "incorrect_loss_per_char": 0.5227967844300796, "correct_loss_per_token": 1.9711627494998094, "incorrect_loss_per_token": 2.201861965275612, "correct_loss_uncond": -11.874725341796875, "incorrect_loss_uncond": -17.845757802327473}, "model_output": [{"sum_logits": -63.75163650512695, "num_tokens": 34, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -84.53726959228516, "logits_per_token": -1.8750481325037338, "logits_per_char": -0.4553688321794782, "num_chars": 140}, {"sum_logits": -79.68900299072266, "num_tokens": 35, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -98.23133850097656, "logits_per_token": -2.27682865687779, "logits_per_char": -0.517461058381316, "num_chars": 154}, {"sum_logits": -80.81767272949219, "num_tokens": 41, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -92.69239807128906, "logits_per_token": -1.9711627494998094, "logits_per_char": -0.45403186926681005, "num_chars": 178}, {"sum_logits": -61.34272766113281, "num_tokens": 25, "num_tokens_all": 444, "is_greedy": false, "sum_logits_uncond": -75.55203247070312, "logits_per_token": -2.4537091064453125, "logits_per_char": -0.5955604627294447, "num_chars": 103}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 729, "native_id": 21240, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 38.93220138549805, "incorrect_loss_raw": 31.09392738342285, "correct_loss_per_char": 0.4424113793806596, "incorrect_loss_per_char": 0.5621285272421558, "correct_loss_per_token": 2.433262586593628, "incorrect_loss_per_token": 2.95194901261605, "correct_loss_uncond": -32.7844123840332, "incorrect_loss_uncond": -27.03887875874837}, "model_output": [{"sum_logits": -32.429813385009766, "num_tokens": 9, "num_tokens_all": 405, "is_greedy": false, "sum_logits_uncond": -65.12493896484375, "logits_per_token": -3.6033125983344183, "logits_per_char": -0.6358786938237209, "num_chars": 51}, {"sum_logits": -19.981584548950195, "num_tokens": 13, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -54.849308013916016, "logits_per_token": -1.5370449653038611, "logits_per_char": -0.30740899306077224, "num_chars": 65}, {"sum_logits": -40.870384216308594, "num_tokens": 11, "num_tokens_all": 407, "is_greedy": false, "sum_logits_uncond": -54.424171447753906, "logits_per_token": -3.715489474209872, "logits_per_char": -0.7430978948419744, "num_chars": 55}, {"sum_logits": -38.93220138549805, "num_tokens": 16, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -71.71661376953125, "logits_per_token": -2.433262586593628, "logits_per_char": -0.4424113793806596, "num_chars": 88}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 730, "native_id": 2583, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 54.09366226196289, "incorrect_loss_raw": 79.52640279134114, "correct_loss_per_char": 0.4507805188496908, "incorrect_loss_per_char": 0.6161483764434666, "correct_loss_per_token": 2.2539025942484536, "incorrect_loss_per_token": 2.7584380785419924, "correct_loss_uncond": -13.297420501708984, "incorrect_loss_uncond": -17.574717203776043}, "model_output": [{"sum_logits": -66.15736389160156, "num_tokens": 23, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -89.47750854492188, "logits_per_token": -2.876407125721807, "logits_per_char": -0.6423045038019569, "num_chars": 103}, {"sum_logits": -92.47897338867188, "num_tokens": 35, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -99.37431335449219, "logits_per_token": -2.642256382533482, "logits_per_char": -0.6005128142121551, "num_chars": 154}, {"sum_logits": -54.09366226196289, "num_tokens": 24, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -67.39108276367188, "logits_per_token": -2.2539025942484536, "logits_per_char": -0.4507805188496908, "num_chars": 120}, {"sum_logits": -79.94287109375, "num_tokens": 29, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -102.4515380859375, "logits_per_token": -2.7566507273706895, "logits_per_char": -0.6056278113162878, "num_chars": 132}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 731, "native_id": 18769, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 13.783796310424805, "incorrect_loss_raw": 22.951169967651367, "correct_loss_per_char": 0.34459490776062013, "incorrect_loss_per_char": 0.6384953649617238, "correct_loss_per_token": 1.2530723918568005, "incorrect_loss_per_token": 2.4968005427607785, "correct_loss_uncond": -32.02905464172363, "incorrect_loss_uncond": -16.471877415974934}, "model_output": [{"sum_logits": -28.80823516845703, "num_tokens": 9, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -46.24034118652344, "logits_per_token": -3.2009150187174478, "logits_per_char": -0.7026398821574885, "num_chars": 41}, {"sum_logits": -25.64632225036621, "num_tokens": 9, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -37.665435791015625, "logits_per_token": -2.849591361151801, "logits_per_char": -0.8014475703239441, "num_chars": 32}, {"sum_logits": -13.783796310424805, "num_tokens": 11, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -45.81285095214844, "logits_per_token": -1.2530723918568005, "logits_per_char": -0.34459490776062013, "num_chars": 40}, {"sum_logits": -14.39895248413086, "num_tokens": 10, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -34.363365173339844, "logits_per_token": -1.439895248413086, "logits_per_char": -0.41139864240373886, "num_chars": 35}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 732, "native_id": 18339, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 126.30789184570312, "incorrect_loss_raw": 93.79521814982097, "correct_loss_per_char": 0.517655294449603, "incorrect_loss_per_char": 0.516646470666421, "correct_loss_per_token": 2.1051315307617187, "incorrect_loss_per_token": 2.1379164312356984, "correct_loss_uncond": -37.47523498535156, "incorrect_loss_uncond": -29.007882436116535}, "model_output": [{"sum_logits": -133.56600952148438, "num_tokens": 62, "num_tokens_all": 496, "is_greedy": false, "sum_logits_uncond": -159.17379760742188, "logits_per_token": -2.154290476152974, "logits_per_char": -0.5279288913892664, "num_chars": 253}, {"sum_logits": -42.888065338134766, "num_tokens": 34, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -79.25949096679688, "logits_per_token": -1.2614136864157284, "logits_per_char": -0.322466656677705, "num_chars": 133}, {"sum_logits": -126.30789184570312, "num_tokens": 60, "num_tokens_all": 494, "is_greedy": false, "sum_logits_uncond": -163.7831268310547, "logits_per_token": -2.1051315307617187, "logits_per_char": -0.517655294449603, "num_chars": 244}, {"sum_logits": -104.93157958984375, "num_tokens": 35, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -129.97601318359375, "logits_per_token": -2.998045131138393, "logits_per_char": -0.6995438639322916, "num_chars": 150}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 733, "native_id": 16820, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 145.49501037597656, "incorrect_loss_raw": 172.98802693684897, "correct_loss_per_char": 0.5728150014802227, "incorrect_loss_per_char": 0.7059818346635818, "correct_loss_per_token": 2.852843340705423, "incorrect_loss_per_token": 3.557175527940521, "correct_loss_uncond": -42.67924499511719, "incorrect_loss_uncond": -15.761545817057291}, "model_output": [{"sum_logits": -145.49501037597656, "num_tokens": 51, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -188.17425537109375, "logits_per_token": -2.852843340705423, "logits_per_char": -0.5728150014802227, "num_chars": 254}, {"sum_logits": -168.76675415039062, "num_tokens": 48, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -180.4810791015625, "logits_per_token": -3.5159740447998047, "logits_per_char": -0.6368556760392099, "num_chars": 265}, {"sum_logits": -173.14627075195312, "num_tokens": 47, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -180.35055541992188, "logits_per_token": -3.683963207488364, "logits_per_char": -0.7244613838993854, "num_chars": 239}, {"sum_logits": -177.05105590820312, "num_tokens": 51, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -205.41708374023438, "logits_per_token": -3.4715893315333948, "logits_per_char": -0.7566284440521501, "num_chars": 234}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 734, "native_id": 18893, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 145.5867919921875, "incorrect_loss_raw": 105.79220581054688, "correct_loss_per_char": 0.6470524088541667, "incorrect_loss_per_char": 0.7053120317475369, "correct_loss_per_token": 2.6960517035590277, "incorrect_loss_per_token": 2.967018673466701, "correct_loss_uncond": -20.3519287109375, "incorrect_loss_uncond": -20.733139038085938}, "model_output": [{"sum_logits": -150.1288299560547, "num_tokens": 56, "num_tokens_all": 495, "is_greedy": false, "sum_logits_uncond": -168.8148651123047, "logits_per_token": -2.6808719635009766, "logits_per_char": -0.6203670659341103, "num_chars": 242}, {"sum_logits": -145.5867919921875, "num_tokens": 54, "num_tokens_all": 493, "is_greedy": false, "sum_logits_uncond": -165.938720703125, "logits_per_token": -2.6960517035590277, "logits_per_char": -0.6470524088541667, "num_chars": 225}, {"sum_logits": -63.19781494140625, "num_tokens": 20, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -83.57803344726562, "logits_per_token": -3.1598907470703126, "logits_per_char": -0.7523549397786459, "num_chars": 84}, {"sum_logits": -104.04997253417969, "num_tokens": 34, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -127.18313598632812, "logits_per_token": -3.0602933098288143, "logits_per_char": -0.7432140895298549, "num_chars": 140}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 735, "native_id": 23136, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 131.091552734375, "incorrect_loss_raw": 140.5927937825521, "correct_loss_per_char": 0.4615899744168134, "incorrect_loss_per_char": 0.7107439726133568, "correct_loss_per_token": 2.1143798828125, "incorrect_loss_per_token": 3.1961758832906875, "correct_loss_uncond": -22.344436645507812, "incorrect_loss_uncond": -18.662378946940105}, "model_output": [{"sum_logits": -131.091552734375, "num_tokens": 62, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -153.4359893798828, "logits_per_token": -2.1143798828125, "logits_per_char": -0.4615899744168134, "num_chars": 284}, {"sum_logits": -116.13174438476562, "num_tokens": 43, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -131.3892364501953, "logits_per_token": -2.700738241506177, "logits_per_char": -0.6017188828226199, "num_chars": 193}, {"sum_logits": -124.83627319335938, "num_tokens": 40, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -147.79977416992188, "logits_per_token": -3.1209068298339844, "logits_per_char": -0.6570330168071546, "num_chars": 190}, {"sum_logits": -180.81036376953125, "num_tokens": 48, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -198.57650756835938, "logits_per_token": -3.766882578531901, "logits_per_char": -0.8734800182102959, "num_chars": 207}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 736, "native_id": 19162, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 102.71502685546875, "incorrect_loss_raw": 111.93345387776692, "correct_loss_per_char": 0.6006726716694079, "incorrect_loss_per_char": 0.716718500839821, "correct_loss_per_token": 2.4455958775111606, "incorrect_loss_per_token": 2.769681030536141, "correct_loss_uncond": -25.28008270263672, "incorrect_loss_uncond": -28.374135335286457}, "model_output": [{"sum_logits": -123.38999938964844, "num_tokens": 39, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -136.48599243164062, "logits_per_token": -3.1638461381961136, "logits_per_char": -0.7050857107979911, "num_chars": 175}, {"sum_logits": -88.18862915039062, "num_tokens": 31, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -124.53348541259766, "logits_per_token": -2.844794488722278, "logits_per_char": -0.8645944034352022, "num_chars": 102}, {"sum_logits": -102.71502685546875, "num_tokens": 42, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -127.99510955810547, "logits_per_token": -2.4455958775111606, "logits_per_char": -0.6006726716694079, "num_chars": 171}, {"sum_logits": -124.22173309326172, "num_tokens": 54, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -159.90328979492188, "logits_per_token": -2.300402464690032, "logits_per_char": -0.5804753882862698, "num_chars": 214}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 737, "native_id": 23513, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 16.57793617248535, "incorrect_loss_raw": 27.13311195373535, "correct_loss_per_char": 0.33155872344970705, "incorrect_loss_per_char": 0.5227197722384804, "correct_loss_per_token": 1.841992908053928, "incorrect_loss_per_token": 2.1398918626700825, "correct_loss_uncond": -25.680299758911133, "incorrect_loss_uncond": -27.744789759318035}, "model_output": [{"sum_logits": -19.813316345214844, "num_tokens": 9, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -47.11585235595703, "logits_per_token": -2.2014795939127603, "logits_per_char": -0.5827445983886719, "num_chars": 34}, {"sum_logits": -10.826494216918945, "num_tokens": 7, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -35.90303039550781, "logits_per_token": -1.5466420309884208, "logits_per_char": -0.45110392570495605, "num_chars": 24}, {"sum_logits": -16.57793617248535, "num_tokens": 9, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -42.258235931396484, "logits_per_token": -1.841992908053928, "logits_per_char": -0.33155872344970705, "num_chars": 50}, {"sum_logits": -50.759525299072266, "num_tokens": 19, "num_tokens_all": 428, "is_greedy": false, "sum_logits_uncond": -81.61482238769531, "logits_per_token": -2.6715539631090666, "logits_per_char": -0.5343107926218134, "num_chars": 95}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 738, "native_id": 7188, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 71.98319244384766, "incorrect_loss_raw": 54.46414057413737, "correct_loss_per_char": 0.5667967908964382, "incorrect_loss_per_char": 0.6345594358467311, "correct_loss_per_token": 2.66604416458695, "incorrect_loss_per_token": 3.101099823391627, "correct_loss_uncond": -21.650840759277344, "incorrect_loss_uncond": -20.106482187906902}, "model_output": [{"sum_logits": -71.98319244384766, "num_tokens": 27, "num_tokens_all": 433, "is_greedy": false, "sum_logits_uncond": -93.634033203125, "logits_per_token": -2.66604416458695, "logits_per_char": -0.5667967908964382, "num_chars": 127}, {"sum_logits": -43.9902229309082, "num_tokens": 16, "num_tokens_all": 422, "is_greedy": false, "sum_logits_uncond": -67.04389953613281, "logits_per_token": -2.7493889331817627, "logits_per_char": -0.5568382649482051, "num_chars": 79}, {"sum_logits": -45.574806213378906, "num_tokens": 15, "num_tokens_all": 421, "is_greedy": false, "sum_logits_uncond": -68.167236328125, "logits_per_token": -3.0383204142252604, "logits_per_char": -0.6158757596402555, "num_chars": 74}, {"sum_logits": -73.827392578125, "num_tokens": 21, "num_tokens_all": 427, "is_greedy": false, "sum_logits_uncond": -88.500732421875, "logits_per_token": -3.515590122767857, "logits_per_char": -0.7309642829517327, "num_chars": 101}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 739, "native_id": 32528, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 44.343563079833984, "incorrect_loss_raw": 42.25502332051595, "correct_loss_per_char": 0.43052002990130084, "incorrect_loss_per_char": 0.6374148231178626, "correct_loss_per_token": 1.7055216569166918, "incorrect_loss_per_token": 2.8561993370279235, "correct_loss_uncond": -32.1288948059082, "incorrect_loss_uncond": -20.824963251749676}, "model_output": [{"sum_logits": -29.81483268737793, "num_tokens": 10, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -48.79295349121094, "logits_per_token": -2.981483268737793, "logits_per_char": -0.6625518374972873, "num_chars": 45}, {"sum_logits": -34.51853561401367, "num_tokens": 15, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -56.10717010498047, "logits_per_token": -2.3012357076009113, "logits_per_char": -0.5152020240897563, "num_chars": 67}, {"sum_logits": -62.43170166015625, "num_tokens": 19, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -84.33983612060547, "logits_per_token": -3.285879034745066, "logits_per_char": -0.7344906077665441, "num_chars": 85}, {"sum_logits": -44.343563079833984, "num_tokens": 26, "num_tokens_all": 426, "is_greedy": false, "sum_logits_uncond": -76.47245788574219, "logits_per_token": -1.7055216569166918, "logits_per_char": -0.43052002990130084, "num_chars": 103}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 740, "native_id": 33340, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 26.622072219848633, "incorrect_loss_raw": 16.384379069010418, "correct_loss_per_char": 0.619117958601131, "incorrect_loss_per_char": 0.433396413591173, "correct_loss_per_token": 2.2185060183207193, "incorrect_loss_per_token": 1.7441805839538576, "correct_loss_uncond": -25.87089729309082, "incorrect_loss_uncond": -24.820601145426433}, "model_output": [{"sum_logits": -12.68912124633789, "num_tokens": 8, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -32.51617431640625, "logits_per_token": -1.5861401557922363, "logits_per_char": -0.42297070821126304, "num_chars": 30}, {"sum_logits": -12.377471923828125, "num_tokens": 10, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -40.71980285644531, "logits_per_token": -1.2377471923828125, "logits_per_char": -0.275054931640625, "num_chars": 45}, {"sum_logits": -24.086544036865234, "num_tokens": 10, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -50.378963470458984, "logits_per_token": -2.4086544036865236, "logits_per_char": -0.6021636009216309, "num_chars": 40}, {"sum_logits": -26.622072219848633, "num_tokens": 12, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -52.49296951293945, "logits_per_token": -2.2185060183207193, "logits_per_char": -0.619117958601131, "num_chars": 43}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 741, "native_id": 30821, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 85.81854248046875, "incorrect_loss_raw": 80.98448689778645, "correct_loss_per_char": 0.5959621005588107, "incorrect_loss_per_char": 0.710034595134989, "correct_loss_per_token": 2.768340080015121, "incorrect_loss_per_token": 3.1858333355535273, "correct_loss_uncond": -18.619659423828125, "incorrect_loss_uncond": -17.710980733235676}, "model_output": [{"sum_logits": -109.4403076171875, "num_tokens": 35, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -129.44232177734375, "logits_per_token": -3.126865931919643, "logits_per_char": -0.6714129301667945, "num_chars": 163}, {"sum_logits": -62.46619415283203, "num_tokens": 18, "num_tokens_all": 444, "is_greedy": false, "sum_logits_uncond": -76.84032440185547, "logits_per_token": -3.4703441196017795, "logits_per_char": -0.800848642985026, "num_chars": 78}, {"sum_logits": -71.04695892333984, "num_tokens": 24, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -89.80375671386719, "logits_per_token": -2.96028995513916, "logits_per_char": -0.6578422122531467, "num_chars": 108}, {"sum_logits": -85.81854248046875, "num_tokens": 31, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -104.43820190429688, "logits_per_token": -2.768340080015121, "logits_per_char": -0.5959621005588107, "num_chars": 144}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 742, "native_id": 19555, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 64.94920349121094, "incorrect_loss_raw": 71.73086929321289, "correct_loss_per_char": 0.3400481858178583, "incorrect_loss_per_char": 0.5052789718128937, "correct_loss_per_token": 1.6653641920823317, "incorrect_loss_per_token": 2.1982032288213653, "correct_loss_uncond": -22.767616271972656, "incorrect_loss_uncond": -21.42106755574544}, "model_output": [{"sum_logits": -64.94920349121094, "num_tokens": 39, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -87.7168197631836, "logits_per_token": -1.6653641920823317, "logits_per_char": -0.3400481858178583, "num_chars": 191}, {"sum_logits": -81.82550048828125, "num_tokens": 41, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -118.79841613769531, "logits_per_token": -1.995743914348323, "logits_per_char": -0.5114093780517578, "num_chars": 160}, {"sum_logits": -73.05248260498047, "num_tokens": 29, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -80.85980224609375, "logits_per_token": -2.519051124309671, "logits_per_char": -0.5576525389693165, "num_chars": 131}, {"sum_logits": -60.31462478637695, "num_tokens": 29, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -79.79759216308594, "logits_per_token": -2.079814647806102, "logits_per_char": -0.44677499841760704, "num_chars": 135}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 743, "native_id": 31858, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 135.5319061279297, "incorrect_loss_raw": 100.69653574625652, "correct_loss_per_char": 0.6515957025381235, "incorrect_loss_per_char": 0.7192607187613934, "correct_loss_per_token": 3.0118201361762154, "incorrect_loss_per_token": 3.31137543981979, "correct_loss_uncond": -22.948501586914062, "incorrect_loss_uncond": -29.272244771321613}, "model_output": [{"sum_logits": -135.5319061279297, "num_tokens": 45, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -158.48040771484375, "logits_per_token": -3.0118201361762154, "logits_per_char": -0.6515957025381235, "num_chars": 208}, {"sum_logits": -56.253662109375, "num_tokens": 22, "num_tokens_all": 429, "is_greedy": false, "sum_logits_uncond": -79.51676940917969, "logits_per_token": -2.556984641335227, "logits_per_char": -0.5515064912683824, "num_chars": 102}, {"sum_logits": -116.4780502319336, "num_tokens": 38, "num_tokens_all": 445, "is_greedy": false, "sum_logits_uncond": -150.4111785888672, "logits_per_token": -3.065211848208779, "logits_per_char": -0.7016750013971903, "num_chars": 166}, {"sum_logits": -129.35789489746094, "num_tokens": 30, "num_tokens_all": 437, "is_greedy": false, "sum_logits_uncond": -159.9783935546875, "logits_per_token": -4.311929829915365, "logits_per_char": -0.9046006636186079, "num_chars": 143}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 744, "native_id": 23838, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 24.91474151611328, "incorrect_loss_raw": 51.82611083984375, "correct_loss_per_char": 0.7118497576032367, "incorrect_loss_per_char": 0.6051046516908155, "correct_loss_per_token": 2.768304612901476, "incorrect_loss_per_token": 2.7436712961348277, "correct_loss_uncond": -19.225963592529297, "incorrect_loss_uncond": -20.927963256835938}, "model_output": [{"sum_logits": -24.91474151611328, "num_tokens": 9, "num_tokens_all": 407, "is_greedy": false, "sum_logits_uncond": -44.14070510864258, "logits_per_token": -2.768304612901476, "logits_per_char": -0.7118497576032367, "num_chars": 35}, {"sum_logits": -54.53779602050781, "num_tokens": 21, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -74.51545715332031, "logits_per_token": -2.597037905738467, "logits_per_char": -0.5928021306576936, "num_chars": 92}, {"sum_logits": -53.98460388183594, "num_tokens": 20, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -75.98735046386719, "logits_per_token": -2.699230194091797, "logits_per_char": -0.6205126882969648, "num_chars": 87}, {"sum_logits": -46.9559326171875, "num_tokens": 16, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -67.75941467285156, "logits_per_token": -2.9347457885742188, "logits_per_char": -0.6019991361177884, "num_chars": 78}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 745, "native_id": 14553, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 52.10539245605469, "incorrect_loss_raw": 64.39609336853027, "correct_loss_per_char": 0.6595619298234771, "incorrect_loss_per_char": 0.8277168652710452, "correct_loss_per_token": 2.6052696228027346, "incorrect_loss_per_token": 3.70250829310199, "correct_loss_uncond": -16.10028076171875, "incorrect_loss_uncond": -29.65702756245931}, "model_output": [{"sum_logits": -119.75332641601562, "num_tokens": 34, "num_tokens_all": 434, "is_greedy": false, "sum_logits_uncond": -156.81893920898438, "logits_per_token": -3.522156659294577, "logits_per_char": -0.8493143717447916, "num_chars": 141}, {"sum_logits": -21.768556594848633, "num_tokens": 9, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -50.40271759033203, "logits_per_token": -2.418728510538737, "logits_per_char": -0.6402516645543715, "num_chars": 34}, {"sum_logits": -51.66639709472656, "num_tokens": 10, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -74.93770599365234, "logits_per_token": -5.166639709472657, "logits_per_char": -0.9935845595139724, "num_chars": 52}, {"sum_logits": -52.10539245605469, "num_tokens": 20, "num_tokens_all": 420, "is_greedy": false, "sum_logits_uncond": -68.20567321777344, "logits_per_token": -2.6052696228027346, "logits_per_char": -0.6595619298234771, "num_chars": 79}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 746, "native_id": 8497, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 61.11445236206055, "incorrect_loss_raw": 115.9188944498698, "correct_loss_per_char": 0.477456659078598, "incorrect_loss_per_char": 0.6941023292465472, "correct_loss_per_token": 2.0371484120686847, "incorrect_loss_per_token": 3.059559236608974, "correct_loss_uncond": -27.451190948486328, "incorrect_loss_uncond": -22.567911783854168}, "model_output": [{"sum_logits": -61.11445236206055, "num_tokens": 30, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -88.56564331054688, "logits_per_token": -2.0371484120686847, "logits_per_char": -0.477456659078598, "num_chars": 128}, {"sum_logits": -128.36965942382812, "num_tokens": 37, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -159.54461669921875, "logits_per_token": -3.4694502546980575, "logits_per_char": -0.7211778619316187, "num_chars": 178}, {"sum_logits": -124.36817932128906, "num_tokens": 38, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -138.18527221679688, "logits_per_token": -3.272846824244449, "logits_per_char": -0.7921540084158539, "num_chars": 157}, {"sum_logits": -95.01884460449219, "num_tokens": 39, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -117.73052978515625, "logits_per_token": -2.436380630884415, "logits_per_char": -0.5689751173921688, "num_chars": 167}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 747, "native_id": 44932, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 150.29649353027344, "incorrect_loss_raw": 131.91116333007812, "correct_loss_per_char": 0.5893980138442095, "incorrect_loss_per_char": 0.6379315783870508, "correct_loss_per_token": 2.7326635187322443, "incorrect_loss_per_token": 3.033009937296234, "correct_loss_uncond": -22.603836059570312, "incorrect_loss_uncond": -21.60961405436198}, "model_output": [{"sum_logits": -99.22196960449219, "num_tokens": 36, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -118.12971496582031, "logits_per_token": -2.7561658223470054, "logits_per_char": -0.5637611909346147, "num_chars": 176}, {"sum_logits": -169.77406311035156, "num_tokens": 50, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -193.9373779296875, "logits_per_token": -3.3954812622070314, "logits_per_char": -0.7163462578495846, "num_chars": 237}, {"sum_logits": -150.29649353027344, "num_tokens": 55, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -172.90032958984375, "logits_per_token": -2.7326635187322443, "logits_per_char": -0.5893980138442095, "num_chars": 255}, {"sum_logits": -126.73745727539062, "num_tokens": 43, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -148.4952392578125, "logits_per_token": -2.9473827273346656, "logits_per_char": -0.6336872863769532, "num_chars": 200}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 748, "native_id": 39715, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 62.27141189575195, "incorrect_loss_raw": 72.39083735148112, "correct_loss_per_char": 0.5368225163426893, "incorrect_loss_per_char": 0.7778523080054321, "correct_loss_per_token": 2.8305187225341797, "incorrect_loss_per_token": 3.3350524198091946, "correct_loss_uncond": -38.64858627319336, "incorrect_loss_uncond": -18.08448537190755}, "model_output": [{"sum_logits": -105.31716918945312, "num_tokens": 25, "num_tokens_all": 421, "is_greedy": false, "sum_logits_uncond": -125.66328430175781, "logits_per_token": -4.212686767578125, "logits_per_char": -0.9935581999005012, "num_chars": 106}, {"sum_logits": -62.27141189575195, "num_tokens": 22, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -100.91999816894531, "logits_per_token": -2.8305187225341797, "logits_per_char": -0.5368225163426893, "num_chars": 116}, {"sum_logits": -73.10645294189453, "num_tokens": 26, "num_tokens_all": 422, "is_greedy": false, "sum_logits_uncond": -96.30335998535156, "logits_per_token": -2.811786651611328, "logits_per_char": -0.6832378779616312, "num_chars": 107}, {"sum_logits": -38.7488899230957, "num_tokens": 13, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -49.45932388305664, "logits_per_token": -2.980683840238131, "logits_per_char": -0.6567608461541644, "num_chars": 59}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 749, "native_id": 7335, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 65.53392028808594, "incorrect_loss_raw": 73.48235575358073, "correct_loss_per_char": 0.7201529701987466, "incorrect_loss_per_char": 0.7389301648165622, "correct_loss_per_token": 3.449153699372944, "incorrect_loss_per_token": 3.208377047321875, "correct_loss_uncond": -12.3270263671875, "incorrect_loss_uncond": -18.399612426757812}, "model_output": [{"sum_logits": -51.04240417480469, "num_tokens": 17, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -73.78617858886719, "logits_per_token": -3.0024943632238053, "logits_per_char": -0.7089222802056206, "num_chars": 72}, {"sum_logits": -76.18128967285156, "num_tokens": 18, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -85.59434509277344, "logits_per_token": -4.232293870713976, "logits_per_char": -0.9178468635283321, "num_chars": 83}, {"sum_logits": -93.22337341308594, "num_tokens": 39, "num_tokens_all": 433, "is_greedy": false, "sum_logits_uncond": -116.265380859375, "logits_per_token": -2.3903429080278444, "logits_per_char": -0.5900213507157338, "num_chars": 158}, {"sum_logits": -65.53392028808594, "num_tokens": 19, "num_tokens_all": 413, "is_greedy": false, "sum_logits_uncond": -77.86094665527344, "logits_per_token": -3.449153699372944, "logits_per_char": -0.7201529701987466, "num_chars": 91}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 750, "native_id": 17303, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 55.81402587890625, "incorrect_loss_raw": 96.29305013020833, "correct_loss_per_char": 0.4574920154008709, "incorrect_loss_per_char": 0.7055175936894593, "correct_loss_per_token": 2.0671861436631946, "incorrect_loss_per_token": 2.914539080639029, "correct_loss_uncond": -31.848373413085938, "incorrect_loss_uncond": -21.50220235188802}, "model_output": [{"sum_logits": -113.5025863647461, "num_tokens": 31, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -120.52130889892578, "logits_per_token": -3.6613737537014868, "logits_per_char": -0.9153434384253717, "num_chars": 124}, {"sum_logits": -85.06657409667969, "num_tokens": 34, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -106.84537506103516, "logits_per_token": -2.50195806166705, "logits_per_char": -0.5826477677854773, "num_chars": 146}, {"sum_logits": -55.81402587890625, "num_tokens": 27, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -87.66239929199219, "logits_per_token": -2.0671861436631946, "logits_per_char": -0.4574920154008709, "num_chars": 122}, {"sum_logits": -90.30998992919922, "num_tokens": 35, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -126.01907348632812, "logits_per_token": -2.5802854265485493, "logits_per_char": -0.6185615748575289, "num_chars": 146}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 751, "native_id": 43842, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 132.26962280273438, "incorrect_loss_raw": 102.47008005777995, "correct_loss_per_char": 0.4991306520857901, "incorrect_loss_per_char": 0.5168184432770955, "correct_loss_per_token": 2.4956532604289503, "incorrect_loss_per_token": 2.4186397955070897, "correct_loss_uncond": -25.152633666992188, "incorrect_loss_uncond": -17.703023274739582}, "model_output": [{"sum_logits": -132.26962280273438, "num_tokens": 53, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -157.42225646972656, "logits_per_token": -2.4956532604289503, "logits_per_char": -0.4991306520857901, "num_chars": 265}, {"sum_logits": -80.65776824951172, "num_tokens": 35, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -91.01607513427734, "logits_per_token": -2.3045076642717635, "logits_per_char": -0.4801057633899507, "num_chars": 168}, {"sum_logits": -98.39387512207031, "num_tokens": 37, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -126.70611572265625, "logits_per_token": -2.6592939222181164, "logits_per_char": -0.5527745793374736, "num_chars": 178}, {"sum_logits": -128.3585968017578, "num_tokens": 56, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -142.797119140625, "logits_per_token": -2.2921178000313893, "logits_per_char": -0.5175749871038622, "num_chars": 248}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 752, "native_id": 35833, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 51.316532135009766, "incorrect_loss_raw": 88.22416432698567, "correct_loss_per_char": 0.44238389771560144, "incorrect_loss_per_char": 0.7117593163083855, "correct_loss_per_token": 1.8327332905360632, "incorrect_loss_per_token": 3.0178676464648166, "correct_loss_uncond": -39.01398849487305, "incorrect_loss_uncond": -17.96740214029948}, "model_output": [{"sum_logits": -72.73004150390625, "num_tokens": 25, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -89.94613647460938, "logits_per_token": -2.90920166015625, "logits_per_char": -0.7992312253176511, "num_chars": 91}, {"sum_logits": -113.47721862792969, "num_tokens": 33, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -122.6115493774414, "logits_per_token": -3.438703594785748, "logits_per_char": -0.7182102444805677, "num_chars": 158}, {"sum_logits": -51.316532135009766, "num_tokens": 28, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -90.33052062988281, "logits_per_token": -1.8327332905360632, "logits_per_char": -0.44238389771560144, "num_chars": 116}, {"sum_logits": -78.4652328491211, "num_tokens": 29, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -106.01701354980469, "logits_per_token": -2.7056976844524514, "logits_per_char": -0.6178364791269377, "num_chars": 127}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 753, "native_id": 48697, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 114.80564880371094, "incorrect_loss_raw": 137.56302897135416, "correct_loss_per_char": 0.5194825737724477, "incorrect_loss_per_char": 0.8566438173096036, "correct_loss_per_token": 2.391784350077311, "incorrect_loss_per_token": 3.8080654607610662, "correct_loss_uncond": -22.385238647460938, "incorrect_loss_uncond": -11.300094604492188}, "model_output": [{"sum_logits": -135.54095458984375, "num_tokens": 32, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -139.6838836669922, "logits_per_token": -4.235654830932617, "logits_per_char": -0.9283627026701626, "num_chars": 146}, {"sum_logits": -114.80564880371094, "num_tokens": 48, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -137.19088745117188, "logits_per_token": -2.391784350077311, "logits_per_char": -0.5194825737724477, "num_chars": 221}, {"sum_logits": -121.78955078125, "num_tokens": 38, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -139.71002197265625, "logits_per_token": -3.2049881784539473, "logits_per_char": -0.6766086154513888, "num_chars": 180}, {"sum_logits": -155.35858154296875, "num_tokens": 39, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -167.19546508789062, "logits_per_token": -3.9835533728966346, "logits_per_char": -0.9649601338072593, "num_chars": 161}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 754, "native_id": 19545, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 37.91959762573242, "incorrect_loss_raw": 89.18044153849284, "correct_loss_per_char": 0.8618090369484641, "incorrect_loss_per_char": 0.8588355794133463, "correct_loss_per_token": 4.739949703216553, "incorrect_loss_per_token": 4.101961980942121, "correct_loss_uncond": -11.922046661376953, "incorrect_loss_uncond": -11.834672292073568}, "model_output": [{"sum_logits": -62.18564987182617, "num_tokens": 14, "num_tokens_all": 400, "is_greedy": false, "sum_logits_uncond": -76.43604278564453, "logits_per_token": -4.44183213370187, "logits_per_char": -0.8518582174222763, "num_chars": 73}, {"sum_logits": -37.91959762573242, "num_tokens": 8, "num_tokens_all": 394, "is_greedy": false, "sum_logits_uncond": -49.841644287109375, "logits_per_token": -4.739949703216553, "logits_per_char": -0.8618090369484641, "num_chars": 44}, {"sum_logits": -87.02389526367188, "num_tokens": 23, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -96.81546020507812, "logits_per_token": -3.783647620159647, "logits_per_char": -0.8971535594192976, "num_chars": 97}, {"sum_logits": -118.33177947998047, "num_tokens": 29, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -129.79383850097656, "logits_per_token": -4.080406188964844, "logits_per_char": -0.8274949613984648, "num_chars": 143}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 755, "native_id": 36822, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 44.581298828125, "incorrect_loss_raw": 106.3263651529948, "correct_loss_per_char": 0.3326962599113806, "incorrect_loss_per_char": 0.5024166368817706, "correct_loss_per_token": 1.3931655883789062, "incorrect_loss_per_token": 2.424424984531262, "correct_loss_uncond": -39.19464874267578, "incorrect_loss_uncond": -19.532376607259113}, "model_output": [{"sum_logits": -86.99606323242188, "num_tokens": 44, "num_tokens_all": 496, "is_greedy": false, "sum_logits_uncond": -104.27235412597656, "logits_per_token": -1.9771832552823154, "logits_per_char": -0.46521958947819186, "num_chars": 187}, {"sum_logits": -157.21151733398438, "num_tokens": 48, "num_tokens_all": 500, "is_greedy": false, "sum_logits_uncond": -172.33995056152344, "logits_per_token": -3.275239944458008, "logits_per_char": -0.6443094972704277, "num_chars": 244}, {"sum_logits": -74.77151489257812, "num_tokens": 37, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -100.96392059326172, "logits_per_token": -2.0208517538534627, "logits_per_char": -0.39772082389669217, "num_chars": 188}, {"sum_logits": -44.581298828125, "num_tokens": 32, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -83.77594757080078, "logits_per_token": -1.3931655883789062, "logits_per_char": -0.3326962599113806, "num_chars": 134}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 756, "native_id": 28877, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 110.30149841308594, "incorrect_loss_raw": 125.02432505289714, "correct_loss_per_char": 0.6302942766462054, "incorrect_loss_per_char": 0.7187199080659895, "correct_loss_per_token": 2.690280449099657, "incorrect_loss_per_token": 3.4580560351174974, "correct_loss_uncond": -20.00860595703125, "incorrect_loss_uncond": -18.81761423746745}, "model_output": [{"sum_logits": -110.30149841308594, "num_tokens": 41, "num_tokens_all": 496, "is_greedy": false, "sum_logits_uncond": -130.3101043701172, "logits_per_token": -2.690280449099657, "logits_per_char": -0.6302942766462054, "num_chars": 175}, {"sum_logits": -131.46424865722656, "num_tokens": 35, "num_tokens_all": 490, "is_greedy": false, "sum_logits_uncond": -144.0288543701172, "logits_per_token": -3.756121390206473, "logits_per_char": -0.8268191739448212, "num_chars": 159}, {"sum_logits": -96.8023910522461, "num_tokens": 31, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -118.18797302246094, "logits_per_token": -3.1226577758789062, "logits_per_char": -0.6368578358700401, "num_chars": 152}, {"sum_logits": -146.80633544921875, "num_tokens": 42, "num_tokens_all": 497, "is_greedy": false, "sum_logits_uncond": -169.30899047851562, "logits_per_token": -3.495388939267113, "logits_per_char": -0.6924827143831073, "num_chars": 212}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 757, "native_id": 19794, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 69.59394836425781, "incorrect_loss_raw": 64.93437321980794, "correct_loss_per_char": 0.46707347895475043, "incorrect_loss_per_char": 0.46136869568613187, "correct_loss_per_token": 1.8809175233583193, "incorrect_loss_per_token": 2.259812876390107, "correct_loss_uncond": -35.14191436767578, "incorrect_loss_uncond": -23.904468536376953}, "model_output": [{"sum_logits": -72.56189727783203, "num_tokens": 33, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -99.72663116455078, "logits_per_token": -2.198845372055516, "logits_per_char": -0.42433858057211715, "num_chars": 171}, {"sum_logits": -71.38050842285156, "num_tokens": 29, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -96.53025817871094, "logits_per_token": -2.461396842167295, "logits_per_char": -0.5287445068359375, "num_chars": 135}, {"sum_logits": -69.59394836425781, "num_tokens": 37, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -104.7358627319336, "logits_per_token": -1.8809175233583193, "logits_per_char": -0.46707347895475043, "num_chars": 149}, {"sum_logits": -50.860713958740234, "num_tokens": 24, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -70.25963592529297, "logits_per_token": -2.1191964149475098, "logits_per_char": -0.43102299965034097, "num_chars": 118}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 758, "native_id": 4091, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 29.266830444335938, "incorrect_loss_raw": 33.82320912679037, "correct_loss_per_char": 0.532124189897017, "incorrect_loss_per_char": 0.6852438088619349, "correct_loss_per_token": 2.926683044433594, "incorrect_loss_per_token": 3.238816846318605, "correct_loss_uncond": -18.264545440673828, "incorrect_loss_uncond": -25.022467295328777}, "model_output": [{"sum_logits": -39.975547790527344, "num_tokens": 17, "num_tokens_all": 430, "is_greedy": false, "sum_logits_uncond": -71.59307861328125, "logits_per_token": -2.351502811207491, "logits_per_char": -0.5060195922851562, "num_chars": 79}, {"sum_logits": -33.13148498535156, "num_tokens": 10, "num_tokens_all": 423, "is_greedy": false, "sum_logits_uncond": -57.560569763183594, "logits_per_token": -3.3131484985351562, "logits_per_char": -0.6902392705281576, "num_chars": 48}, {"sum_logits": -29.266830444335938, "num_tokens": 10, "num_tokens_all": 423, "is_greedy": false, "sum_logits_uncond": -47.531375885009766, "logits_per_token": -2.926683044433594, "logits_per_char": -0.532124189897017, "num_chars": 55}, {"sum_logits": -28.362594604492188, "num_tokens": 7, "num_tokens_all": 420, "is_greedy": false, "sum_logits_uncond": -47.38338088989258, "logits_per_token": -4.051799229213169, "logits_per_char": -0.8594725637724905, "num_chars": 33}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 759, "native_id": 27013, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 75.51058959960938, "incorrect_loss_raw": 110.44869740804036, "correct_loss_per_char": 0.5067824805342911, "incorrect_loss_per_char": 0.6738800843703823, "correct_loss_per_token": 2.288199684836648, "incorrect_loss_per_token": 3.074987964437465, "correct_loss_uncond": -13.376396179199219, "incorrect_loss_uncond": -13.338302612304688}, "model_output": [{"sum_logits": -119.83100128173828, "num_tokens": 40, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -136.51943969726562, "logits_per_token": -2.995775032043457, "logits_per_char": -0.6145179552909655, "num_chars": 195}, {"sum_logits": -104.15752410888672, "num_tokens": 35, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -124.13015747070312, "logits_per_token": -2.9759292602539062, "logits_per_char": -0.7233161396450467, "num_chars": 144}, {"sum_logits": -107.3575668334961, "num_tokens": 33, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -110.7114028930664, "logits_per_token": -3.253259601015033, "logits_per_char": -0.6838061581751343, "num_chars": 157}, {"sum_logits": -75.51058959960938, "num_tokens": 33, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -88.8869857788086, "logits_per_token": -2.288199684836648, "logits_per_char": -0.5067824805342911, "num_chars": 149}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 760, "native_id": 14291, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 85.4683837890625, "incorrect_loss_raw": 130.90350087483725, "correct_loss_per_char": 0.5117867292758234, "incorrect_loss_per_char": 0.6430551780237711, "correct_loss_per_token": 2.589951023910985, "incorrect_loss_per_token": 2.685139892913483, "correct_loss_uncond": -23.285858154296875, "incorrect_loss_uncond": -16.356163024902344}, "model_output": [{"sum_logits": -121.00942993164062, "num_tokens": 56, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -142.07830810546875, "logits_per_token": -2.1608826773507253, "logits_per_char": -0.4764150784710261, "num_chars": 254}, {"sum_logits": -71.6776351928711, "num_tokens": 35, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -90.1357421875, "logits_per_token": -2.047932434082031, "logits_per_char": -0.5047720788230359, "num_chars": 142}, {"sum_logits": -85.4683837890625, "num_tokens": 33, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -108.75424194335938, "logits_per_token": -2.589951023910985, "logits_per_char": -0.5117867292758234, "num_chars": 167}, {"sum_logits": -200.0234375, "num_tokens": 52, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -209.56494140625, "logits_per_token": -3.8466045673076925, "logits_per_char": -0.9479783767772512, "num_chars": 211}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 761, "native_id": 48360, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 66.39469909667969, "incorrect_loss_raw": 65.87605412801106, "correct_loss_per_char": 0.677496929557956, "incorrect_loss_per_char": 0.6920525389876254, "correct_loss_per_token": 3.6885943942599826, "incorrect_loss_per_token": 3.7945623730856277, "correct_loss_uncond": -21.056259155273438, "incorrect_loss_uncond": -19.02865982055664}, "model_output": [{"sum_logits": -78.99642944335938, "num_tokens": 25, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -94.46119689941406, "logits_per_token": -3.159857177734375, "logits_per_char": -0.5602583648465204, "num_chars": 141}, {"sum_logits": -52.47170639038086, "num_tokens": 15, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -69.96367645263672, "logits_per_token": -3.498113759358724, "logits_per_char": -0.718790498498368, "num_chars": 73}, {"sum_logits": -66.16002655029297, "num_tokens": 14, "num_tokens_all": 407, "is_greedy": false, "sum_logits_uncond": -90.28926849365234, "logits_per_token": -4.725716182163784, "logits_per_char": -0.7971087536179876, "num_chars": 83}, {"sum_logits": -66.39469909667969, "num_tokens": 18, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -87.45095825195312, "logits_per_token": -3.6885943942599826, "logits_per_char": -0.677496929557956, "num_chars": 98}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 762, "native_id": 41692, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 87.45802307128906, "incorrect_loss_raw": 120.08141072591145, "correct_loss_per_char": 0.5205834706624349, "incorrect_loss_per_char": 0.6805401277190986, "correct_loss_per_token": 2.242513412084335, "incorrect_loss_per_token": 2.9249073717981546, "correct_loss_uncond": -32.711585998535156, "incorrect_loss_uncond": -18.984090169270832}, "model_output": [{"sum_logits": -128.7239990234375, "num_tokens": 48, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -150.90194702148438, "logits_per_token": -2.6817499796549478, "logits_per_char": -0.6218550677460749, "num_chars": 207}, {"sum_logits": -115.13748168945312, "num_tokens": 34, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -131.10800170898438, "logits_per_token": -3.386396520278033, "logits_per_char": -0.7940515978582974, "num_chars": 145}, {"sum_logits": -116.38275146484375, "num_tokens": 43, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -135.18655395507812, "logits_per_token": -2.7065756154614826, "logits_per_char": -0.6257137175529234, "num_chars": 186}, {"sum_logits": -87.45802307128906, "num_tokens": 39, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -120.16960906982422, "logits_per_token": -2.242513412084335, "logits_per_char": -0.5205834706624349, "num_chars": 168}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 763, "native_id": 41507, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 74.9820556640625, "incorrect_loss_raw": 43.42649586995443, "correct_loss_per_char": 0.5680458762428977, "incorrect_loss_per_char": 0.8099469985002651, "correct_loss_per_token": 2.49940185546875, "incorrect_loss_per_token": 3.42519372056573, "correct_loss_uncond": -46.201576232910156, "incorrect_loss_uncond": -13.90710194905599}, "model_output": [{"sum_logits": -23.95867156982422, "num_tokens": 7, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -31.728370666503906, "logits_per_token": -3.4226673671177457, "logits_per_char": -0.8873582062897859, "num_chars": 27}, {"sum_logits": -41.34941101074219, "num_tokens": 11, "num_tokens_all": 420, "is_greedy": false, "sum_logits_uncond": -56.3040771484375, "logits_per_token": -3.7590373646129263, "logits_per_char": -0.8438655308314732, "num_chars": 49}, {"sum_logits": -64.97140502929688, "num_tokens": 21, "num_tokens_all": 430, "is_greedy": false, "sum_logits_uncond": -83.96834564208984, "logits_per_token": -3.093876429966518, "logits_per_char": -0.6986172583795363, "num_chars": 93}, {"sum_logits": -74.9820556640625, "num_tokens": 30, "num_tokens_all": 439, "is_greedy": false, "sum_logits_uncond": -121.18363189697266, "logits_per_token": -2.49940185546875, "logits_per_char": -0.5680458762428977, "num_chars": 132}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 764, "native_id": 45622, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 113.67814636230469, "incorrect_loss_raw": 92.86233647664388, "correct_loss_per_char": 0.7240646265114948, "incorrect_loss_per_char": 0.609578447970902, "correct_loss_per_token": 3.247947038922991, "incorrect_loss_per_token": 2.498902327834594, "correct_loss_uncond": -35.955780029296875, "incorrect_loss_uncond": -17.012688954671223}, "model_output": [{"sum_logits": -78.5749740600586, "num_tokens": 33, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -100.14881134033203, "logits_per_token": -2.3810598200017754, "logits_per_char": -0.5952649550004439, "num_chars": 132}, {"sum_logits": -141.94427490234375, "num_tokens": 43, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -151.7159423828125, "logits_per_token": -3.3010296488917152, "logits_per_char": -0.8065015619451349, "num_chars": 176}, {"sum_logits": -58.0677604675293, "num_tokens": 32, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -77.76032257080078, "logits_per_token": -1.8146175146102905, "logits_per_char": -0.4269688269671272, "num_chars": 136}, {"sum_logits": -113.67814636230469, "num_tokens": 35, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -149.63392639160156, "logits_per_token": -3.247947038922991, "logits_per_char": -0.7240646265114948, "num_chars": 157}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 765, "native_id": 3899, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 78.11946105957031, "incorrect_loss_raw": 96.62759908040364, "correct_loss_per_char": 0.531424905167145, "incorrect_loss_per_char": 0.4966883327063427, "correct_loss_per_token": 2.4412331581115723, "incorrect_loss_per_token": 2.564923931197397, "correct_loss_uncond": -31.47583770751953, "incorrect_loss_uncond": -19.13104756673177}, "model_output": [{"sum_logits": -92.5152359008789, "num_tokens": 33, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -103.07846069335938, "logits_per_token": -2.8034919969963306, "logits_per_char": -0.5197485162970725, "num_chars": 178}, {"sum_logits": -83.77217864990234, "num_tokens": 46, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -113.98959350585938, "logits_per_token": -1.821134318476138, "logits_per_char": -0.3461660274789353, "num_chars": 242}, {"sum_logits": -113.59538269042969, "num_tokens": 37, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -130.2078857421875, "logits_per_token": -3.0701454781197213, "logits_per_char": -0.6241504543430203, "num_chars": 182}, {"sum_logits": -78.11946105957031, "num_tokens": 32, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -109.59529876708984, "logits_per_token": -2.4412331581115723, "logits_per_char": -0.531424905167145, "num_chars": 147}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 766, "native_id": 27376, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 110.58830261230469, "incorrect_loss_raw": 86.53974151611328, "correct_loss_per_char": 0.604307664548113, "incorrect_loss_per_char": 0.6364442985887568, "correct_loss_per_token": 2.6330548241024925, "incorrect_loss_per_token": 2.7429559661477385, "correct_loss_uncond": -20.732925415039062, "incorrect_loss_uncond": -18.704015096028645}, "model_output": [{"sum_logits": -65.15636444091797, "num_tokens": 29, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -81.14945983886719, "logits_per_token": -2.246771187617861, "logits_per_char": -0.5212509155273437, "num_chars": 125}, {"sum_logits": -71.72203063964844, "num_tokens": 24, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -89.97893524169922, "logits_per_token": -2.988417943318685, "logits_per_char": -0.6702993517724153, "num_chars": 107}, {"sum_logits": -110.58830261230469, "num_tokens": 42, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -131.32122802734375, "logits_per_token": -2.6330548241024925, "logits_per_char": -0.604307664548113, "num_chars": 183}, {"sum_logits": -122.74082946777344, "num_tokens": 41, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -144.60287475585938, "logits_per_token": -2.9936787675066694, "logits_per_char": -0.7177826284665113, "num_chars": 171}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 767, "native_id": 42875, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 16.86568260192871, "incorrect_loss_raw": 24.218550364176433, "correct_loss_per_char": 0.40156387147449313, "incorrect_loss_per_char": 0.45062258476708017, "correct_loss_per_token": 1.686568260192871, "incorrect_loss_per_token": 1.890004046816238, "correct_loss_uncond": -23.333646774291992, "incorrect_loss_uncond": -23.69855244954427}, "model_output": [{"sum_logits": -19.286767959594727, "num_tokens": 13, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -45.505035400390625, "logits_per_token": -1.4835975353534405, "logits_per_char": -0.3506685083562678, "num_chars": 55}, {"sum_logits": -31.445528030395508, "num_tokens": 12, "num_tokens_all": 402, "is_greedy": false, "sum_logits_uncond": -54.44910430908203, "logits_per_token": -2.6204606691996255, "logits_per_char": -0.6165789809881472, "num_chars": 51}, {"sum_logits": -16.86568260192871, "num_tokens": 10, "num_tokens_all": 400, "is_greedy": false, "sum_logits_uncond": -40.1993293762207, "logits_per_token": -1.686568260192871, "logits_per_char": -0.40156387147449313, "num_chars": 42}, {"sum_logits": -21.923355102539062, "num_tokens": 14, "num_tokens_all": 404, "is_greedy": false, "sum_logits_uncond": -43.79716873168945, "logits_per_token": -1.5659539358956474, "logits_per_char": -0.38462026495682566, "num_chars": 57}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 768, "native_id": 28521, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 98.47064971923828, "incorrect_loss_raw": 117.67555236816406, "correct_loss_per_char": 0.5049776908678886, "incorrect_loss_per_char": 0.7525148208709084, "correct_loss_per_token": 2.344539279029483, "incorrect_loss_per_token": 3.5427774880349694, "correct_loss_uncond": -34.34809112548828, "incorrect_loss_uncond": -16.017679850260418}, "model_output": [{"sum_logits": -121.81156921386719, "num_tokens": 35, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -132.87832641601562, "logits_per_token": -3.480330548967634, "logits_per_char": -0.7758698676042496, "num_chars": 157}, {"sum_logits": -109.1063232421875, "num_tokens": 34, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -121.15800476074219, "logits_per_token": -3.2090095071231617, "logits_per_char": -0.7039117628528225, "num_chars": 155}, {"sum_logits": -122.1087646484375, "num_tokens": 31, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -147.04336547851562, "logits_per_token": -3.938992408014113, "logits_per_char": -0.7777628321556529, "num_chars": 157}, {"sum_logits": -98.47064971923828, "num_tokens": 42, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -132.81874084472656, "logits_per_token": -2.344539279029483, "logits_per_char": -0.5049776908678886, "num_chars": 195}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 769, "native_id": 36387, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 133.77157592773438, "incorrect_loss_raw": 153.5840276082357, "correct_loss_per_char": 0.3654961090921704, "incorrect_loss_per_char": 0.6497562298310501, "correct_loss_per_token": 1.8841067032075265, "incorrect_loss_per_token": 3.199692809493454, "correct_loss_uncond": -28.314620971679688, "incorrect_loss_uncond": -14.248196919759115}, "model_output": [{"sum_logits": -172.82278442382812, "num_tokens": 48, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -182.19886779785156, "logits_per_token": -3.6004746754964194, "logits_per_char": -0.7820035494290866, "num_chars": 221}, {"sum_logits": -179.92129516601562, "num_tokens": 50, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -195.38665771484375, "logits_per_token": -3.5984259033203125, "logits_per_char": -0.6893536213257304, "num_chars": 261}, {"sum_logits": -108.00800323486328, "num_tokens": 45, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -125.91114807128906, "logits_per_token": -2.4001778496636286, "logits_per_char": -0.4779115187383331, "num_chars": 226}, {"sum_logits": -133.77157592773438, "num_tokens": 71, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -162.08619689941406, "logits_per_token": -1.8841067032075265, "logits_per_char": -0.3654961090921704, "num_chars": 366}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 770, "native_id": 35338, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 42.21339416503906, "incorrect_loss_raw": 54.317952473958336, "correct_loss_per_char": 0.48521142718435706, "incorrect_loss_per_char": 0.8386769954400473, "correct_loss_per_token": 2.3451885647243924, "incorrect_loss_per_token": 3.9240823157747884, "correct_loss_uncond": -32.70647430419922, "incorrect_loss_uncond": -12.63992436726888}, "model_output": [{"sum_logits": -23.860538482666016, "num_tokens": 5, "num_tokens_all": 391, "is_greedy": false, "sum_logits_uncond": -33.78386306762695, "logits_per_token": -4.772107696533203, "logits_per_char": -1.0374147166376528, "num_chars": 23}, {"sum_logits": -63.95206832885742, "num_tokens": 21, "num_tokens_all": 407, "is_greedy": false, "sum_logits_uncond": -83.49348449707031, "logits_per_token": -3.045336587088449, "logits_per_char": -0.6149237339313214, "num_chars": 104}, {"sum_logits": -42.21339416503906, "num_tokens": 18, "num_tokens_all": 404, "is_greedy": false, "sum_logits_uncond": -74.91986846923828, "logits_per_token": -2.3451885647243924, "logits_per_char": -0.48521142718435706, "num_chars": 87}, {"sum_logits": -75.14125061035156, "num_tokens": 19, "num_tokens_all": 405, "is_greedy": false, "sum_logits_uncond": -83.59628295898438, "logits_per_token": -3.9548026637027136, "logits_per_char": -0.8636925357511673, "num_chars": 87}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 771, "native_id": 22032, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 89.41889953613281, "incorrect_loss_raw": 89.69015248616536, "correct_loss_per_char": 0.4940270692604023, "incorrect_loss_per_char": 0.5017639112008238, "correct_loss_per_token": 2.416727014490076, "incorrect_loss_per_token": 2.470753978672133, "correct_loss_uncond": -32.41291046142578, "incorrect_loss_uncond": -26.7247797648112}, "model_output": [{"sum_logits": -126.03115844726562, "num_tokens": 47, "num_tokens_all": 496, "is_greedy": false, "sum_logits_uncond": -159.01467895507812, "logits_per_token": -2.68151400951629, "logits_per_char": -0.5409062594303246, "num_chars": 233}, {"sum_logits": -89.41889953613281, "num_tokens": 37, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -121.8318099975586, "logits_per_token": -2.416727014490076, "logits_per_char": -0.4940270692604023, "num_chars": 181}, {"sum_logits": -61.97187805175781, "num_tokens": 25, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -87.36289978027344, "logits_per_token": -2.4788751220703125, "logits_per_char": -0.4730677713874642, "num_chars": 131}, {"sum_logits": -81.06742095947266, "num_tokens": 36, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -102.86721801757812, "logits_per_token": -2.251872804429796, "logits_per_char": -0.4913177027846828, "num_chars": 165}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 772, "native_id": 36554, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 76.18590545654297, "incorrect_loss_raw": 84.32520039876302, "correct_loss_per_char": 0.4589512376900179, "incorrect_loss_per_char": 0.5503900706648484, "correct_loss_per_token": 2.6271001881566542, "incorrect_loss_per_token": 2.7361765389914035, "correct_loss_uncond": -33.66870880126953, "incorrect_loss_uncond": -23.264569600423176}, "model_output": [{"sum_logits": -93.5078125, "num_tokens": 28, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -112.14421081542969, "logits_per_token": -3.339564732142857, "logits_per_char": -0.6539007867132867, "num_chars": 143}, {"sum_logits": -65.54006958007812, "num_tokens": 26, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -95.07434844970703, "logits_per_token": -2.5207719069260817, "logits_per_char": -0.5080625548843265, "num_chars": 129}, {"sum_logits": -76.18590545654297, "num_tokens": 29, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -109.8546142578125, "logits_per_token": -2.6271001881566542, "logits_per_char": -0.4589512376900179, "num_chars": 166}, {"sum_logits": -93.92771911621094, "num_tokens": 40, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -115.55075073242188, "logits_per_token": -2.3481929779052733, "logits_per_char": -0.48920687039693195, "num_chars": 192}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 773, "native_id": 33736, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 103.79357147216797, "incorrect_loss_raw": 104.71690622965495, "correct_loss_per_char": 0.6034509969312091, "incorrect_loss_per_char": 0.6164381912598916, "correct_loss_per_token": 2.531550523711414, "incorrect_loss_per_token": 2.9399288863732074, "correct_loss_uncond": -13.474510192871094, "incorrect_loss_uncond": -8.698287963867188}, "model_output": [{"sum_logits": -117.65603637695312, "num_tokens": 35, "num_tokens_all": 437, "is_greedy": false, "sum_logits_uncond": -128.48187255859375, "logits_per_token": -3.361601039341518, "logits_per_char": -0.726271829487365, "num_chars": 162}, {"sum_logits": -102.68826293945312, "num_tokens": 36, "num_tokens_all": 438, "is_greedy": false, "sum_logits_uncond": -103.94209289550781, "logits_per_token": -2.8524517483181424, "logits_per_char": -0.6076228576299001, "num_chars": 169}, {"sum_logits": -93.8064193725586, "num_tokens": 36, "num_tokens_all": 438, "is_greedy": false, "sum_logits_uncond": -107.82161712646484, "logits_per_token": -2.605733871459961, "logits_per_char": -0.5154198866624099, "num_chars": 182}, {"sum_logits": -103.79357147216797, "num_tokens": 41, "num_tokens_all": 443, "is_greedy": false, "sum_logits_uncond": -117.26808166503906, "logits_per_token": -2.531550523711414, "logits_per_char": -0.6034509969312091, "num_chars": 172}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 774, "native_id": 45809, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 163.40443420410156, "incorrect_loss_raw": 121.83697509765625, "correct_loss_per_char": 0.6562427076469942, "incorrect_loss_per_char": 0.7148201352074034, "correct_loss_per_token": 3.1423929654634914, "incorrect_loss_per_token": 3.675699299217289, "correct_loss_uncond": -15.746658325195312, "incorrect_loss_uncond": -7.9919789632161455}, "model_output": [{"sum_logits": -73.97866821289062, "num_tokens": 26, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -87.62429809570312, "logits_per_token": -2.8453333928034854, "logits_per_char": -0.5284190586635045, "num_chars": 140}, {"sum_logits": -163.40443420410156, "num_tokens": 52, "num_tokens_all": 496, "is_greedy": false, "sum_logits_uncond": -179.15109252929688, "logits_per_token": -3.1423929654634914, "logits_per_char": -0.6562427076469942, "num_chars": 249}, {"sum_logits": -119.591064453125, "num_tokens": 26, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -124.706787109375, "logits_per_token": -4.5996563251201925, "logits_per_char": -0.7972737630208333, "num_chars": 150}, {"sum_logits": -171.94119262695312, "num_tokens": 48, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -177.15577697753906, "logits_per_token": -3.58210817972819, "logits_per_char": -0.8187675839378721, "num_chars": 210}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 775, "native_id": 22851, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 90.87623596191406, "incorrect_loss_raw": 105.83899434407552, "correct_loss_per_char": 0.5283502090808957, "incorrect_loss_per_char": 0.5826952244270146, "correct_loss_per_token": 2.596463884626116, "incorrect_loss_per_token": 2.8439907361833527, "correct_loss_uncond": -33.3369140625, "incorrect_loss_uncond": -26.643150329589844}, "model_output": [{"sum_logits": -134.57858276367188, "num_tokens": 44, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -154.63009643554688, "logits_per_token": -3.0586041537198154, "logits_per_char": -0.700930118560791, "num_chars": 192}, {"sum_logits": -78.78665161132812, "num_tokens": 34, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -104.7784194946289, "logits_per_token": -2.3172544591567097, "logits_per_char": -0.4451223254877295, "num_chars": 177}, {"sum_logits": -104.15174865722656, "num_tokens": 33, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -138.0379180908203, "logits_per_token": -3.1561135956735322, "logits_per_char": -0.6020332292325234, "num_chars": 173}, {"sum_logits": -90.87623596191406, "num_tokens": 35, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -124.21315002441406, "logits_per_token": -2.596463884626116, "logits_per_char": -0.5283502090808957, "num_chars": 172}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 776, "native_id": 13249, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 134.14273071289062, "incorrect_loss_raw": 112.52792867024739, "correct_loss_per_char": 0.5660030831767537, "incorrect_loss_per_char": 0.6275247508974137, "correct_loss_per_token": 2.9161463198454483, "incorrect_loss_per_token": 2.7480166442889224, "correct_loss_uncond": -41.18035888671875, "incorrect_loss_uncond": -32.362579345703125}, "model_output": [{"sum_logits": -134.14215087890625, "num_tokens": 37, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -167.15017700195312, "logits_per_token": -3.6254635372677364, "logits_per_char": -0.8767460841758579, "num_chars": 153}, {"sum_logits": -134.14273071289062, "num_tokens": 46, "num_tokens_all": 494, "is_greedy": false, "sum_logits_uncond": -175.32308959960938, "logits_per_token": -2.9161463198454483, "logits_per_char": -0.5660030831767537, "num_chars": 237}, {"sum_logits": -127.83163452148438, "num_tokens": 52, "num_tokens_all": 500, "is_greedy": false, "sum_logits_uncond": -150.2135009765625, "logits_per_token": -2.4583006638746996, "logits_per_char": -0.5810528841885654, "num_chars": 220}, {"sum_logits": -75.61000061035156, "num_tokens": 35, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -117.30784606933594, "logits_per_token": -2.1602857317243305, "logits_per_char": -0.42477528432781775, "num_chars": 178}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 777, "native_id": 45037, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 62.86339569091797, "incorrect_loss_raw": 157.13411458333334, "correct_loss_per_char": 0.46912981858894004, "incorrect_loss_per_char": 0.7125514847344897, "correct_loss_per_token": 2.3282739144784435, "incorrect_loss_per_token": 3.561787402212138, "correct_loss_uncond": -15.685379028320312, "incorrect_loss_uncond": -16.455673217773438}, "model_output": [{"sum_logits": -62.86339569091797, "num_tokens": 27, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -78.54877471923828, "logits_per_token": -2.3282739144784435, "logits_per_char": -0.46912981858894004, "num_chars": 134}, {"sum_logits": -135.47361755371094, "num_tokens": 35, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -148.98214721679688, "logits_per_token": -3.870674787248884, "logits_per_char": -0.6807719475060852, "num_chars": 199}, {"sum_logits": -193.8991241455078, "num_tokens": 53, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -211.30868530273438, "logits_per_token": -3.6584740404812797, "logits_per_char": -0.8112934064665599, "num_chars": 239}, {"sum_logits": -142.02960205078125, "num_tokens": 45, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -160.47853088378906, "logits_per_token": -3.15621337890625, "logits_per_char": -0.6455891002308238, "num_chars": 220}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 778, "native_id": 9932, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 61.012176513671875, "incorrect_loss_raw": 159.04761250813803, "correct_loss_per_char": 0.33339987165940915, "incorrect_loss_per_char": 0.7535056003906911, "correct_loss_per_token": 1.4526708693731398, "incorrect_loss_per_token": 3.295247392200288, "correct_loss_uncond": -30.31561279296875, "incorrect_loss_uncond": -19.221598307291668}, "model_output": [{"sum_logits": -201.1000213623047, "num_tokens": 56, "num_tokens_all": 497, "is_greedy": false, "sum_logits_uncond": -208.994384765625, "logits_per_token": -3.591071810041155, "logits_per_char": -0.7531836006078827, "num_chars": 267}, {"sum_logits": -61.012176513671875, "num_tokens": 42, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -91.32778930664062, "logits_per_token": -1.4526708693731398, "logits_per_char": -0.33339987165940915, "num_chars": 183}, {"sum_logits": -90.27830505371094, "num_tokens": 35, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -114.91712951660156, "logits_per_token": -2.5793801443917412, "logits_per_char": -0.6269326739841037, "num_chars": 144}, {"sum_logits": -185.76451110839844, "num_tokens": 50, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -210.8961181640625, "logits_per_token": -3.715290222167969, "logits_per_char": -0.8804005265800874, "num_chars": 211}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 779, "native_id": 7683, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 86.94383239746094, "incorrect_loss_raw": 134.84144592285156, "correct_loss_per_char": 0.5433989524841308, "incorrect_loss_per_char": 0.7203023831999369, "correct_loss_per_token": 2.349833308039485, "incorrect_loss_per_token": 3.028469759313072, "correct_loss_uncond": -43.20794677734375, "incorrect_loss_uncond": -22.672073364257812}, "model_output": [{"sum_logits": -149.97872924804688, "num_tokens": 44, "num_tokens_all": 490, "is_greedy": false, "sum_logits_uncond": -174.1229705810547, "logits_per_token": -3.4086074829101562, "logits_per_char": -0.7280520837283829, "num_chars": 206}, {"sum_logits": -86.94383239746094, "num_tokens": 37, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -130.1517791748047, "logits_per_token": -2.349833308039485, "logits_per_char": -0.5433989524841308, "num_chars": 160}, {"sum_logits": -133.45263671875, "num_tokens": 41, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -149.81527709960938, "logits_per_token": -3.2549423589939024, "logits_per_char": -0.8722394556781046, "num_chars": 153}, {"sum_logits": -121.09297180175781, "num_tokens": 50, "num_tokens_all": 496, "is_greedy": false, "sum_logits_uncond": -148.60231018066406, "logits_per_token": -2.4218594360351564, "logits_per_char": -0.5606156101933232, "num_chars": 216}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 780, "native_id": 36114, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 24.442920684814453, "incorrect_loss_raw": 56.19270579020182, "correct_loss_per_char": 0.6606194779679582, "incorrect_loss_per_char": 0.817540962377942, "correct_loss_per_token": 3.0553650856018066, "incorrect_loss_per_token": 4.08078087344557, "correct_loss_uncond": -20.09124755859375, "incorrect_loss_uncond": -12.78158187866211}, "model_output": [{"sum_logits": -24.442920684814453, "num_tokens": 8, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -44.5341682434082, "logits_per_token": -3.0553650856018066, "logits_per_char": -0.6606194779679582, "num_chars": 37}, {"sum_logits": -61.56976318359375, "num_tokens": 14, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -69.41667938232422, "logits_per_token": -4.397840227399554, "logits_per_char": -0.843421413473887, "num_chars": 73}, {"sum_logits": -37.64026641845703, "num_tokens": 10, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -54.01933670043945, "logits_per_token": -3.764026641845703, "logits_per_char": -0.7528053283691406, "num_chars": 50}, {"sum_logits": -69.36808776855469, "num_tokens": 17, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -83.48684692382812, "logits_per_token": -4.080475751091452, "logits_per_char": -0.8563961452907987, "num_chars": 81}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 781, "native_id": 19452, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 100.74625396728516, "incorrect_loss_raw": 86.22143809000652, "correct_loss_per_char": 0.47747039794921875, "incorrect_loss_per_char": 0.49247159892938774, "correct_loss_per_token": 2.3429361387740735, "incorrect_loss_per_token": 2.5760860873930347, "correct_loss_uncond": -21.20954132080078, "incorrect_loss_uncond": -13.345100402832031}, "model_output": [{"sum_logits": -100.74625396728516, "num_tokens": 43, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -121.95579528808594, "logits_per_token": -2.3429361387740735, "logits_per_char": -0.47747039794921875, "num_chars": 211}, {"sum_logits": -93.94306945800781, "num_tokens": 36, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -112.94114685058594, "logits_per_token": -2.6095297071668835, "logits_per_char": -0.4817593305538862, "num_chars": 195}, {"sum_logits": -94.95568084716797, "num_tokens": 35, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -108.47064208984375, "logits_per_token": -2.7130194527762277, "logits_per_char": -0.507784389557048, "num_chars": 187}, {"sum_logits": -69.76556396484375, "num_tokens": 29, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -77.28782653808594, "logits_per_token": -2.4057091022359915, "logits_per_char": -0.487871076677229, "num_chars": 143}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 782, "native_id": 32957, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 19.276161193847656, "incorrect_loss_raw": 34.30843607584635, "correct_loss_per_char": 0.344217164175851, "incorrect_loss_per_char": 0.5299534033881927, "correct_loss_per_token": 1.7523782903497869, "incorrect_loss_per_token": 2.416720688124716, "correct_loss_uncond": -35.46132278442383, "incorrect_loss_uncond": -36.77378845214844}, "model_output": [{"sum_logits": -36.10240173339844, "num_tokens": 13, "num_tokens_all": 417, "is_greedy": false, "sum_logits_uncond": -65.3976058959961, "logits_per_token": -2.7771078256460338, "logits_per_char": -0.6119051141253973, "num_chars": 59}, {"sum_logits": -33.60117721557617, "num_tokens": 16, "num_tokens_all": 420, "is_greedy": false, "sum_logits_uncond": -71.19729614257812, "logits_per_token": -2.1000735759735107, "logits_per_char": -0.4421207528365286, "num_chars": 76}, {"sum_logits": -33.22172927856445, "num_tokens": 14, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -76.65177154541016, "logits_per_token": -2.3729806627546037, "logits_per_char": -0.5358343432026524, "num_chars": 62}, {"sum_logits": -19.276161193847656, "num_tokens": 11, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -54.737483978271484, "logits_per_token": -1.7523782903497869, "logits_per_char": -0.344217164175851, "num_chars": 56}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 783, "native_id": 7824, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 41.25470733642578, "incorrect_loss_raw": 94.12613677978516, "correct_loss_per_char": 0.5428250965319181, "incorrect_loss_per_char": 0.6805459484646231, "correct_loss_per_token": 1.8752139698375354, "incorrect_loss_per_token": 2.9084421804160545, "correct_loss_uncond": -40.43756866455078, "incorrect_loss_uncond": -22.439834594726562}, "model_output": [{"sum_logits": -61.37982940673828, "num_tokens": 22, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -88.2752456665039, "logits_per_token": -2.789992245760831, "logits_per_char": -0.689661004570093, "num_chars": 89}, {"sum_logits": -85.41787719726562, "num_tokens": 28, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -95.96035766601562, "logits_per_token": -3.0506384713309154, "logits_per_char": -0.7001465344038166, "num_chars": 122}, {"sum_logits": -135.58070373535156, "num_tokens": 47, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -165.46231079101562, "logits_per_token": -2.8846958241564162, "logits_per_char": -0.6518303064199594, "num_chars": 208}, {"sum_logits": -41.25470733642578, "num_tokens": 22, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -81.69227600097656, "logits_per_token": -1.8752139698375354, "logits_per_char": -0.5428250965319181, "num_chars": 76}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 784, "native_id": 13895, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 70.03996276855469, "incorrect_loss_raw": 114.42560323079427, "correct_loss_per_char": 0.4830342259900323, "incorrect_loss_per_char": 0.6735224383282566, "correct_loss_per_token": 2.122423114198627, "incorrect_loss_per_token": 3.140661988146251, "correct_loss_uncond": -31.26824188232422, "incorrect_loss_uncond": -20.370262145996094}, "model_output": [{"sum_logits": -109.17498779296875, "num_tokens": 32, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -120.03850555419922, "logits_per_token": -3.4117183685302734, "logits_per_char": -0.6697852011838573, "num_chars": 163}, {"sum_logits": -113.95213317871094, "num_tokens": 37, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -134.23257446289062, "logits_per_token": -3.0797873832084037, "logits_per_char": -0.6906189889618845, "num_chars": 165}, {"sum_logits": -120.14968872070312, "num_tokens": 41, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -150.11651611328125, "logits_per_token": -2.9304802127000764, "logits_per_char": -0.6601631248390282, "num_chars": 182}, {"sum_logits": -70.03996276855469, "num_tokens": 33, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -101.3082046508789, "logits_per_token": -2.122423114198627, "logits_per_char": -0.4830342259900323, "num_chars": 145}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 785, "native_id": 12189, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 71.431396484375, "incorrect_loss_raw": 111.76407368977864, "correct_loss_per_char": 0.5370781690554511, "incorrect_loss_per_char": 0.6629593306133157, "correct_loss_per_token": 2.3042385962701615, "incorrect_loss_per_token": 2.8413732340815794, "correct_loss_uncond": -20.325531005859375, "incorrect_loss_uncond": -15.920257568359375}, "model_output": [{"sum_logits": -71.431396484375, "num_tokens": 31, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -91.75692749023438, "logits_per_token": -2.3042385962701615, "logits_per_char": -0.5370781690554511, "num_chars": 133}, {"sum_logits": -78.0813980102539, "num_tokens": 29, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -93.0738525390625, "logits_per_token": -2.692462000353583, "logits_per_char": -0.6561462017668396, "num_chars": 119}, {"sum_logits": -115.9770278930664, "num_tokens": 42, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -138.2368927001953, "logits_per_token": -2.7613578069777716, "logits_per_char": -0.6665346430636, "num_chars": 174}, {"sum_logits": -141.23379516601562, "num_tokens": 46, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -151.74224853515625, "logits_per_token": -3.0702998949133833, "logits_per_char": -0.6661971470095077, "num_chars": 212}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 786, "native_id": 41975, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 25.692214965820312, "incorrect_loss_raw": 84.67979431152344, "correct_loss_per_char": 0.6423053741455078, "incorrect_loss_per_char": 0.846619659523035, "correct_loss_per_token": 3.211526870727539, "incorrect_loss_per_token": 4.034951493678949, "correct_loss_uncond": -24.116966247558594, "incorrect_loss_uncond": -16.938753763834637}, "model_output": [{"sum_logits": -88.95132446289062, "num_tokens": 20, "num_tokens_all": 426, "is_greedy": false, "sum_logits_uncond": -104.203369140625, "logits_per_token": -4.447566223144531, "logits_per_char": -0.9564658544396841, "num_chars": 93}, {"sum_logits": -54.40229797363281, "num_tokens": 16, "num_tokens_all": 422, "is_greedy": false, "sum_logits_uncond": -64.70870208740234, "logits_per_token": -3.400143623352051, "logits_per_char": -0.7253639729817708, "num_chars": 75}, {"sum_logits": -25.692214965820312, "num_tokens": 8, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -49.809181213378906, "logits_per_token": -3.211526870727539, "logits_per_char": -0.6423053741455078, "num_chars": 40}, {"sum_logits": -110.68576049804688, "num_tokens": 26, "num_tokens_all": 432, "is_greedy": false, "sum_logits_uncond": -135.94357299804688, "logits_per_token": -4.257144634540264, "logits_per_char": -0.8580291511476502, "num_chars": 129}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 787, "native_id": 38977, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 28.782312393188477, "incorrect_loss_raw": 35.67386563618978, "correct_loss_per_char": 0.6693561021671739, "incorrect_loss_per_char": 0.760462388563656, "correct_loss_per_token": 2.878231239318848, "incorrect_loss_per_token": 3.3593131417716857, "correct_loss_uncond": -25.936830520629883, "incorrect_loss_uncond": -20.000355402628582}, "model_output": [{"sum_logits": -28.138137817382812, "num_tokens": 11, "num_tokens_all": 401, "is_greedy": false, "sum_logits_uncond": -50.94913101196289, "logits_per_token": -2.558012528852983, "logits_per_char": -0.6116986482039742, "num_chars": 46}, {"sum_logits": -28.782312393188477, "num_tokens": 10, "num_tokens_all": 400, "is_greedy": false, "sum_logits_uncond": -54.71914291381836, "logits_per_token": -2.878231239318848, "logits_per_char": -0.6693561021671739, "num_chars": 43}, {"sum_logits": -30.200944900512695, "num_tokens": 8, "num_tokens_all": 398, "is_greedy": false, "sum_logits_uncond": -46.03192901611328, "logits_per_token": -3.775118112564087, "logits_per_char": -0.9742240290487966, "num_chars": 31}, {"sum_logits": -48.68251419067383, "num_tokens": 13, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -70.0416030883789, "logits_per_token": -3.744808783897987, "logits_per_char": -0.6954644884381975, "num_chars": 70}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 788, "native_id": 49373, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 70.92765045166016, "incorrect_loss_raw": 73.28957621256511, "correct_loss_per_char": 0.5030329819266678, "incorrect_loss_per_char": 0.525781326301703, "correct_loss_per_token": 2.626950016728154, "incorrect_loss_per_token": 2.1286753178762763, "correct_loss_uncond": -26.372634887695312, "incorrect_loss_uncond": -11.671595255533854}, "model_output": [{"sum_logits": -91.28728485107422, "num_tokens": 36, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -102.0086441040039, "logits_per_token": -2.5357579125298395, "logits_per_char": -0.5814476742106638, "num_chars": 157}, {"sum_logits": -82.8321533203125, "num_tokens": 41, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -95.37642669677734, "logits_per_token": -2.020296422446646, "logits_per_char": -0.5242541349386867, "num_chars": 158}, {"sum_logits": -70.92765045166016, "num_tokens": 27, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -97.30028533935547, "logits_per_token": -2.626950016728154, "logits_per_char": -0.5030329819266678, "num_chars": 141}, {"sum_logits": -45.749290466308594, "num_tokens": 25, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -57.498443603515625, "logits_per_token": -1.8299716186523438, "logits_per_char": -0.4716421697557587, "num_chars": 97}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 789, "native_id": 46372, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 82.093505859375, "incorrect_loss_raw": 116.22608693440755, "correct_loss_per_char": 0.7201184724506579, "incorrect_loss_per_char": 0.6428028474150151, "correct_loss_per_token": 2.931910923549107, "incorrect_loss_per_token": 2.982541617558051, "correct_loss_uncond": -23.129302978515625, "incorrect_loss_uncond": -21.53636423746745}, "model_output": [{"sum_logits": -77.38502502441406, "num_tokens": 26, "num_tokens_all": 445, "is_greedy": false, "sum_logits_uncond": -105.49978637695312, "logits_per_token": -2.976347116323618, "logits_per_char": -0.6190802001953125, "num_chars": 125}, {"sum_logits": -152.34930419921875, "num_tokens": 45, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -172.82032775878906, "logits_per_token": -3.385540093315972, "logits_per_char": -0.7086014148800872, "num_chars": 215}, {"sum_logits": -82.093505859375, "num_tokens": 28, "num_tokens_all": 447, "is_greedy": false, "sum_logits_uncond": -105.22280883789062, "logits_per_token": -2.931910923549107, "logits_per_char": -0.7201184724506579, "num_chars": 114}, {"sum_logits": -118.94393157958984, "num_tokens": 46, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -134.9672393798828, "logits_per_token": -2.585737643034562, "logits_per_char": -0.6007269271696457, "num_chars": 198}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 790, "native_id": 31362, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 72.15645599365234, "incorrect_loss_raw": 81.74466705322266, "correct_loss_per_char": 0.38178019044260497, "incorrect_loss_per_char": 0.5145710010963568, "correct_loss_per_token": 1.8988541050961143, "incorrect_loss_per_token": 2.2998903405729423, "correct_loss_uncond": -20.357254028320312, "incorrect_loss_uncond": -18.530298868815105}, "model_output": [{"sum_logits": -81.94951629638672, "num_tokens": 36, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -102.89209747314453, "logits_per_token": -2.2763754526774087, "logits_per_char": -0.5154057628703568, "num_chars": 159}, {"sum_logits": -89.55584716796875, "num_tokens": 45, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -104.47390747070312, "logits_per_token": -1.9901299370659722, "logits_per_char": -0.4523022584240846, "num_chars": 198}, {"sum_logits": -72.15645599365234, "num_tokens": 38, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -92.51371002197266, "logits_per_token": -1.8988541050961143, "logits_per_char": -0.38178019044260497, "num_chars": 189}, {"sum_logits": -73.7286376953125, "num_tokens": 28, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -93.45889282226562, "logits_per_token": -2.6331656319754466, "logits_per_char": -0.5760049819946289, "num_chars": 128}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 791, "native_id": 13636, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 60.563865661621094, "incorrect_loss_raw": 54.26354090372721, "correct_loss_per_char": 0.7666312109065961, "incorrect_loss_per_char": 0.7621189254778464, "correct_loss_per_token": 3.7852416038513184, "incorrect_loss_per_token": 3.584483733397732, "correct_loss_uncond": -30.12073516845703, "incorrect_loss_uncond": -15.531314849853516}, "model_output": [{"sum_logits": -66.14317321777344, "num_tokens": 19, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -82.48834228515625, "logits_per_token": -3.481219643040707, "logits_per_char": -0.7516269683837891, "num_chars": 88}, {"sum_logits": -60.563865661621094, "num_tokens": 16, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -90.68460083007812, "logits_per_token": -3.7852416038513184, "logits_per_char": -0.7666312109065961, "num_chars": 79}, {"sum_logits": -53.28928756713867, "num_tokens": 16, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -68.53874206542969, "logits_per_token": -3.330580472946167, "logits_per_char": -0.6498693605748619, "num_chars": 82}, {"sum_logits": -43.35816192626953, "num_tokens": 11, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -58.35748291015625, "logits_per_token": -3.941651084206321, "logits_per_char": -0.8848604474748883, "num_chars": 49}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 792, "native_id": 7854, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 60.9876594543457, "incorrect_loss_raw": 89.41848246256511, "correct_loss_per_char": 0.5303274735160496, "incorrect_loss_per_char": 0.6643291358431532, "correct_loss_per_token": 2.651637367580248, "incorrect_loss_per_token": 3.023534052207987, "correct_loss_uncond": -23.126644134521484, "incorrect_loss_uncond": -22.891092936197918}, "model_output": [{"sum_logits": -60.9876594543457, "num_tokens": 23, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -84.11430358886719, "logits_per_token": -2.651637367580248, "logits_per_char": -0.5303274735160496, "num_chars": 115}, {"sum_logits": -88.31199645996094, "num_tokens": 28, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -109.824462890625, "logits_per_token": -3.1539998735700334, "logits_per_char": -0.6399420033330503, "num_chars": 138}, {"sum_logits": -75.12325286865234, "num_tokens": 25, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -93.69142150878906, "logits_per_token": -3.004930114746094, "logits_per_char": -0.6767860618797509, "num_chars": 111}, {"sum_logits": -104.82019805908203, "num_tokens": 36, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -133.412841796875, "logits_per_token": -2.911672168307834, "logits_per_char": -0.6762593423166583, "num_chars": 155}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 793, "native_id": 26875, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 97.78741455078125, "incorrect_loss_raw": 110.55660502115886, "correct_loss_per_char": 0.6228497742087977, "incorrect_loss_per_char": 0.7318244653671759, "correct_loss_per_token": 3.055856704711914, "incorrect_loss_per_token": 3.6140962589200623, "correct_loss_uncond": -27.277511596679688, "incorrect_loss_uncond": -22.606185913085938}, "model_output": [{"sum_logits": -137.20590209960938, "num_tokens": 32, "num_tokens_all": 442, "is_greedy": false, "sum_logits_uncond": -151.23451232910156, "logits_per_token": -4.287684440612793, "logits_per_char": -0.9397664527370505, "num_chars": 146}, {"sum_logits": -92.27606201171875, "num_tokens": 26, "num_tokens_all": 436, "is_greedy": false, "sum_logits_uncond": -113.11949157714844, "logits_per_token": -3.5490793081430287, "logits_per_char": -0.6363866345635776, "num_chars": 145}, {"sum_logits": -102.18785095214844, "num_tokens": 34, "num_tokens_all": 444, "is_greedy": false, "sum_logits_uncond": -135.13436889648438, "logits_per_token": -3.005525028004366, "logits_per_char": -0.6193203088008996, "num_chars": 165}, {"sum_logits": -97.78741455078125, "num_tokens": 32, "num_tokens_all": 442, "is_greedy": false, "sum_logits_uncond": -125.06492614746094, "logits_per_token": -3.055856704711914, "logits_per_char": -0.6228497742087977, "num_chars": 157}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 794, "native_id": 20723, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 45.44279479980469, "incorrect_loss_raw": 75.66929880777995, "correct_loss_per_char": 0.436949949998122, "incorrect_loss_per_char": 0.5623070532325051, "correct_loss_per_token": 1.9757736869480298, "incorrect_loss_per_token": 2.455680579340592, "correct_loss_uncond": -24.22857666015625, "incorrect_loss_uncond": -20.286176045735676}, "model_output": [{"sum_logits": -46.29668426513672, "num_tokens": 23, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -69.72550964355469, "logits_per_token": -2.0128993158755093, "logits_per_char": -0.4822571277618408, "num_chars": 96}, {"sum_logits": -111.22882080078125, "num_tokens": 40, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -131.92495727539062, "logits_per_token": -2.780720520019531, "logits_per_char": -0.6660408431184506, "num_chars": 167}, {"sum_logits": -45.44279479980469, "num_tokens": 23, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -69.67137145996094, "logits_per_token": -1.9757736869480298, "logits_per_char": -0.436949949998122, "num_chars": 104}, {"sum_logits": -69.48239135742188, "num_tokens": 27, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -86.21595764160156, "logits_per_token": -2.573421902126736, "logits_per_char": -0.5386231888172238, "num_chars": 129}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 795, "native_id": 11267, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 50.03067398071289, "incorrect_loss_raw": 83.80900446573894, "correct_loss_per_char": 0.7697026766263522, "incorrect_loss_per_char": 0.9173368800316476, "correct_loss_per_token": 3.1269171237945557, "incorrect_loss_per_token": 4.237176053951948, "correct_loss_uncond": -13.95175552368164, "incorrect_loss_uncond": -18.363366444905598}, "model_output": [{"sum_logits": -34.2473258972168, "num_tokens": 10, "num_tokens_all": 423, "is_greedy": false, "sum_logits_uncond": -53.364662170410156, "logits_per_token": -3.4247325897216796, "logits_per_char": -0.8781365614670974, "num_chars": 39}, {"sum_logits": -80.92332458496094, "num_tokens": 20, "num_tokens_all": 433, "is_greedy": false, "sum_logits_uncond": -105.81236267089844, "logits_per_token": -4.046166229248047, "logits_per_char": -0.8257482100506218, "num_chars": 98}, {"sum_logits": -136.25636291503906, "num_tokens": 26, "num_tokens_all": 439, "is_greedy": false, "sum_logits_uncond": -147.340087890625, "logits_per_token": -5.240629342886118, "logits_per_char": -1.0481258685772235, "num_chars": 130}, {"sum_logits": -50.03067398071289, "num_tokens": 16, "num_tokens_all": 429, "is_greedy": false, "sum_logits_uncond": -63.98242950439453, "logits_per_token": -3.1269171237945557, "logits_per_char": -0.7697026766263522, "num_chars": 65}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 796, "native_id": 18281, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 112.80311584472656, "incorrect_loss_raw": 89.50184122721355, "correct_loss_per_char": 0.42890918572139375, "incorrect_loss_per_char": 0.5733765530616104, "correct_loss_per_token": 2.050965742631392, "incorrect_loss_per_token": 2.5137234720944437, "correct_loss_uncond": -23.543975830078125, "incorrect_loss_uncond": -22.456410725911457}, "model_output": [{"sum_logits": -101.26043701171875, "num_tokens": 35, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -124.37454223632812, "logits_per_token": -2.8931553431919643, "logits_per_char": -0.7500773111979167, "num_chars": 135}, {"sum_logits": -82.80074310302734, "num_tokens": 35, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -107.45091247558594, "logits_per_token": -2.3657355172293526, "logits_per_char": -0.5111156981668354, "num_chars": 162}, {"sum_logits": -84.44434356689453, "num_tokens": 37, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -104.04930114746094, "logits_per_token": -2.282279555862014, "logits_per_char": -0.458936649820079, "num_chars": 184}, {"sum_logits": -112.80311584472656, "num_tokens": 55, "num_tokens_all": 490, "is_greedy": false, "sum_logits_uncond": -136.3470916748047, "logits_per_token": -2.050965742631392, "logits_per_char": -0.42890918572139375, "num_chars": 263}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 797, "native_id": 37304, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 75.20409393310547, "incorrect_loss_raw": 130.91942850748697, "correct_loss_per_char": 0.49153002570657167, "incorrect_loss_per_char": 0.6661692241132736, "correct_loss_per_token": 2.5068031311035157, "incorrect_loss_per_token": 2.922393856132564, "correct_loss_uncond": -41.92932891845703, "incorrect_loss_uncond": -19.41027323404948}, "model_output": [{"sum_logits": -76.916748046875, "num_tokens": 27, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -91.9498291015625, "logits_per_token": -2.8487684461805554, "logits_per_char": -0.6253394150152439, "num_chars": 123}, {"sum_logits": -75.20409393310547, "num_tokens": 30, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -117.1334228515625, "logits_per_token": -2.5068031311035157, "logits_per_char": -0.49153002570657167, "num_chars": 153}, {"sum_logits": -144.26828002929688, "num_tokens": 44, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -160.11398315429688, "logits_per_token": -3.2788245461203833, "logits_per_char": -0.8197061365300958, "num_chars": 176}, {"sum_logits": -171.57325744628906, "num_tokens": 65, "num_tokens_all": 500, "is_greedy": false, "sum_logits_uncond": -198.92529296875, "logits_per_token": -2.639588576096755, "logits_per_char": -0.5534621207944809, "num_chars": 310}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 798, "native_id": 8565, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 35.306331634521484, "incorrect_loss_raw": 38.156432469685875, "correct_loss_per_char": 0.5043761662074497, "incorrect_loss_per_char": 0.7355077229467266, "correct_loss_per_token": 1.8582279807642887, "incorrect_loss_per_token": 3.2008335378434922, "correct_loss_uncond": -26.416160583496094, "incorrect_loss_uncond": -17.070648829142254}, "model_output": [{"sum_logits": -20.690465927124023, "num_tokens": 8, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -37.67826843261719, "logits_per_token": -2.586308240890503, "logits_per_char": -0.6085431155036477, "num_chars": 34}, {"sum_logits": -47.92261505126953, "num_tokens": 15, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -67.72831726074219, "logits_per_token": -3.194841003417969, "logits_per_char": -0.7487908601760864, "num_chars": 64}, {"sum_logits": -45.85621643066406, "num_tokens": 12, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -60.274658203125, "logits_per_token": -3.8213513692220054, "logits_per_char": -0.8491891931604456, "num_chars": 54}, {"sum_logits": -35.306331634521484, "num_tokens": 19, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -61.72249221801758, "logits_per_token": -1.8582279807642887, "logits_per_char": -0.5043761662074497, "num_chars": 70}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 799, "native_id": 50045, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 66.88507843017578, "incorrect_loss_raw": 51.888598124186196, "correct_loss_per_char": 0.6431257541363056, "incorrect_loss_per_char": 0.7424795733767221, "correct_loss_per_token": 3.344253921508789, "incorrect_loss_per_token": 3.150805217363102, "correct_loss_uncond": -31.367691040039062, "incorrect_loss_uncond": -30.623709360758465}, "model_output": [{"sum_logits": -77.64140319824219, "num_tokens": 22, "num_tokens_all": 422, "is_greedy": false, "sum_logits_uncond": -112.7084732055664, "logits_per_token": -3.5291546908291904, "logits_per_char": -0.6751426365064538, "num_chars": 115}, {"sum_logits": -48.616817474365234, "num_tokens": 14, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -80.5639877319336, "logits_per_token": -3.472629819597517, "logits_per_char": -0.8839421358975497, "num_chars": 55}, {"sum_logits": -29.407573699951172, "num_tokens": 12, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -54.264461517333984, "logits_per_token": -2.4506311416625977, "logits_per_char": -0.668353947726163, "num_chars": 44}, {"sum_logits": -66.88507843017578, "num_tokens": 20, "num_tokens_all": 420, "is_greedy": false, "sum_logits_uncond": -98.25276947021484, "logits_per_token": -3.344253921508789, "logits_per_char": -0.6431257541363056, "num_chars": 104}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 800, "native_id": 13370, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 89.1778564453125, "incorrect_loss_raw": 87.60282897949219, "correct_loss_per_char": 0.6369846888950893, "incorrect_loss_per_char": 0.6165583959128054, "correct_loss_per_token": 2.4771626790364585, "incorrect_loss_per_token": 2.514663216424367, "correct_loss_uncond": -32.46381378173828, "incorrect_loss_uncond": -18.042325337727863}, "model_output": [{"sum_logits": -71.67350769042969, "num_tokens": 32, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -80.74530792236328, "logits_per_token": -2.2397971153259277, "logits_per_char": -0.5780121587937878, "num_chars": 124}, {"sum_logits": -112.03221130371094, "num_tokens": 42, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -131.7611083984375, "logits_per_token": -2.667433602469308, "logits_per_char": -0.6438632833546606, "num_chars": 174}, {"sum_logits": -79.10276794433594, "num_tokens": 30, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -104.42904663085938, "logits_per_token": -2.6367589314778646, "logits_per_char": -0.6277997455899678, "num_chars": 126}, {"sum_logits": -89.1778564453125, "num_tokens": 36, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -121.64167022705078, "logits_per_token": -2.4771626790364585, "logits_per_char": -0.6369846888950893, "num_chars": 140}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 801, "native_id": 47811, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 37.92351531982422, "incorrect_loss_raw": 28.20887502034505, "correct_loss_per_char": 0.8618980754505504, "incorrect_loss_per_char": 0.8950264856208965, "correct_loss_per_token": 3.7923515319824217, "incorrect_loss_per_token": 3.5263923841809466, "correct_loss_uncond": -25.221515655517578, "incorrect_loss_uncond": -17.838995615641277}, "model_output": [{"sum_logits": -28.971508026123047, "num_tokens": 9, "num_tokens_all": 405, "is_greedy": false, "sum_logits_uncond": -47.14727020263672, "logits_per_token": -3.2190564473470054, "logits_per_char": -0.7830137304357581, "num_chars": 37}, {"sum_logits": -22.58094024658203, "num_tokens": 7, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -41.09615707397461, "logits_per_token": -3.225848606654576, "logits_per_char": -0.8684977017916166, "num_chars": 26}, {"sum_logits": -37.92351531982422, "num_tokens": 10, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -63.1450309753418, "logits_per_token": -3.7923515319824217, "logits_per_char": -0.8618980754505504, "num_chars": 44}, {"sum_logits": -33.07417678833008, "num_tokens": 8, "num_tokens_all": 404, "is_greedy": false, "sum_logits_uncond": -49.900184631347656, "logits_per_token": -4.13427209854126, "logits_per_char": -1.033568024635315, "num_chars": 32}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 802, "native_id": 23106, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 86.75141906738281, "incorrect_loss_raw": 111.82556406656902, "correct_loss_per_char": 0.36915497475482045, "incorrect_loss_per_char": 0.6803717195396448, "correct_loss_per_token": 1.5772985284978693, "incorrect_loss_per_token": 2.9361087628557256, "correct_loss_uncond": -10.471511840820312, "incorrect_loss_uncond": -8.386100769042969}, "model_output": [{"sum_logits": -85.05783081054688, "num_tokens": 28, "num_tokens_all": 436, "is_greedy": false, "sum_logits_uncond": -95.76638793945312, "logits_per_token": -3.0377796718052457, "logits_per_char": -0.7662867640589809, "num_chars": 111}, {"sum_logits": -86.75141906738281, "num_tokens": 55, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -97.22293090820312, "logits_per_token": -1.5772985284978693, "logits_per_char": -0.36915497475482045, "num_chars": 235}, {"sum_logits": -149.8631591796875, "num_tokens": 43, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -157.57395935058594, "logits_per_token": -3.4851897483648258, "logits_per_char": -0.7455878566153606, "num_chars": 201}, {"sum_logits": -100.55570220947266, "num_tokens": 44, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -107.29464721679688, "logits_per_token": -2.285356868397106, "logits_per_char": -0.5292405379445929, "num_chars": 190}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 803, "native_id": 10508, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 107.31407928466797, "incorrect_loss_raw": 116.72958882649739, "correct_loss_per_char": 0.4019253905792808, "incorrect_loss_per_char": 0.6921213121601598, "correct_loss_per_token": 2.0637322939359226, "incorrect_loss_per_token": 3.1455909729003904, "correct_loss_uncond": -24.82349395751953, "incorrect_loss_uncond": -19.835215250651043}, "model_output": [{"sum_logits": -133.23204040527344, "num_tokens": 45, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -160.54092407226562, "logits_per_token": -2.9607120090060763, "logits_per_char": -0.6499123922208461, "num_chars": 205}, {"sum_logits": -107.31407928466797, "num_tokens": 52, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -132.1375732421875, "logits_per_token": -2.0637322939359226, "logits_per_char": -0.4019253905792808, "num_chars": 267}, {"sum_logits": -87.40878295898438, "num_tokens": 27, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -101.8572998046875, "logits_per_token": -3.237362331814236, "logits_per_char": -0.7223866360246642, "num_chars": 121}, {"sum_logits": -129.54794311523438, "num_tokens": 40, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -147.2961883544922, "logits_per_token": -3.2386985778808595, "logits_per_char": -0.7040649082349695, "num_chars": 184}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 804, "native_id": 14449, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 64.08137512207031, "incorrect_loss_raw": 95.13978830973308, "correct_loss_per_char": 0.3501714487544826, "incorrect_loss_per_char": 0.6095410317912333, "correct_loss_per_token": 1.6431121826171875, "incorrect_loss_per_token": 2.8416864915390865, "correct_loss_uncond": -30.00750732421875, "incorrect_loss_uncond": -9.708524068196615}, "model_output": [{"sum_logits": -113.17430114746094, "num_tokens": 36, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -122.79582214355469, "logits_per_token": -3.1437305874294705, "logits_per_char": -0.6150777236275051, "num_chars": 184}, {"sum_logits": -64.08137512207031, "num_tokens": 39, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -94.08888244628906, "logits_per_token": -1.6431121826171875, "logits_per_char": -0.3501714487544826, "num_chars": 183}, {"sum_logits": -77.82366180419922, "num_tokens": 29, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -86.73503112792969, "logits_per_token": -2.683574544972387, "logits_per_char": -0.6708936362430967, "num_chars": 116}, {"sum_logits": -94.42140197753906, "num_tokens": 35, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -105.01408386230469, "logits_per_token": -2.697754342215402, "logits_per_char": -0.5426517355030981, "num_chars": 174}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 805, "native_id": 43969, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 79.18428039550781, "incorrect_loss_raw": 115.79068756103516, "correct_loss_per_char": 0.37175718495543575, "incorrect_loss_per_char": 0.6522377487275516, "correct_loss_per_token": 1.8414948929187864, "incorrect_loss_per_token": 3.0039755646605317, "correct_loss_uncond": -25.82367706298828, "incorrect_loss_uncond": -25.56335703531901}, "model_output": [{"sum_logits": -126.00563049316406, "num_tokens": 44, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -155.14419555664062, "logits_per_token": -2.8637643293900923, "logits_per_char": -0.6774496263073336, "num_chars": 186}, {"sum_logits": -114.34386444091797, "num_tokens": 37, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -136.45822143554688, "logits_per_token": -3.0903747146194047, "logits_per_char": -0.684693799047413, "num_chars": 167}, {"sum_logits": -79.18428039550781, "num_tokens": 43, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -105.0079574584961, "logits_per_token": -1.8414948929187864, "logits_per_char": -0.37175718495543575, "num_chars": 213}, {"sum_logits": -107.02256774902344, "num_tokens": 35, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -132.459716796875, "logits_per_token": -3.057787649972098, "logits_per_char": -0.594569820827908, "num_chars": 180}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 806, "native_id": 22657, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 62.247196197509766, "incorrect_loss_raw": 133.33379618326822, "correct_loss_per_char": 0.42345031426877394, "incorrect_loss_per_char": 0.7300485266973377, "correct_loss_per_token": 1.830799888162052, "incorrect_loss_per_token": 3.4079609505075474, "correct_loss_uncond": -32.00831985473633, "incorrect_loss_uncond": -27.929407755533855}, "model_output": [{"sum_logits": -111.82481384277344, "num_tokens": 41, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -151.71531677246094, "logits_per_token": -2.7274344839700837, "logits_per_char": -0.4698521590032497, "num_chars": 238}, {"sum_logits": -129.14993286132812, "num_tokens": 39, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -151.98855590820312, "logits_per_token": -3.3115367400340543, "logits_per_char": -0.5843888364765979, "num_chars": 221}, {"sum_logits": -159.02664184570312, "num_tokens": 38, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -180.0857391357422, "logits_per_token": -4.184911627518503, "logits_per_char": -1.1359045846121651, "num_chars": 140}, {"sum_logits": -62.247196197509766, "num_tokens": 34, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -94.2555160522461, "logits_per_token": -1.830799888162052, "logits_per_char": -0.42345031426877394, "num_chars": 147}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 807, "native_id": 26351, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 80.95774841308594, "incorrect_loss_raw": 102.9920425415039, "correct_loss_per_char": 0.4906530206853693, "incorrect_loss_per_char": 0.6364104616506061, "correct_loss_per_token": 2.3811102474437043, "incorrect_loss_per_token": 3.024377146151044, "correct_loss_uncond": -35.47553253173828, "incorrect_loss_uncond": -20.78980255126953}, "model_output": [{"sum_logits": -114.7933578491211, "num_tokens": 34, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -140.04469299316406, "logits_per_token": -3.376275230856503, "logits_per_char": -0.7602209129080867, "num_chars": 151}, {"sum_logits": -110.56614685058594, "num_tokens": 38, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -126.54557800292969, "logits_per_token": -2.909635443436472, "logits_per_char": -0.6542375553289109, "num_chars": 169}, {"sum_logits": -80.95774841308594, "num_tokens": 34, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -116.43328094482422, "logits_per_token": -2.3811102474437043, "logits_per_char": -0.4906530206853693, "num_chars": 165}, {"sum_logits": -83.61662292480469, "num_tokens": 30, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -104.75526428222656, "logits_per_token": -2.787220764160156, "logits_per_char": -0.49477291671482065, "num_chars": 169}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 808, "native_id": 9392, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 55.23563766479492, "incorrect_loss_raw": 115.75137837727864, "correct_loss_per_char": 0.4931753362928118, "incorrect_loss_per_char": 0.6351871015716412, "correct_loss_per_token": 1.9727013451712472, "incorrect_loss_per_token": 2.8510173033766315, "correct_loss_uncond": -27.504398345947266, "incorrect_loss_uncond": -20.241973876953125}, "model_output": [{"sum_logits": -55.23563766479492, "num_tokens": 28, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -82.74003601074219, "logits_per_token": -1.9727013451712472, "logits_per_char": -0.4931753362928118, "num_chars": 112}, {"sum_logits": -130.00013732910156, "num_tokens": 42, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -149.53863525390625, "logits_per_token": -3.095241364978609, "logits_per_char": -0.6770840485890707, "num_chars": 192}, {"sum_logits": -82.53897094726562, "num_tokens": 38, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -109.35496520996094, "logits_per_token": -2.1720781828227795, "logits_per_char": -0.4716512625558036, "num_chars": 175}, {"sum_logits": -134.71502685546875, "num_tokens": 41, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -149.08645629882812, "logits_per_token": -3.285732362328506, "logits_per_char": -0.7568259935700492, "num_chars": 178}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 809, "native_id": 30111, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 87.54510498046875, "incorrect_loss_raw": 170.15919748942056, "correct_loss_per_char": 0.5119596782483553, "incorrect_loss_per_char": 0.8874899330565912, "correct_loss_per_token": 2.4318084716796875, "incorrect_loss_per_token": 4.315726171644256, "correct_loss_uncond": -37.717010498046875, "incorrect_loss_uncond": -5.0373280843098955}, "model_output": [{"sum_logits": -105.9251937866211, "num_tokens": 36, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -115.2540054321289, "logits_per_token": -2.9423664940728083, "logits_per_char": -0.5788261955553066, "num_chars": 183}, {"sum_logits": -87.54510498046875, "num_tokens": 36, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -125.26211547851562, "logits_per_token": -2.4318084716796875, "logits_per_char": -0.5119596782483553, "num_chars": 171}, {"sum_logits": -193.08917236328125, "num_tokens": 31, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -194.81396484375, "logits_per_token": -6.228682979460685, "logits_per_char": -1.2703235023900081, "num_chars": 152}, {"sum_logits": -211.46322631835938, "num_tokens": 56, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -215.5216064453125, "logits_per_token": -3.7761290413992747, "logits_per_char": -0.8133201012244591, "num_chars": 260}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 810, "native_id": 38872, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 120.22247314453125, "incorrect_loss_raw": 133.91910552978516, "correct_loss_per_char": 0.7706568791316106, "incorrect_loss_per_char": 0.7987162836060663, "correct_loss_per_token": 3.5359550924862133, "incorrect_loss_per_token": 3.231785016236092, "correct_loss_uncond": -31.581268310546875, "incorrect_loss_uncond": -23.833763122558594}, "model_output": [{"sum_logits": -120.22247314453125, "num_tokens": 34, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -151.80374145507812, "logits_per_token": -3.5359550924862133, "logits_per_char": -0.7706568791316106, "num_chars": 156}, {"sum_logits": -124.82565307617188, "num_tokens": 37, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -146.72000122070312, "logits_per_token": -3.3736662993559965, "logits_per_char": -0.8001644427959735, "num_chars": 156}, {"sum_logits": -101.22917938232422, "num_tokens": 37, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -124.2188720703125, "logits_per_token": -2.7359237670898438, "logits_per_char": -0.7388991195790089, "num_chars": 137}, {"sum_logits": -175.70248413085938, "num_tokens": 49, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -202.31973266601562, "logits_per_token": -3.585764982262436, "logits_per_char": -0.8570852884432164, "num_chars": 205}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 811, "native_id": 12837, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 68.36859130859375, "incorrect_loss_raw": 45.86532084147135, "correct_loss_per_char": 0.8043363683363971, "incorrect_loss_per_char": 0.8780047581277234, "correct_loss_per_token": 4.0216818416819855, "incorrect_loss_per_token": 3.5207319853506682, "correct_loss_uncond": -19.233963012695312, "incorrect_loss_uncond": -10.910035451253256}, "model_output": [{"sum_logits": -51.76762390136719, "num_tokens": 13, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -58.19586944580078, "logits_per_token": -3.9821249154897838, "logits_per_char": -0.8486495721535604, "num_chars": 61}, {"sum_logits": -37.75593566894531, "num_tokens": 12, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -55.77434158325195, "logits_per_token": -3.1463279724121094, "logits_per_char": -0.7403124640969669, "num_chars": 51}, {"sum_logits": -68.36859130859375, "num_tokens": 17, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -87.60255432128906, "logits_per_token": -4.0216818416819855, "logits_per_char": -0.8043363683363971, "num_chars": 85}, {"sum_logits": -48.07240295410156, "num_tokens": 14, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -56.355857849121094, "logits_per_token": -3.4337430681501115, "logits_per_char": -1.0450522381326426, "num_chars": 46}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 812, "native_id": 49427, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 117.00436401367188, "incorrect_loss_raw": 109.65099589029948, "correct_loss_per_char": 0.6125882932652977, "incorrect_loss_per_char": 0.7741454696600903, "correct_loss_per_token": 2.2076295096919223, "incorrect_loss_per_token": 3.1977076377138935, "correct_loss_uncond": -38.36811828613281, "incorrect_loss_uncond": -18.289594014485676}, "model_output": [{"sum_logits": -117.00436401367188, "num_tokens": 53, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -155.3724822998047, "logits_per_token": -2.2076295096919223, "logits_per_char": -0.6125882932652977, "num_chars": 191}, {"sum_logits": -108.23086547851562, "num_tokens": 39, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -134.64332580566406, "logits_per_token": -2.775150396885016, "logits_per_char": -0.6366521498736213, "num_chars": 170}, {"sum_logits": -134.1850128173828, "num_tokens": 36, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -140.1165771484375, "logits_per_token": -3.7273614671495228, "logits_per_char": -0.8770262275645936, "num_chars": 153}, {"sum_logits": -86.537109375, "num_tokens": 28, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -109.0618667602539, "logits_per_token": -3.090611049107143, "logits_per_char": -0.808758031542056, "num_chars": 107}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 813, "native_id": 16812, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 72.76483154296875, "incorrect_loss_raw": 80.79414494832356, "correct_loss_per_char": 0.3998067667196085, "incorrect_loss_per_char": 0.5673733804297761, "correct_loss_per_token": 1.966617068728885, "incorrect_loss_per_token": 2.8642291744416277, "correct_loss_uncond": -20.96893310546875, "incorrect_loss_uncond": -13.608025868733725}, "model_output": [{"sum_logits": -72.76483154296875, "num_tokens": 37, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -93.7337646484375, "logits_per_token": -1.966617068728885, "logits_per_char": -0.3998067667196085, "num_chars": 182}, {"sum_logits": -63.51010513305664, "num_tokens": 27, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -80.22059631347656, "logits_per_token": -2.3522261160391347, "logits_per_char": -0.496172696352005, "num_chars": 128}, {"sum_logits": -54.12579345703125, "num_tokens": 22, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -63.32127380371094, "logits_per_token": -2.460263338955966, "logits_per_char": -0.5011647542317709, "num_chars": 108}, {"sum_logits": -124.74653625488281, "num_tokens": 33, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -139.66464233398438, "logits_per_token": -3.7801980683297822, "logits_per_char": -0.7047826907055527, "num_chars": 177}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 814, "native_id": 7297, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 58.34181594848633, "incorrect_loss_raw": 86.26693979899089, "correct_loss_per_char": 0.4079847269124918, "incorrect_loss_per_char": 0.5432757379228442, "correct_loss_per_token": 1.8819940628543976, "incorrect_loss_per_token": 2.6349959857101464, "correct_loss_uncond": -25.252056121826172, "incorrect_loss_uncond": -16.787933349609375}, "model_output": [{"sum_logits": -58.34181594848633, "num_tokens": 31, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -83.5938720703125, "logits_per_token": -1.8819940628543976, "logits_per_char": -0.4079847269124918, "num_chars": 143}, {"sum_logits": -72.60155487060547, "num_tokens": 31, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -89.84850311279297, "logits_per_token": -2.3419856409872732, "logits_per_char": -0.48401036580403645, "num_chars": 150}, {"sum_logits": -101.43107604980469, "num_tokens": 31, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -113.1640625, "logits_per_token": -3.27197019515499, "logits_per_char": -0.6586433509727577, "num_chars": 154}, {"sum_logits": -84.7681884765625, "num_tokens": 37, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -106.15205383300781, "logits_per_token": -2.291032120988176, "logits_per_char": -0.4871734969917385, "num_chars": 174}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 815, "native_id": 49491, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 71.45358276367188, "incorrect_loss_raw": 67.47076161702473, "correct_loss_per_char": 0.5582311153411865, "incorrect_loss_per_char": 0.8420197658204196, "correct_loss_per_token": 2.463916647023168, "incorrect_loss_per_token": 3.8871585292580684, "correct_loss_uncond": -27.25354766845703, "incorrect_loss_uncond": -18.937327067057293}, "model_output": [{"sum_logits": -22.177654266357422, "num_tokens": 8, "num_tokens_all": 402, "is_greedy": false, "sum_logits_uncond": -41.10188293457031, "logits_per_token": -2.7722067832946777, "logits_per_char": -0.5544413566589356, "num_chars": 40}, {"sum_logits": -71.45358276367188, "num_tokens": 29, "num_tokens_all": 423, "is_greedy": false, "sum_logits_uncond": -98.7071304321289, "logits_per_token": -2.463916647023168, "logits_per_char": -0.5582311153411865, "num_chars": 128}, {"sum_logits": -55.50593948364258, "num_tokens": 13, "num_tokens_all": 407, "is_greedy": false, "sum_logits_uncond": -69.2273941040039, "logits_per_token": -4.269687652587891, "logits_per_char": -0.9737884119937295, "num_chars": 57}, {"sum_logits": -124.72869110107422, "num_tokens": 27, "num_tokens_all": 421, "is_greedy": false, "sum_logits_uncond": -148.89498901367188, "logits_per_token": -4.619581151891638, "logits_per_char": -0.9978295288085938, "num_chars": 125}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 816, "native_id": 14358, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 66.93020629882812, "incorrect_loss_raw": 119.18502298990886, "correct_loss_per_char": 0.5148477407602163, "incorrect_loss_per_char": 0.6368285763503085, "correct_loss_per_token": 2.390364510672433, "incorrect_loss_per_token": 2.933342011180708, "correct_loss_uncond": -14.755035400390625, "incorrect_loss_uncond": -16.60772196451823}, "model_output": [{"sum_logits": -66.93020629882812, "num_tokens": 28, "num_tokens_all": 444, "is_greedy": false, "sum_logits_uncond": -81.68524169921875, "logits_per_token": -2.390364510672433, "logits_per_char": -0.5148477407602163, "num_chars": 130}, {"sum_logits": -102.142578125, "num_tokens": 33, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -113.88491821289062, "logits_per_token": -3.095229640151515, "logits_per_char": -0.7566116898148149, "num_chars": 135}, {"sum_logits": -111.92878723144531, "num_tokens": 38, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -125.52964782714844, "logits_per_token": -2.945494400827508, "logits_per_char": -0.608308626257855, "num_chars": 184}, {"sum_logits": -143.48370361328125, "num_tokens": 52, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -167.9636688232422, "logits_per_token": -2.759301992563101, "logits_per_char": -0.5455654129782557, "num_chars": 263}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 817, "native_id": 23408, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 63.163909912109375, "incorrect_loss_raw": 42.347120920817055, "correct_loss_per_char": 0.7177717035466974, "incorrect_loss_per_char": 0.7433130580351192, "correct_loss_per_token": 2.8710868141867896, "incorrect_loss_per_token": 2.9678013088938955, "correct_loss_uncond": -16.426422119140625, "incorrect_loss_uncond": -23.00995127360026}, "model_output": [{"sum_logits": -63.163909912109375, "num_tokens": 22, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -79.59033203125, "logits_per_token": -2.8710868141867896, "logits_per_char": -0.7177717035466974, "num_chars": 88}, {"sum_logits": -39.566978454589844, "num_tokens": 16, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -69.09077453613281, "logits_per_token": -2.4729361534118652, "logits_per_char": -0.5994996735543916, "num_chars": 66}, {"sum_logits": -33.178138732910156, "num_tokens": 13, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -59.260379791259766, "logits_per_token": -2.552164517916166, "logits_per_char": -0.896706452240815, "num_chars": 37}, {"sum_logits": -54.29624557495117, "num_tokens": 14, "num_tokens_all": 407, "is_greedy": false, "sum_logits_uncond": -67.72006225585938, "logits_per_token": -3.878303255353655, "logits_per_char": -0.733733048310151, "num_chars": 74}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 818, "native_id": 24848, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 83.75531768798828, "incorrect_loss_raw": 120.84702046712239, "correct_loss_per_char": 0.5300969473923309, "incorrect_loss_per_char": 0.6037491686441679, "correct_loss_per_token": 2.204087307578639, "incorrect_loss_per_token": 2.8474016760149574, "correct_loss_uncond": -34.62347412109375, "incorrect_loss_uncond": -21.20526631673177}, "model_output": [{"sum_logits": -108.86859130859375, "num_tokens": 39, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -127.04969787597656, "logits_per_token": -2.7915023412459936, "logits_per_char": -0.6014839298817334, "num_chars": 181}, {"sum_logits": -117.58026123046875, "num_tokens": 52, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -136.92803955078125, "logits_per_token": -2.2611588698167067, "logits_per_char": -0.48586884805978825, "num_chars": 242}, {"sum_logits": -136.0922088623047, "num_tokens": 39, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -162.1791229248047, "logits_per_token": -3.4895438169821715, "logits_per_char": -0.7238947279909824, "num_chars": 188}, {"sum_logits": -83.75531768798828, "num_tokens": 38, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -118.37879180908203, "logits_per_token": -2.204087307578639, "logits_per_char": -0.5300969473923309, "num_chars": 158}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 819, "native_id": 20275, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 86.97138977050781, "incorrect_loss_raw": 114.24731826782227, "correct_loss_per_char": 0.608191536856698, "incorrect_loss_per_char": 0.638455426420448, "correct_loss_per_token": 2.635496659712358, "incorrect_loss_per_token": 2.735162471253195, "correct_loss_uncond": -16.810142517089844, "incorrect_loss_uncond": -18.188007354736328}, "model_output": [{"sum_logits": -166.2066192626953, "num_tokens": 54, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -185.75599670410156, "logits_per_token": -3.07790035671658, "logits_per_char": -0.7289764002749795, "num_chars": 228}, {"sum_logits": -86.97138977050781, "num_tokens": 33, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -103.78153228759766, "logits_per_token": -2.635496659712358, "logits_per_char": -0.608191536856698, "num_chars": 143}, {"sum_logits": -59.33384323120117, "num_tokens": 27, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -78.55440521240234, "logits_per_token": -2.197549749303747, "logits_per_char": -0.5204723090456244, "num_chars": 114}, {"sum_logits": -117.20149230957031, "num_tokens": 40, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -132.99557495117188, "logits_per_token": -2.9300373077392576, "logits_per_char": -0.6659175699407404, "num_chars": 176}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 820, "native_id": 20249, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 61.41070556640625, "incorrect_loss_raw": 115.52732594807942, "correct_loss_per_char": 0.4264632331000434, "incorrect_loss_per_char": 0.8513641584986438, "correct_loss_per_token": 1.9190845489501953, "incorrect_loss_per_token": 3.6104126873028153, "correct_loss_uncond": -28.03772735595703, "incorrect_loss_uncond": -20.571792602539062}, "model_output": [{"sum_logits": -164.95594787597656, "num_tokens": 49, "num_tokens_all": 490, "is_greedy": false, "sum_logits_uncond": -186.86143493652344, "logits_per_token": -3.3664479158362566, "logits_per_char": -0.7780940937546065, "num_chars": 212}, {"sum_logits": -100.42301177978516, "num_tokens": 22, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -113.22242736816406, "logits_per_token": -4.564682353626598, "logits_per_char": -1.115811241997613, "num_chars": 90}, {"sum_logits": -81.20301818847656, "num_tokens": 28, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -108.21349334716797, "logits_per_token": -2.9001077924455916, "logits_per_char": -0.6601871397437119, "num_chars": 123}, {"sum_logits": -61.41070556640625, "num_tokens": 32, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -89.44843292236328, "logits_per_token": -1.9190845489501953, "logits_per_char": -0.4264632331000434, "num_chars": 144}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 821, "native_id": 27835, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 163.8794403076172, "incorrect_loss_raw": 194.0796661376953, "correct_loss_per_char": 0.6114904489090194, "incorrect_loss_per_char": 0.7653704639999681, "correct_loss_per_token": 2.686548201764216, "incorrect_loss_per_token": 3.314422978323387, "correct_loss_uncond": -57.15245056152344, "incorrect_loss_uncond": -29.182708740234375}, "model_output": [{"sum_logits": -163.8794403076172, "num_tokens": 61, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -221.03189086914062, "logits_per_token": -2.686548201764216, "logits_per_char": -0.6114904489090194, "num_chars": 268}, {"sum_logits": -203.75244140625, "num_tokens": 57, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -219.98419189453125, "logits_per_token": -3.5746042351973686, "logits_per_char": -0.8182828972138554, "num_chars": 249}, {"sum_logits": -188.3039093017578, "num_tokens": 54, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -220.741455078125, "logits_per_token": -3.4871094315140336, "logits_per_char": -0.8081712845569005, "num_chars": 233}, {"sum_logits": -190.18264770507812, "num_tokens": 66, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -229.0614776611328, "logits_per_token": -2.8815552682587593, "logits_per_char": -0.6696572102291484, "num_chars": 284}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 822, "native_id": 12315, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 86.25015258789062, "incorrect_loss_raw": 132.82683563232422, "correct_loss_per_char": 0.6073954407597931, "incorrect_loss_per_char": 0.6805908648370714, "correct_loss_per_token": 2.536769193761489, "incorrect_loss_per_token": 2.957339373633204, "correct_loss_uncond": -27.584686279296875, "incorrect_loss_uncond": -27.929959615071613}, "model_output": [{"sum_logits": -129.66566467285156, "num_tokens": 49, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -155.47023010253906, "logits_per_token": -2.646238054547991, "logits_per_char": -0.6059143209011755, "num_chars": 214}, {"sum_logits": -144.08253479003906, "num_tokens": 41, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -165.73666381835938, "logits_per_token": -3.5142081656107087, "logits_per_char": -0.7351149734185667, "num_chars": 196}, {"sum_logits": -86.25015258789062, "num_tokens": 34, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -113.8348388671875, "logits_per_token": -2.536769193761489, "logits_per_char": -0.6073954407597931, "num_chars": 142}, {"sum_logits": -124.73230743408203, "num_tokens": 46, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -161.06349182128906, "logits_per_token": -2.7115719007409136, "logits_per_char": -0.7007433001914721, "num_chars": 178}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 823, "native_id": 23178, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 63.89037322998047, "incorrect_loss_raw": 87.80824279785156, "correct_loss_per_char": 0.5324197769165039, "incorrect_loss_per_char": 0.6182418692562628, "correct_loss_per_token": 2.3663101196289062, "incorrect_loss_per_token": 2.4104528000516465, "correct_loss_uncond": -32.81371307373047, "incorrect_loss_uncond": -22.79474131266276}, "model_output": [{"sum_logits": -68.07089233398438, "num_tokens": 30, "num_tokens_all": 446, "is_greedy": false, "sum_logits_uncond": -90.71861267089844, "logits_per_token": -2.269029744466146, "logits_per_char": -0.6875847710503472, "num_chars": 99}, {"sum_logits": -89.40020751953125, "num_tokens": 35, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -114.72999572753906, "logits_per_token": -2.5542916434151786, "logits_per_char": -0.5881592599969161, "num_chars": 152}, {"sum_logits": -105.95362854003906, "num_tokens": 44, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -126.36034393310547, "logits_per_token": -2.408037012273615, "logits_per_char": -0.578981576721525, "num_chars": 183}, {"sum_logits": -63.89037322998047, "num_tokens": 27, "num_tokens_all": 443, "is_greedy": false, "sum_logits_uncond": -96.70408630371094, "logits_per_token": -2.3663101196289062, "logits_per_char": -0.5324197769165039, "num_chars": 120}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 824, "native_id": 10450, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 42.52737808227539, "incorrect_loss_raw": 100.33118438720703, "correct_loss_per_char": 0.35439481735229494, "incorrect_loss_per_char": 0.6551968290792433, "correct_loss_per_token": 1.8490164383597996, "incorrect_loss_per_token": 2.8029500060246293, "correct_loss_uncond": -17.810894012451172, "incorrect_loss_uncond": -20.735394795735676}, "model_output": [{"sum_logits": -42.52737808227539, "num_tokens": 23, "num_tokens_all": 443, "is_greedy": false, "sum_logits_uncond": -60.33827209472656, "logits_per_token": -1.8490164383597996, "logits_per_char": -0.35439481735229494, "num_chars": 120}, {"sum_logits": -81.123046875, "num_tokens": 34, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -100.31845092773438, "logits_per_token": -2.3859719669117645, "logits_per_char": -0.5712890625, "num_chars": 142}, {"sum_logits": -112.73516845703125, "num_tokens": 37, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -139.9400634765625, "logits_per_token": -3.0468964447846285, "logits_per_char": -0.6554370259129724, "num_chars": 172}, {"sum_logits": -107.13533782958984, "num_tokens": 36, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -122.94122314453125, "logits_per_token": -2.9759816063774958, "logits_per_char": -0.7388643988247575, "num_chars": 145}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 825, "native_id": 7295, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 53.81448745727539, "incorrect_loss_raw": 143.56657918294272, "correct_loss_per_char": 0.467952064845873, "incorrect_loss_per_char": 0.6457749573032278, "correct_loss_per_token": 2.3397603242293648, "incorrect_loss_per_token": 3.066322351366482, "correct_loss_uncond": -18.50728988647461, "incorrect_loss_uncond": -19.10962422688802}, "model_output": [{"sum_logits": -123.78671264648438, "num_tokens": 46, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -149.58851623535156, "logits_per_token": -2.691015492314878, "logits_per_char": -0.5312734448346969, "num_chars": 233}, {"sum_logits": -174.92724609375, "num_tokens": 56, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -189.35801696777344, "logits_per_token": -3.1237008231026784, "logits_per_char": -0.6727971003605769, "num_chars": 260}, {"sum_logits": -53.81448745727539, "num_tokens": 23, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -72.32177734375, "logits_per_token": -2.3397603242293648, "logits_per_char": -0.467952064845873, "num_chars": 115}, {"sum_logits": -131.98577880859375, "num_tokens": 39, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -149.0820770263672, "logits_per_token": -3.384250738681891, "logits_per_char": -0.7332543267144097, "num_chars": 180}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 826, "native_id": 49953, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 71.92582702636719, "incorrect_loss_raw": 89.24329376220703, "correct_loss_per_char": 0.5407956919275728, "incorrect_loss_per_char": 0.5770223527827908, "correct_loss_per_token": 2.2476820945739746, "incorrect_loss_per_token": 2.421556457829214, "correct_loss_uncond": -33.822784423828125, "incorrect_loss_uncond": -33.01399485270182}, "model_output": [{"sum_logits": -77.16986083984375, "num_tokens": 31, "num_tokens_all": 443, "is_greedy": false, "sum_logits_uncond": -120.17595672607422, "logits_per_token": -2.489350349672379, "logits_per_char": -0.6273972426003557, "num_chars": 123}, {"sum_logits": -108.09683227539062, "num_tokens": 46, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -140.51614379882812, "logits_per_token": -2.349931136421535, "logits_per_char": -0.6038929177396124, "num_chars": 179}, {"sum_logits": -82.46318817138672, "num_tokens": 34, "num_tokens_all": 446, "is_greedy": false, "sum_logits_uncond": -106.07976531982422, "logits_per_token": -2.425387887393727, "logits_per_char": -0.49977689800840436, "num_chars": 165}, {"sum_logits": -71.92582702636719, "num_tokens": 32, "num_tokens_all": 444, "is_greedy": false, "sum_logits_uncond": -105.74861145019531, "logits_per_token": -2.2476820945739746, "logits_per_char": -0.5407956919275728, "num_chars": 133}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 827, "native_id": 42383, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 31.30933380126953, "incorrect_loss_raw": 29.139381408691406, "correct_loss_per_char": 0.6389659959442762, "incorrect_loss_per_char": 0.548519161424296, "correct_loss_per_token": 3.478814866807726, "incorrect_loss_per_token": 2.9490465048587686, "correct_loss_uncond": -18.499141693115234, "incorrect_loss_uncond": -13.957501729329428}, "model_output": [{"sum_logits": -43.765228271484375, "num_tokens": 11, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -54.54052734375, "logits_per_token": -3.9786571155894888, "logits_per_char": -0.7058907785723286, "num_chars": 62}, {"sum_logits": -31.30933380126953, "num_tokens": 9, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -49.808475494384766, "logits_per_token": -3.478814866807726, "logits_per_char": -0.6389659959442762, "num_chars": 49}, {"sum_logits": -23.525283813476562, "num_tokens": 10, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -46.500457763671875, "logits_per_token": -2.352528381347656, "logits_per_char": -0.511419213336447, "num_chars": 46}, {"sum_logits": -20.12763214111328, "num_tokens": 8, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -28.249664306640625, "logits_per_token": -2.51595401763916, "logits_per_char": -0.42824749236411236, "num_chars": 47}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 828, "native_id": 31239, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 55.673343658447266, "incorrect_loss_raw": 65.91226959228516, "correct_loss_per_char": 0.4315763074298238, "incorrect_loss_per_char": 0.46928657165639126, "correct_loss_per_token": 2.0619756910536022, "incorrect_loss_per_token": 2.486638449923896, "correct_loss_uncond": -12.038524627685547, "incorrect_loss_uncond": -12.351888020833334}, "model_output": [{"sum_logits": -102.51266479492188, "num_tokens": 28, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -116.58679962158203, "logits_per_token": -3.6611665998186385, "logits_per_char": -0.6065838153545673, "num_chars": 169}, {"sum_logits": -55.673343658447266, "num_tokens": 27, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -67.71186828613281, "logits_per_token": -2.0619756910536022, "logits_per_char": -0.4315763074298238, "num_chars": 129}, {"sum_logits": -42.51988220214844, "num_tokens": 24, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -56.07652282714844, "logits_per_token": -1.7716617584228516, "logits_per_char": -0.37964180537632536, "num_chars": 112}, {"sum_logits": -52.704261779785156, "num_tokens": 26, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -62.129150390625, "logits_per_token": -2.027086991530198, "logits_per_char": -0.42163409423828124, "num_chars": 125}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 829, "native_id": 44890, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 130.3782958984375, "incorrect_loss_raw": 93.4603983561198, "correct_loss_per_char": 0.5980655775157684, "incorrect_loss_per_char": 0.6483657161951287, "correct_loss_per_token": 2.60756591796875, "incorrect_loss_per_token": 2.7891387650090405, "correct_loss_uncond": -33.311065673828125, "incorrect_loss_uncond": -15.682866414388021}, "model_output": [{"sum_logits": -130.3782958984375, "num_tokens": 50, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -163.68936157226562, "logits_per_token": -2.60756591796875, "logits_per_char": -0.5980655775157684, "num_chars": 218}, {"sum_logits": -79.6366195678711, "num_tokens": 32, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -92.0660629272461, "logits_per_token": -2.4886443614959717, "logits_per_char": -0.5171209062848773, "num_chars": 154}, {"sum_logits": -95.09176635742188, "num_tokens": 30, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -107.28795623779297, "logits_per_token": -3.169725545247396, "logits_per_char": -0.7990904735917804, "num_chars": 119}, {"sum_logits": -105.6528091430664, "num_tokens": 39, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -128.07577514648438, "logits_per_token": -2.709046388283754, "logits_per_char": -0.6288857687087286, "num_chars": 168}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 830, "native_id": 19682, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 102.66836547851562, "incorrect_loss_raw": 87.40106201171875, "correct_loss_per_char": 0.5375307093116002, "incorrect_loss_per_char": 0.6076552198488941, "correct_loss_per_token": 2.5667091369628907, "incorrect_loss_per_token": 2.568109871849181, "correct_loss_uncond": -19.45770263671875, "incorrect_loss_uncond": -22.38213602701823}, "model_output": [{"sum_logits": -102.66836547851562, "num_tokens": 40, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -122.12606811523438, "logits_per_token": -2.5667091369628907, "logits_per_char": -0.5375307093116002, "num_chars": 191}, {"sum_logits": -93.0995101928711, "num_tokens": 32, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -108.22042846679688, "logits_per_token": -2.9093596935272217, "logits_per_char": -0.7161500784067008, "num_chars": 130}, {"sum_logits": -64.57012176513672, "num_tokens": 28, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -85.02947998046875, "logits_per_token": -2.306075777326311, "logits_per_char": -0.5566389807339372, "num_chars": 116}, {"sum_logits": -104.53355407714844, "num_tokens": 42, "num_tokens_all": 490, "is_greedy": false, "sum_logits_uncond": -136.0996856689453, "logits_per_token": -2.4888941446940103, "logits_per_char": -0.5501766004060444, "num_chars": 190}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 831, "native_id": 362, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 142.53468322753906, "incorrect_loss_raw": 106.59001159667969, "correct_loss_per_char": 0.7309470934745593, "incorrect_loss_per_char": 0.6457074540156152, "correct_loss_per_token": 3.239424618807706, "incorrect_loss_per_token": 2.8970666445218605, "correct_loss_uncond": -29.269363403320312, "incorrect_loss_uncond": -17.5728759765625}, "model_output": [{"sum_logits": -100.34701538085938, "num_tokens": 39, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -115.83557891845703, "logits_per_token": -2.5730003943810096, "logits_per_char": -0.5281421862150494, "num_chars": 190}, {"sum_logits": -101.2198486328125, "num_tokens": 32, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -119.09066009521484, "logits_per_token": -3.1631202697753906, "logits_per_char": -0.6747989908854166, "num_chars": 150}, {"sum_logits": -118.20317077636719, "num_tokens": 40, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -137.5624237060547, "logits_per_token": -2.95507926940918, "logits_per_char": -0.7341811849463801, "num_chars": 161}, {"sum_logits": -142.53468322753906, "num_tokens": 44, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -171.80404663085938, "logits_per_token": -3.239424618807706, "logits_per_char": -0.7309470934745593, "num_chars": 195}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 832, "native_id": 37514, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 138.80880737304688, "incorrect_loss_raw": 108.09543100992839, "correct_loss_per_char": 0.5047592995383523, "incorrect_loss_per_char": 0.6683991058125728, "correct_loss_per_token": 2.3134801228841146, "incorrect_loss_per_token": 2.9993080178548723, "correct_loss_uncond": -26.179962158203125, "incorrect_loss_uncond": -15.948824564615885}, "model_output": [{"sum_logits": -84.3415298461914, "num_tokens": 27, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -105.7266845703125, "logits_per_token": -3.123760364673756, "logits_per_char": -0.6801736277918662, "num_chars": 124}, {"sum_logits": -135.58047485351562, "num_tokens": 50, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -150.72425842285156, "logits_per_token": -2.7116094970703126, "logits_per_char": -0.6052699770246234, "num_chars": 224}, {"sum_logits": -104.36428833007812, "num_tokens": 33, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -115.68182373046875, "logits_per_token": -3.1625541918205493, "logits_per_char": -0.7197537126212284, "num_chars": 145}, {"sum_logits": -138.80880737304688, "num_tokens": 60, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -164.98876953125, "logits_per_token": -2.3134801228841146, "logits_per_char": -0.5047592995383523, "num_chars": 275}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 833, "native_id": 14590, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 45.11083221435547, "incorrect_loss_raw": 61.76068369547526, "correct_loss_per_char": 0.4850627119823169, "incorrect_loss_per_char": 0.859668278875147, "correct_loss_per_token": 2.1481348673502603, "incorrect_loss_per_token": 3.6372921994793947, "correct_loss_uncond": -39.79279327392578, "incorrect_loss_uncond": -15.925912221272787}, "model_output": [{"sum_logits": -127.68415832519531, "num_tokens": 26, "num_tokens_all": 429, "is_greedy": false, "sum_logits_uncond": -136.72244262695312, "logits_per_token": -4.910929166353666, "logits_per_char": -1.0133663359142484, "num_chars": 126}, {"sum_logits": -35.16448211669922, "num_tokens": 11, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -61.189605712890625, "logits_per_token": -3.196771101518111, "logits_per_char": -0.7644452634065048, "num_chars": 46}, {"sum_logits": -22.43341064453125, "num_tokens": 8, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -35.14773941040039, "logits_per_token": -2.8041763305664062, "logits_per_char": -0.8011932373046875, "num_chars": 28}, {"sum_logits": -45.11083221435547, "num_tokens": 21, "num_tokens_all": 424, "is_greedy": false, "sum_logits_uncond": -84.90362548828125, "logits_per_token": -2.1481348673502603, "logits_per_char": -0.4850627119823169, "num_chars": 93}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 834, "native_id": 21796, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 68.80628967285156, "incorrect_loss_raw": 75.10933430989583, "correct_loss_per_char": 0.5022356910427122, "incorrect_loss_per_char": 0.42999677226200195, "correct_loss_per_token": 2.7522515869140625, "incorrect_loss_per_token": 2.302076302313929, "correct_loss_uncond": -29.285842895507812, "incorrect_loss_uncond": -23.81182352701823}, "model_output": [{"sum_logits": -79.53546142578125, "num_tokens": 31, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -109.24397277832031, "logits_per_token": -2.5656600459929435, "logits_per_char": -0.4791292856974774, "num_chars": 166}, {"sum_logits": -61.70257568359375, "num_tokens": 29, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -84.71516418457031, "logits_per_token": -2.1276750235721984, "logits_per_char": -0.40066607586749187, "num_chars": 154}, {"sum_logits": -68.80628967285156, "num_tokens": 25, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -98.09213256835938, "logits_per_token": -2.7522515869140625, "logits_per_char": -0.5022356910427122, "num_chars": 137}, {"sum_logits": -84.0899658203125, "num_tokens": 38, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -102.80433654785156, "logits_per_token": -2.2128938373766447, "logits_per_char": -0.4101949552210366, "num_chars": 205}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 835, "native_id": 44844, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 79.13257598876953, "incorrect_loss_raw": 114.4956766764323, "correct_loss_per_char": 0.4121488332748413, "incorrect_loss_per_char": 0.6452602877214525, "correct_loss_per_token": 2.198127110799154, "incorrect_loss_per_token": 3.148189501007799, "correct_loss_uncond": -30.157424926757812, "incorrect_loss_uncond": -13.8804931640625}, "model_output": [{"sum_logits": -105.06784057617188, "num_tokens": 33, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -118.66482543945312, "logits_per_token": -3.1838739568536933, "logits_per_char": -0.6291487459651011, "num_chars": 167}, {"sum_logits": -79.13257598876953, "num_tokens": 36, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -109.29000091552734, "logits_per_token": -2.198127110799154, "logits_per_char": -0.4121488332748413, "num_chars": 192}, {"sum_logits": -151.79505920410156, "num_tokens": 45, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -163.78012084960938, "logits_per_token": -3.3732235378689235, "logits_per_char": -0.7784362010466747, "num_chars": 195}, {"sum_logits": -86.62413024902344, "num_tokens": 30, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -102.68356323242188, "logits_per_token": -2.887471008300781, "logits_per_char": -0.528195916152582, "num_chars": 164}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 836, "native_id": 31225, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 103.44779968261719, "incorrect_loss_raw": 124.93741861979167, "correct_loss_per_char": 0.474531191204666, "incorrect_loss_per_char": 0.6180956576407489, "correct_loss_per_token": 2.4630428495861234, "incorrect_loss_per_token": 2.8606116783250446, "correct_loss_uncond": -35.86247253417969, "incorrect_loss_uncond": -24.73106638590495}, "model_output": [{"sum_logits": -103.44779968261719, "num_tokens": 42, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -139.31027221679688, "logits_per_token": -2.4630428495861234, "logits_per_char": -0.474531191204666, "num_chars": 218}, {"sum_logits": -168.34695434570312, "num_tokens": 57, "num_tokens_all": 500, "is_greedy": false, "sum_logits_uncond": -209.44972229003906, "logits_per_token": -2.9534553393983005, "logits_per_char": -0.7482086859809027, "num_chars": 225}, {"sum_logits": -86.48648071289062, "num_tokens": 35, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -107.01737213134766, "logits_per_token": -2.4710423060825892, "logits_per_char": -0.49704873972925645, "num_chars": 174}, {"sum_logits": -119.97882080078125, "num_tokens": 38, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -132.53836059570312, "logits_per_token": -3.1573373894942436, "logits_per_char": -0.6090295472120876, "num_chars": 197}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 837, "native_id": 46926, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 141.1356201171875, "incorrect_loss_raw": 108.76880900065105, "correct_loss_per_char": 0.6357460365639077, "incorrect_loss_per_char": 0.7176258342314098, "correct_loss_per_token": 2.476063510827851, "incorrect_loss_per_token": 3.0879577155065054, "correct_loss_uncond": -35.66377258300781, "incorrect_loss_uncond": -13.808939615885416}, "model_output": [{"sum_logits": -95.195556640625, "num_tokens": 30, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -111.63787841796875, "logits_per_token": -3.1731852213541667, "logits_per_char": -0.7266836384780534, "num_chars": 131}, {"sum_logits": -98.34506225585938, "num_tokens": 32, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -112.53584289550781, "logits_per_token": -3.0732831954956055, "logits_per_char": -0.7623648236888324, "num_chars": 129}, {"sum_logits": -132.76580810546875, "num_tokens": 44, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -143.5595245361328, "logits_per_token": -3.017404729669744, "logits_per_char": -0.6638290405273437, "num_chars": 200}, {"sum_logits": -141.1356201171875, "num_tokens": 57, "num_tokens_all": 490, "is_greedy": false, "sum_logits_uncond": -176.7993927001953, "logits_per_token": -2.476063510827851, "logits_per_char": -0.6357460365639077, "num_chars": 222}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 838, "native_id": 33123, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.878944396972656, "incorrect_loss_raw": 27.834489822387695, "correct_loss_per_char": 0.30458831787109375, "incorrect_loss_per_char": 0.6588239021654482, "correct_loss_per_token": 1.3198827107747395, "incorrect_loss_per_token": 2.9233705323839945, "correct_loss_uncond": -37.20626449584961, "incorrect_loss_uncond": -19.772392908732098}, "model_output": [{"sum_logits": -21.36081314086914, "num_tokens": 7, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -40.079322814941406, "logits_per_token": -3.051544734409877, "logits_per_char": -0.6675254106521606, "num_chars": 32}, {"sum_logits": -32.40073013305664, "num_tokens": 10, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -54.90483093261719, "logits_per_token": -3.240073013305664, "logits_per_char": -0.6480146026611329, "num_chars": 50}, {"sum_logits": -11.878944396972656, "num_tokens": 9, "num_tokens_all": 405, "is_greedy": false, "sum_logits_uncond": -49.085208892822266, "logits_per_token": -1.3198827107747395, "logits_per_char": -0.30458831787109375, "num_chars": 39}, {"sum_logits": -29.741926193237305, "num_tokens": 12, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -47.83649444580078, "logits_per_token": -2.478493849436442, "logits_per_char": -0.6609316931830512, "num_chars": 45}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 839, "native_id": 4948, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 101.26097106933594, "incorrect_loss_raw": 130.55791982014975, "correct_loss_per_char": 0.5274008909861246, "incorrect_loss_per_char": 0.7645829967592367, "correct_loss_per_token": 2.201325458029042, "incorrect_loss_per_token": 3.034190408583109, "correct_loss_uncond": -25.69365692138672, "incorrect_loss_uncond": -21.589149475097656}, "model_output": [{"sum_logits": -101.26097106933594, "num_tokens": 46, "num_tokens_all": 496, "is_greedy": false, "sum_logits_uncond": -126.95462799072266, "logits_per_token": -2.201325458029042, "logits_per_char": -0.5274008909861246, "num_chars": 192}, {"sum_logits": -101.87530517578125, "num_tokens": 37, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -127.50588989257812, "logits_per_token": -2.7533866263724662, "logits_per_char": -0.7225198948636968, "num_chars": 141}, {"sum_logits": -97.7375259399414, "num_tokens": 35, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -110.05696105957031, "logits_per_token": -2.792500741141183, "logits_per_char": -0.8353634695721488, "num_chars": 117}, {"sum_logits": -192.06092834472656, "num_tokens": 54, "num_tokens_all": 504, "is_greedy": false, "sum_logits_uncond": -218.87835693359375, "logits_per_token": -3.5566838582356772, "logits_per_char": -0.7358656258418642, "num_chars": 261}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 840, "native_id": 21580, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 105.39575958251953, "incorrect_loss_raw": 117.19156138102214, "correct_loss_per_char": 0.6546320470963946, "incorrect_loss_per_char": 0.5879363280685127, "correct_loss_per_token": 2.702455373910757, "incorrect_loss_per_token": 2.782019766581278, "correct_loss_uncond": -11.579086303710938, "incorrect_loss_uncond": -25.437527974446613}, "model_output": [{"sum_logits": -86.49491882324219, "num_tokens": 32, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -101.80537414550781, "logits_per_token": -2.7029662132263184, "logits_per_char": -0.6222656030449079, "num_chars": 139}, {"sum_logits": -122.1091537475586, "num_tokens": 43, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -147.8782501220703, "logits_per_token": -2.83974776157113, "logits_per_char": -0.5627149942283807, "num_chars": 217}, {"sum_logits": -105.39575958251953, "num_tokens": 39, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -116.97484588623047, "logits_per_token": -2.702455373910757, "logits_per_char": -0.6546320470963946, "num_chars": 161}, {"sum_logits": -142.97061157226562, "num_tokens": 51, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -178.20364379882812, "logits_per_token": -2.8033453249463847, "logits_per_char": -0.5788283869322495, "num_chars": 247}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 841, "native_id": 13146, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 118.25346374511719, "incorrect_loss_raw": 159.32940419514975, "correct_loss_per_char": 0.4637390735102635, "incorrect_loss_per_char": 0.7295282344183439, "correct_loss_per_token": 2.1116689954485213, "incorrect_loss_per_token": 3.2532429971669097, "correct_loss_uncond": -25.999053955078125, "incorrect_loss_uncond": -18.29393768310547}, "model_output": [{"sum_logits": -118.25346374511719, "num_tokens": 56, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -144.2525177001953, "logits_per_token": -2.1116689954485213, "logits_per_char": -0.4637390735102635, "num_chars": 255}, {"sum_logits": -105.98612976074219, "num_tokens": 41, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -138.20950317382812, "logits_per_token": -2.5850275551400532, "logits_per_char": -0.5987916935635151, "num_chars": 177}, {"sum_logits": -271.9969482421875, "num_tokens": 63, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -286.6339111328125, "logits_per_token": -4.317411876860119, "logits_per_char": -1.034208928677519, "num_chars": 263}, {"sum_logits": -100.00513458251953, "num_tokens": 35, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -108.026611328125, "logits_per_token": -2.857289559500558, "logits_per_char": -0.5555840810139974, "num_chars": 180}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 842, "native_id": 21176, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 82.01531982421875, "incorrect_loss_raw": 146.70782979329428, "correct_loss_per_char": 0.5395744725277549, "incorrect_loss_per_char": 0.6522061844516669, "correct_loss_per_token": 2.3432948521205357, "incorrect_loss_per_token": 2.687396817421589, "correct_loss_uncond": -30.307662963867188, "incorrect_loss_uncond": -21.122823079427082}, "model_output": [{"sum_logits": -119.27027893066406, "num_tokens": 55, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -137.55320739746094, "logits_per_token": -2.1685505260120737, "logits_per_char": -0.5573377520124488, "num_chars": 214}, {"sum_logits": -131.11190795898438, "num_tokens": 50, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -145.16998291015625, "logits_per_token": -2.6222381591796875, "logits_per_char": -0.6184523960329452, "num_chars": 212}, {"sum_logits": -82.01531982421875, "num_tokens": 35, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -112.32298278808594, "logits_per_token": -2.3432948521205357, "logits_per_char": -0.5395744725277549, "num_chars": 152}, {"sum_logits": -189.74130249023438, "num_tokens": 58, "num_tokens_all": 495, "is_greedy": false, "sum_logits_uncond": -220.76876831054688, "logits_per_token": -3.2714017670730065, "logits_per_char": -0.7808284053096065, "num_chars": 243}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 843, "native_id": 25751, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 80.90381622314453, "incorrect_loss_raw": 168.83487447102866, "correct_loss_per_char": 0.4963424308168376, "incorrect_loss_per_char": 0.8819214047775216, "correct_loss_per_token": 2.789786766315329, "incorrect_loss_per_token": 3.849781050248579, "correct_loss_uncond": -19.02678680419922, "incorrect_loss_uncond": -19.595659891764324}, "model_output": [{"sum_logits": -90.76936340332031, "num_tokens": 25, "num_tokens_all": 441, "is_greedy": false, "sum_logits_uncond": -108.7039566040039, "logits_per_token": -3.6307745361328125, "logits_per_char": -0.8104407446725028, "num_chars": 112}, {"sum_logits": -217.8751220703125, "num_tokens": 55, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -232.78192138671875, "logits_per_token": -3.9613658558238636, "logits_per_char": -0.9193043125329642, "num_chars": 237}, {"sum_logits": -80.90381622314453, "num_tokens": 29, "num_tokens_all": 445, "is_greedy": false, "sum_logits_uncond": -99.93060302734375, "logits_per_token": -2.789786766315329, "logits_per_char": -0.4963424308168376, "num_chars": 163}, {"sum_logits": -197.86013793945312, "num_tokens": 50, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -223.80572509765625, "logits_per_token": -3.9572027587890624, "logits_per_char": -0.9160191571270978, "num_chars": 216}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 844, "native_id": 50193, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 111.4345703125, "incorrect_loss_raw": 121.52393341064453, "correct_loss_per_char": 0.5065207741477272, "incorrect_loss_per_char": 0.7981669455650254, "correct_loss_per_token": 2.2741749043367347, "incorrect_loss_per_token": 3.4444315569187567, "correct_loss_uncond": -42.665802001953125, "incorrect_loss_uncond": -17.676058451334637}, "model_output": [{"sum_logits": -111.4345703125, "num_tokens": 49, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -154.10037231445312, "logits_per_token": -2.2741749043367347, "logits_per_char": -0.5065207741477272, "num_chars": 220}, {"sum_logits": -116.47150421142578, "num_tokens": 34, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -140.43960571289062, "logits_per_token": -3.4256324768066406, "logits_per_char": -0.7371614190596568, "num_chars": 158}, {"sum_logits": -124.84925842285156, "num_tokens": 32, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -143.29705810546875, "logits_per_token": -3.9015393257141113, "logits_per_char": -0.9530477742202409, "num_chars": 131}, {"sum_logits": -123.25103759765625, "num_tokens": 41, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -133.86331176757812, "logits_per_token": -3.006122868235518, "logits_per_char": -0.7042916434151786, "num_chars": 175}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 845, "native_id": 6908, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 61.660457611083984, "incorrect_loss_raw": 100.83762105305989, "correct_loss_per_char": 0.6559623150115318, "incorrect_loss_per_char": 0.7258280984251603, "correct_loss_per_token": 2.9362122671944753, "incorrect_loss_per_token": 3.236520555815435, "correct_loss_uncond": -10.654422760009766, "incorrect_loss_uncond": -14.164103190104166}, "model_output": [{"sum_logits": -130.53170776367188, "num_tokens": 38, "num_tokens_all": 446, "is_greedy": false, "sum_logits_uncond": -143.8538818359375, "logits_per_token": -3.43504494114926, "logits_per_char": -0.8107559488426824, "num_chars": 161}, {"sum_logits": -100.04344940185547, "num_tokens": 27, "num_tokens_all": 435, "is_greedy": false, "sum_logits_uncond": -109.39738464355469, "logits_per_token": -3.7053129408094616, "logits_per_char": -0.8133613772508574, "num_chars": 123}, {"sum_logits": -71.93770599365234, "num_tokens": 28, "num_tokens_all": 436, "is_greedy": false, "sum_logits_uncond": -91.75390625, "logits_per_token": -2.5692037854875838, "logits_per_char": -0.5533669691819411, "num_chars": 130}, {"sum_logits": -61.660457611083984, "num_tokens": 21, "num_tokens_all": 429, "is_greedy": false, "sum_logits_uncond": -72.31488037109375, "logits_per_token": -2.9362122671944753, "logits_per_char": -0.6559623150115318, "num_chars": 94}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 846, "native_id": 3207, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 113.83238220214844, "incorrect_loss_raw": 112.74808247884114, "correct_loss_per_char": 0.6120020548502604, "incorrect_loss_per_char": 0.6551394717248736, "correct_loss_per_token": 2.647264702375545, "incorrect_loss_per_token": 2.9036262341688546, "correct_loss_uncond": -8.879409790039062, "incorrect_loss_uncond": -19.919464111328125}, "model_output": [{"sum_logits": -109.62966918945312, "num_tokens": 44, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -125.21705627441406, "logits_per_token": -2.491583390669389, "logits_per_char": -0.5195718918931428, "num_chars": 211}, {"sum_logits": -113.83238220214844, "num_tokens": 43, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -122.7117919921875, "logits_per_token": -2.647264702375545, "logits_per_char": -0.6120020548502604, "num_chars": 186}, {"sum_logits": -133.8365936279297, "num_tokens": 39, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -150.6967010498047, "logits_per_token": -3.431707528921274, "logits_per_char": -0.6625573941976717, "num_chars": 202}, {"sum_logits": -94.77798461914062, "num_tokens": 34, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -122.08888244628906, "logits_per_token": -2.787587782915901, "logits_per_char": -0.7832891290838068, "num_chars": 121}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 847, "native_id": 43282, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 86.79061126708984, "incorrect_loss_raw": 110.07388305664062, "correct_loss_per_char": 0.5390721196713655, "incorrect_loss_per_char": 0.6050189406883576, "correct_loss_per_token": 2.4797317504882814, "incorrect_loss_per_token": 2.5023431875263884, "correct_loss_uncond": -44.38286590576172, "incorrect_loss_uncond": -18.955764770507812}, "model_output": [{"sum_logits": -123.52537536621094, "num_tokens": 45, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -136.32510375976562, "logits_per_token": -2.745008341471354, "logits_per_char": -0.7058592878069196, "num_chars": 175}, {"sum_logits": -67.05104064941406, "num_tokens": 29, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -81.36386108398438, "logits_per_token": -2.312104849979795, "logits_per_char": -0.532151116265191, "num_chars": 126}, {"sum_logits": -139.64523315429688, "num_tokens": 57, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -169.3999786376953, "logits_per_token": -2.4499163711280154, "logits_per_char": -0.5770464179929623, "num_chars": 242}, {"sum_logits": -86.79061126708984, "num_tokens": 35, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -131.17347717285156, "logits_per_token": -2.4797317504882814, "logits_per_char": -0.5390721196713655, "num_chars": 161}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 848, "native_id": 29654, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 49.266075134277344, "incorrect_loss_raw": 82.09092203776042, "correct_loss_per_char": 0.49266075134277343, "incorrect_loss_per_char": 0.5093087187648528, "correct_loss_per_token": 2.1420032667077105, "incorrect_loss_per_token": 2.366911350597035, "correct_loss_uncond": -23.37085723876953, "incorrect_loss_uncond": -18.857706705729168}, "model_output": [{"sum_logits": -49.266075134277344, "num_tokens": 23, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -72.63693237304688, "logits_per_token": -2.1420032667077105, "logits_per_char": -0.49266075134277343, "num_chars": 100}, {"sum_logits": -78.86565399169922, "num_tokens": 30, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -96.92018127441406, "logits_per_token": -2.6288551330566405, "logits_per_char": -0.5023290063165555, "num_chars": 157}, {"sum_logits": -51.372230529785156, "num_tokens": 28, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -80.25718688964844, "logits_per_token": -1.8347225189208984, "logits_per_char": -0.4756688012017144, "num_chars": 108}, {"sum_logits": -116.03488159179688, "num_tokens": 44, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -125.66851806640625, "logits_per_token": -2.6371563998135654, "logits_per_char": -0.5499283487762885, "num_chars": 211}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 849, "native_id": 14155, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 104.92051696777344, "incorrect_loss_raw": 111.86817677815755, "correct_loss_per_char": 0.549322078365306, "incorrect_loss_per_char": 0.7524041402600982, "correct_loss_per_token": 2.623012924194336, "incorrect_loss_per_token": 3.455725812100553, "correct_loss_uncond": -26.697097778320312, "incorrect_loss_uncond": -21.02062225341797}, "model_output": [{"sum_logits": -104.92051696777344, "num_tokens": 40, "num_tokens_all": 495, "is_greedy": false, "sum_logits_uncond": -131.61761474609375, "logits_per_token": -2.623012924194336, "logits_per_char": -0.549322078365306, "num_chars": 191}, {"sum_logits": -82.62841796875, "num_tokens": 24, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -96.15864562988281, "logits_per_token": -3.4428507486979165, "logits_per_char": -0.7312249377765486, "num_chars": 113}, {"sum_logits": -116.06310272216797, "num_tokens": 36, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -151.61143493652344, "logits_per_token": -3.223975075615777, "logits_per_char": -0.6594494472850453, "num_chars": 176}, {"sum_logits": -136.9130096435547, "num_tokens": 37, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -150.8963165283203, "logits_per_token": -3.7003516119879647, "logits_per_char": -0.8665380357187006, "num_chars": 158}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 850, "native_id": 41482, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 26.9907283782959, "incorrect_loss_raw": 42.32565498352051, "correct_loss_per_char": 0.4574699725134898, "incorrect_loss_per_char": 0.8307035713865046, "correct_loss_per_token": 2.4537025798450816, "incorrect_loss_per_token": 3.6296857540424052, "correct_loss_uncond": -26.17804527282715, "incorrect_loss_uncond": -15.345972061157227}, "model_output": [{"sum_logits": -25.09624671936035, "num_tokens": 10, "num_tokens_all": 405, "is_greedy": false, "sum_logits_uncond": -42.85960006713867, "logits_per_token": -2.5096246719360353, "logits_per_char": -0.6274061679840088, "num_chars": 40}, {"sum_logits": -26.9907283782959, "num_tokens": 11, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -53.16877365112305, "logits_per_token": -2.4537025798450816, "logits_per_char": -0.4574699725134898, "num_chars": 59}, {"sum_logits": -78.3743667602539, "num_tokens": 13, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -86.04713439941406, "logits_per_token": -6.028797443096455, "logits_per_char": -1.3749888905307703, "num_chars": 57}, {"sum_logits": -23.506351470947266, "num_tokens": 10, "num_tokens_all": 405, "is_greedy": false, "sum_logits_uncond": -44.10814666748047, "logits_per_token": -2.3506351470947267, "logits_per_char": -0.4897156556447347, "num_chars": 48}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 851, "native_id": 43836, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 89.69200897216797, "incorrect_loss_raw": 118.98469543457031, "correct_loss_per_char": 0.41143123381728425, "incorrect_loss_per_char": 0.6958115660808025, "correct_loss_per_token": 1.9083406164291057, "incorrect_loss_per_token": 3.2547270085147013, "correct_loss_uncond": -7.283729553222656, "incorrect_loss_uncond": -11.5645751953125}, "model_output": [{"sum_logits": -89.69200897216797, "num_tokens": 47, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -96.97573852539062, "logits_per_token": -1.9083406164291057, "logits_per_char": -0.41143123381728425, "num_chars": 218}, {"sum_logits": -130.94622802734375, "num_tokens": 36, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -149.42710876464844, "logits_per_token": -3.637395222981771, "logits_per_char": -0.8235611825619104, "num_chars": 159}, {"sum_logits": -103.42781829833984, "num_tokens": 38, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -110.38838195800781, "logits_per_token": -2.7217846920615747, "logits_per_char": -0.60484104268035, "num_chars": 171}, {"sum_logits": -122.58003997802734, "num_tokens": 36, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -131.8323211669922, "logits_per_token": -3.4050011105007596, "logits_per_char": -0.659032473000147, "num_chars": 186}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 852, "native_id": 6573, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 62.47660827636719, "incorrect_loss_raw": 92.13742319742839, "correct_loss_per_char": 0.4110303176076789, "incorrect_loss_per_char": 0.5390441713894047, "correct_loss_per_token": 2.0825536092122396, "incorrect_loss_per_token": 2.5556235429012415, "correct_loss_uncond": -18.930343627929688, "incorrect_loss_uncond": -14.301422119140625}, "model_output": [{"sum_logits": -62.47660827636719, "num_tokens": 30, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -81.40695190429688, "logits_per_token": -2.0825536092122396, "logits_per_char": -0.4110303176076789, "num_chars": 152}, {"sum_logits": -75.80513000488281, "num_tokens": 25, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -89.78477478027344, "logits_per_token": -3.0322052001953126, "logits_per_char": -0.5968907874400221, "num_chars": 127}, {"sum_logits": -125.06006622314453, "num_tokens": 55, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -141.81243896484375, "logits_per_token": -2.2738193858753553, "logits_per_char": -0.5063160575835811, "num_chars": 247}, {"sum_logits": -75.54707336425781, "num_tokens": 32, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -87.71932220458984, "logits_per_token": -2.3608460426330566, "logits_per_char": -0.513925669144611, "num_chars": 147}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 853, "native_id": 30646, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 93.02859497070312, "incorrect_loss_raw": 99.3903071085612, "correct_loss_per_char": 0.48706070665289597, "incorrect_loss_per_char": 0.5703981943002215, "correct_loss_per_token": 2.114286249334162, "incorrect_loss_per_token": 2.7685998952275312, "correct_loss_uncond": -31.830406188964844, "incorrect_loss_uncond": -25.25330352783203}, "model_output": [{"sum_logits": -99.033447265625, "num_tokens": 34, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -126.04060363769531, "logits_per_token": -2.9127484489889706, "logits_per_char": -0.5501858181423611, "num_chars": 180}, {"sum_logits": -93.02859497070312, "num_tokens": 44, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -124.85900115966797, "logits_per_token": -2.114286249334162, "logits_per_char": -0.48706070665289597, "num_chars": 191}, {"sum_logits": -94.76496124267578, "num_tokens": 38, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -120.53787994384766, "logits_per_token": -2.4938147695440995, "logits_per_char": -0.5206866002344823, "num_chars": 182}, {"sum_logits": -104.37251281738281, "num_tokens": 36, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -127.35234832763672, "logits_per_token": -2.8992364671495228, "logits_per_char": -0.640322164523821, "num_chars": 163}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 854, "native_id": 7745, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 32.70025634765625, "incorrect_loss_raw": 117.92419942220052, "correct_loss_per_char": 0.23868800253763686, "incorrect_loss_per_char": 0.6363890693788166, "correct_loss_per_token": 1.0218830108642578, "incorrect_loss_per_token": 2.7773727510545823, "correct_loss_uncond": -53.778564453125, "incorrect_loss_uncond": -25.0806147257487}, "model_output": [{"sum_logits": -32.70025634765625, "num_tokens": 32, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -86.47882080078125, "logits_per_token": -1.0218830108642578, "logits_per_char": -0.23868800253763686, "num_chars": 137}, {"sum_logits": -148.2376251220703, "num_tokens": 45, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -173.56724548339844, "logits_per_token": -3.294169447157118, "logits_per_char": -0.7126808900099534, "num_chars": 208}, {"sum_logits": -83.48014831542969, "num_tokens": 48, "num_tokens_all": 494, "is_greedy": false, "sum_logits_uncond": -123.0731430053711, "logits_per_token": -1.739169756571452, "logits_per_char": -0.46121628903552314, "num_chars": 181}, {"sum_logits": -122.05482482910156, "num_tokens": 37, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -132.37405395507812, "logits_per_token": -3.2987790494351774, "logits_per_char": -0.7352700290909733, "num_chars": 166}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 855, "native_id": 9920, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 78.42921447753906, "incorrect_loss_raw": 90.30416361490886, "correct_loss_per_char": 0.3788850940943916, "incorrect_loss_per_char": 0.6472517711151512, "correct_loss_per_token": 1.6339419682820637, "incorrect_loss_per_token": 2.542350995514647, "correct_loss_uncond": -23.99042510986328, "incorrect_loss_uncond": -15.006935119628906}, "model_output": [{"sum_logits": -79.64945983886719, "num_tokens": 34, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -90.56409454345703, "logits_per_token": -2.3426311717313877, "logits_per_char": -0.5418330601283482, "num_chars": 147}, {"sum_logits": -111.04832458496094, "num_tokens": 48, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -133.73876953125, "logits_per_token": -2.313506762186686, "logits_per_char": -0.5814048407589578, "num_chars": 191}, {"sum_logits": -78.42921447753906, "num_tokens": 48, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -102.41963958740234, "logits_per_token": -1.6339419682820637, "logits_per_char": -0.3788850940943916, "num_chars": 207}, {"sum_logits": -80.21470642089844, "num_tokens": 27, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -91.63043212890625, "logits_per_token": -2.970915052625868, "logits_per_char": -0.8185174124581474, "num_chars": 98}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 856, "native_id": 22308, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 42.312583923339844, "incorrect_loss_raw": 48.560769399007164, "correct_loss_per_char": 0.3917831844753689, "incorrect_loss_per_char": 0.738950272298443, "correct_loss_per_token": 1.7630243301391602, "incorrect_loss_per_token": 3.273023650759742, "correct_loss_uncond": -38.137420654296875, "incorrect_loss_uncond": -18.874712626139324}, "model_output": [{"sum_logits": -54.519981384277344, "num_tokens": 14, "num_tokens_all": 417, "is_greedy": false, "sum_logits_uncond": -60.1448860168457, "logits_per_token": -3.8942843845912387, "logits_per_char": -0.8937701866274974, "num_chars": 61}, {"sum_logits": -54.51387405395508, "num_tokens": 15, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -71.43859100341797, "logits_per_token": -3.634258270263672, "logits_per_char": -0.7678010430134518, "num_chars": 71}, {"sum_logits": -36.64845275878906, "num_tokens": 16, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -70.72296905517578, "logits_per_token": -2.2905282974243164, "logits_per_char": -0.5552795872543798, "num_chars": 66}, {"sum_logits": -42.312583923339844, "num_tokens": 24, "num_tokens_all": 427, "is_greedy": false, "sum_logits_uncond": -80.45000457763672, "logits_per_token": -1.7630243301391602, "logits_per_char": -0.3917831844753689, "num_chars": 108}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 857, "native_id": 40789, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 34.82168960571289, "incorrect_loss_raw": 50.91399129231771, "correct_loss_per_char": 0.5616401549308531, "incorrect_loss_per_char": 0.6785724238960272, "correct_loss_per_token": 2.3214459737141926, "incorrect_loss_per_token": 2.997542828133397, "correct_loss_uncond": -43.03054428100586, "incorrect_loss_uncond": -30.875203450520832}, "model_output": [{"sum_logits": -37.72776794433594, "num_tokens": 11, "num_tokens_all": 422, "is_greedy": false, "sum_logits_uncond": -55.84455871582031, "logits_per_token": -3.4297970858487217, "logits_per_char": -0.820168868355129, "num_chars": 46}, {"sum_logits": -61.421852111816406, "num_tokens": 19, "num_tokens_all": 430, "is_greedy": false, "sum_logits_uncond": -101.78258514404297, "logits_per_token": -3.232729058516653, "logits_per_char": -0.69013316979569, "num_chars": 89}, {"sum_logits": -53.59235382080078, "num_tokens": 23, "num_tokens_all": 434, "is_greedy": false, "sum_logits_uncond": -87.74044036865234, "logits_per_token": -2.3301023400348164, "logits_per_char": -0.5254152335372626, "num_chars": 102}, {"sum_logits": -34.82168960571289, "num_tokens": 15, "num_tokens_all": 426, "is_greedy": false, "sum_logits_uncond": -77.85223388671875, "logits_per_token": -2.3214459737141926, "logits_per_char": -0.5616401549308531, "num_chars": 62}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 858, "native_id": 39317, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 81.37158966064453, "incorrect_loss_raw": 120.0392837524414, "correct_loss_per_char": 0.45972649525787873, "incorrect_loss_per_char": 0.7239018609849172, "correct_loss_per_token": 1.8923625502475472, "incorrect_loss_per_token": 3.039939095746023, "correct_loss_uncond": -22.709083557128906, "incorrect_loss_uncond": -14.767448425292969}, "model_output": [{"sum_logits": -105.78211975097656, "num_tokens": 30, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -120.37931823730469, "logits_per_token": -3.5260706583658856, "logits_per_char": -0.8137086134690504, "num_chars": 130}, {"sum_logits": -81.37158966064453, "num_tokens": 43, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -104.08067321777344, "logits_per_token": -1.8923625502475472, "logits_per_char": -0.45972649525787873, "num_chars": 177}, {"sum_logits": -132.5244140625, "num_tokens": 48, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -143.5751495361328, "logits_per_token": -2.76092529296875, "logits_per_char": -0.7202413807744565, "num_chars": 184}, {"sum_logits": -121.81131744384766, "num_tokens": 43, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -140.46572875976562, "logits_per_token": -2.832821335903434, "logits_per_char": -0.6377555887112443, "num_chars": 191}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 859, "native_id": 35455, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 102.292724609375, "incorrect_loss_raw": 127.22838083902995, "correct_loss_per_char": 0.4780033860251168, "incorrect_loss_per_char": 0.6178046060273945, "correct_loss_per_token": 1.7946092036732457, "incorrect_loss_per_token": 3.1602102092783966, "correct_loss_uncond": -29.97100830078125, "incorrect_loss_uncond": -22.072919209798176}, "model_output": [{"sum_logits": -102.292724609375, "num_tokens": 57, "num_tokens_all": 495, "is_greedy": false, "sum_logits_uncond": -132.26373291015625, "logits_per_token": -1.7946092036732457, "logits_per_char": -0.4780033860251168, "num_chars": 214}, {"sum_logits": -137.68588256835938, "num_tokens": 37, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -170.35906982421875, "logits_per_token": -3.7212400694151184, "logits_per_char": -0.7402466804750504, "num_chars": 186}, {"sum_logits": -180.39511108398438, "num_tokens": 53, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -190.14041137695312, "logits_per_token": -3.4036813412072524, "logits_per_char": -0.6489032772805193, "num_chars": 278}, {"sum_logits": -63.604148864746094, "num_tokens": 27, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -87.4044189453125, "logits_per_token": -2.3557092172128185, "logits_per_char": -0.4642638603266138, "num_chars": 137}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 860, "native_id": 10823, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 32.691627502441406, "incorrect_loss_raw": 21.230370839436848, "correct_loss_per_char": 0.6054005093044705, "incorrect_loss_per_char": 0.8842708587646485, "correct_loss_per_token": 2.5147405771108775, "incorrect_loss_per_token": 3.6541221618652346, "correct_loss_uncond": -24.85808563232422, "incorrect_loss_uncond": -11.756790161132812}, "model_output": [{"sum_logits": -29.01397705078125, "num_tokens": 6, "num_tokens_all": 390, "is_greedy": false, "sum_logits_uncond": -41.06349563598633, "logits_per_token": -4.835662841796875, "logits_per_char": -1.2089157104492188, "num_chars": 24}, {"sum_logits": -24.261703491210938, "num_tokens": 6, "num_tokens_all": 390, "is_greedy": false, "sum_logits_uncond": -33.23916244506836, "logits_per_token": -4.043617248535156, "logits_per_char": -0.9704681396484375, "num_chars": 25}, {"sum_logits": -10.41543197631836, "num_tokens": 5, "num_tokens_all": 389, "is_greedy": false, "sum_logits_uncond": -24.658824920654297, "logits_per_token": -2.083086395263672, "logits_per_char": -0.47342872619628906, "num_chars": 22}, {"sum_logits": -32.691627502441406, "num_tokens": 13, "num_tokens_all": 397, "is_greedy": false, "sum_logits_uncond": -57.549713134765625, "logits_per_token": -2.5147405771108775, "logits_per_char": -0.6054005093044705, "num_chars": 54}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 861, "native_id": 13485, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 37.947547912597656, "incorrect_loss_raw": 54.7635440826416, "correct_loss_per_char": 0.41247334687606146, "incorrect_loss_per_char": 0.5893819088168563, "correct_loss_per_token": 1.8973773956298827, "incorrect_loss_per_token": 2.6635132432362387, "correct_loss_uncond": -35.43585205078125, "incorrect_loss_uncond": -27.769559860229492}, "model_output": [{"sum_logits": -37.947547912597656, "num_tokens": 20, "num_tokens_all": 420, "is_greedy": false, "sum_logits_uncond": -73.3833999633789, "logits_per_token": -1.8973773956298827, "logits_per_char": -0.41247334687606146, "num_chars": 92}, {"sum_logits": -90.65476989746094, "num_tokens": 25, "num_tokens_all": 425, "is_greedy": false, "sum_logits_uncond": -116.72776794433594, "logits_per_token": -3.6261907958984376, "logits_per_char": -0.6867785598292495, "num_chars": 132}, {"sum_logits": -24.020620346069336, "num_tokens": 12, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -43.018943786621094, "logits_per_token": -2.001718362172445, "logits_per_char": -0.5110770286397731, "num_chars": 47}, {"sum_logits": -49.61524200439453, "num_tokens": 21, "num_tokens_all": 421, "is_greedy": false, "sum_logits_uncond": -87.85260009765625, "logits_per_token": -2.3626305716378346, "logits_per_char": -0.5702901379815464, "num_chars": 87}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 862, "native_id": 12697, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 152.32940673828125, "incorrect_loss_raw": 92.7258415222168, "correct_loss_per_char": 0.5326203032807036, "incorrect_loss_per_char": 0.6980855484359894, "correct_loss_per_token": 2.5818543514962924, "incorrect_loss_per_token": 3.1413385497199164, "correct_loss_uncond": -34.02508544921875, "incorrect_loss_uncond": -14.703734079996744}, "model_output": [{"sum_logits": -60.83377456665039, "num_tokens": 20, "num_tokens_all": 442, "is_greedy": false, "sum_logits_uncond": -75.1722640991211, "logits_per_token": -3.0416887283325194, "logits_per_char": -0.6612366800722869, "num_chars": 92}, {"sum_logits": -152.32940673828125, "num_tokens": 59, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -186.3544921875, "logits_per_token": -2.5818543514962924, "logits_per_char": -0.5326203032807036, "num_chars": 286}, {"sum_logits": -99.36015319824219, "num_tokens": 32, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -110.85803985595703, "logits_per_token": -3.1050047874450684, "logits_per_char": -0.747068821039415, "num_chars": 133}, {"sum_logits": -117.98359680175781, "num_tokens": 36, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -136.2584228515625, "logits_per_token": -3.2773221333821616, "logits_per_char": -0.6859511441962663, "num_chars": 172}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 863, "native_id": 1810, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 81.37322235107422, "incorrect_loss_raw": 111.09830983479817, "correct_loss_per_char": 0.5650918218824599, "incorrect_loss_per_char": 0.5642912419392471, "correct_loss_per_token": 2.7124407450358072, "incorrect_loss_per_token": 2.980794602716905, "correct_loss_uncond": -14.608451843261719, "incorrect_loss_uncond": -18.462748209635418}, "model_output": [{"sum_logits": -78.0108413696289, "num_tokens": 35, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -95.53369903564453, "logits_per_token": -2.228881181989397, "logits_per_char": -0.49063422245049626, "num_chars": 159}, {"sum_logits": -121.03651428222656, "num_tokens": 37, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -146.53659057617188, "logits_per_token": -3.27125714276288, "logits_per_char": -0.608223689860435, "num_chars": 199}, {"sum_logits": -134.24757385253906, "num_tokens": 39, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -146.61288452148438, "logits_per_token": -3.4422454833984375, "logits_per_char": -0.59401581350681, "num_chars": 226}, {"sum_logits": -81.37322235107422, "num_tokens": 30, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -95.98167419433594, "logits_per_token": -2.7124407450358072, "logits_per_char": -0.5650918218824599, "num_chars": 144}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 864, "native_id": 21982, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 100.25233459472656, "incorrect_loss_raw": 121.30502319335938, "correct_loss_per_char": 0.4108702237488794, "incorrect_loss_per_char": 0.6949523392346301, "correct_loss_per_token": 1.9279295114370494, "incorrect_loss_per_token": 3.220956279788863, "correct_loss_uncond": -36.46708679199219, "incorrect_loss_uncond": -8.469975789388021}, "model_output": [{"sum_logits": -103.86871337890625, "num_tokens": 35, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -114.59877014160156, "logits_per_token": -2.967677525111607, "logits_per_char": -0.6658250857622195, "num_chars": 156}, {"sum_logits": -88.06210327148438, "num_tokens": 23, "num_tokens_all": 448, "is_greedy": false, "sum_logits_uncond": -90.20208740234375, "logits_per_token": -3.8287870987601904, "logits_per_char": -0.7724745901007402, "num_chars": 114}, {"sum_logits": -100.25233459472656, "num_tokens": 52, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -136.71942138671875, "logits_per_token": -1.9279295114370494, "logits_per_char": -0.4108702237488794, "num_chars": 244}, {"sum_logits": -171.9842529296875, "num_tokens": 60, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -184.52413940429688, "logits_per_token": -2.8664042154947915, "logits_per_char": -0.6465573418409305, "num_chars": 266}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 865, "native_id": 33157, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 43.64569854736328, "incorrect_loss_raw": 32.696075439453125, "correct_loss_per_char": 0.6325463557588882, "incorrect_loss_per_char": 0.6242107187583797, "correct_loss_per_token": 2.727856159210205, "incorrect_loss_per_token": 2.9782689412434897, "correct_loss_uncond": -22.310142517089844, "incorrect_loss_uncond": -18.440715789794922}, "model_output": [{"sum_logits": -43.64569854736328, "num_tokens": 16, "num_tokens_all": 399, "is_greedy": false, "sum_logits_uncond": -65.95584106445312, "logits_per_token": -2.727856159210205, "logits_per_char": -0.6325463557588882, "num_chars": 69}, {"sum_logits": -30.622833251953125, "num_tokens": 12, "num_tokens_all": 395, "is_greedy": false, "sum_logits_uncond": -47.625038146972656, "logits_per_token": -2.5519027709960938, "logits_per_char": -0.5190310720670022, "num_chars": 59}, {"sum_logits": -27.46551513671875, "num_tokens": 10, "num_tokens_all": 393, "is_greedy": false, "sum_logits_uncond": -47.640052795410156, "logits_per_token": -2.746551513671875, "logits_per_char": -0.5843726624833777, "num_chars": 47}, {"sum_logits": -39.9998779296875, "num_tokens": 11, "num_tokens_all": 394, "is_greedy": false, "sum_logits_uncond": -58.14528274536133, "logits_per_token": -3.6363525390625, "logits_per_char": -0.7692284217247596, "num_chars": 52}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 866, "native_id": 25756, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 106.53678894042969, "incorrect_loss_raw": 98.53966013590495, "correct_loss_per_char": 0.6417878851833114, "incorrect_loss_per_char": 0.5965897044264201, "correct_loss_per_token": 2.5984582668397485, "incorrect_loss_per_token": 2.54033781217595, "correct_loss_uncond": -11.878501892089844, "incorrect_loss_uncond": -20.73021189371745}, "model_output": [{"sum_logits": -106.53678894042969, "num_tokens": 41, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -118.41529083251953, "logits_per_token": -2.5984582668397485, "logits_per_char": -0.6417878851833114, "num_chars": 166}, {"sum_logits": -63.18268585205078, "num_tokens": 38, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -96.86396789550781, "logits_per_token": -1.6627022592644942, "logits_per_char": -0.4129587310591554, "num_chars": 153}, {"sum_logits": -98.65400695800781, "num_tokens": 44, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -117.33746337890625, "logits_per_token": -2.242136521772905, "logits_per_char": -0.5192316155684622, "num_chars": 190}, {"sum_logits": -133.78228759765625, "num_tokens": 36, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -143.60818481445312, "logits_per_token": -3.7161746554904513, "logits_per_char": -0.8575787666516427, "num_chars": 156}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 867, "native_id": 46150, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 69.63655090332031, "incorrect_loss_raw": 108.96750386555989, "correct_loss_per_char": 0.449268070344002, "incorrect_loss_per_char": 0.5242019308529792, "correct_loss_per_token": 2.24634035172001, "incorrect_loss_per_token": 2.48404358174512, "correct_loss_uncond": -34.45790100097656, "incorrect_loss_uncond": -18.382898966471355}, "model_output": [{"sum_logits": -131.9605255126953, "num_tokens": 48, "num_tokens_all": 490, "is_greedy": false, "sum_logits_uncond": -148.25318908691406, "logits_per_token": -2.749177614847819, "logits_per_char": -0.578774234704804, "num_chars": 228}, {"sum_logits": -69.63655090332031, "num_tokens": 31, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -104.09445190429688, "logits_per_token": -2.24634035172001, "logits_per_char": -0.449268070344002, "num_chars": 155}, {"sum_logits": -105.86386108398438, "num_tokens": 41, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -124.0316162109375, "logits_per_token": -2.5820453922923017, "logits_per_char": -0.5346659650706281, "num_chars": 198}, {"sum_logits": -89.078125, "num_tokens": 42, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -109.76640319824219, "logits_per_token": -2.120907738095238, "logits_per_char": -0.45916559278350516, "num_chars": 194}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 868, "native_id": 16534, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 13.385505676269531, "incorrect_loss_raw": 25.806601842244465, "correct_loss_per_char": 0.3617704236829603, "incorrect_loss_per_char": 0.48278017208076296, "correct_loss_per_token": 1.3385505676269531, "incorrect_loss_per_token": 1.9617655210508758, "correct_loss_uncond": -30.131370544433594, "incorrect_loss_uncond": -28.17609977722168}, "model_output": [{"sum_logits": -34.65773010253906, "num_tokens": 14, "num_tokens_all": 402, "is_greedy": false, "sum_logits_uncond": -61.49188995361328, "logits_per_token": -2.4755521501813615, "logits_per_char": -0.5681595098776896, "num_chars": 61}, {"sum_logits": -19.705825805664062, "num_tokens": 15, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -51.180564880371094, "logits_per_token": -1.3137217203776042, "logits_per_char": -0.37895818857046276, "num_chars": 52}, {"sum_logits": -23.056249618530273, "num_tokens": 11, "num_tokens_all": 399, "is_greedy": false, "sum_logits_uncond": -49.27565002441406, "logits_per_token": -2.096022692593661, "logits_per_char": -0.5012228177941364, "num_chars": 46}, {"sum_logits": -13.385505676269531, "num_tokens": 10, "num_tokens_all": 398, "is_greedy": false, "sum_logits_uncond": -43.516876220703125, "logits_per_token": -1.3385505676269531, "logits_per_char": -0.3617704236829603, "num_chars": 37}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 869, "native_id": 47781, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 10.17495346069336, "incorrect_loss_raw": 42.8361562093099, "correct_loss_per_char": 0.2826375961303711, "incorrect_loss_per_char": 0.6842173607524895, "correct_loss_per_token": 1.017495346069336, "incorrect_loss_per_token": 3.294781805288912, "correct_loss_uncond": -28.088550567626953, "incorrect_loss_uncond": -21.070582071940105}, "model_output": [{"sum_logits": -31.555580139160156, "num_tokens": 11, "num_tokens_all": 400, "is_greedy": false, "sum_logits_uncond": -55.12689208984375, "logits_per_token": -2.868689103560014, "logits_per_char": -0.5008822244311136, "num_chars": 63}, {"sum_logits": -51.06005096435547, "num_tokens": 16, "num_tokens_all": 405, "is_greedy": false, "sum_logits_uncond": -79.81285095214844, "logits_per_token": -3.191253185272217, "logits_per_char": -0.6151813369199454, "num_chars": 83}, {"sum_logits": -45.89283752441406, "num_tokens": 12, "num_tokens_all": 401, "is_greedy": false, "sum_logits_uncond": -56.78047180175781, "logits_per_token": -3.8244031270345054, "logits_per_char": -0.9365885209064094, "num_chars": 49}, {"sum_logits": -10.17495346069336, "num_tokens": 10, "num_tokens_all": 399, "is_greedy": false, "sum_logits_uncond": -38.26350402832031, "logits_per_token": -1.017495346069336, "logits_per_char": -0.2826375961303711, "num_chars": 36}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 870, "native_id": 38412, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 18.74245834350586, "incorrect_loss_raw": 30.214139302571613, "correct_loss_per_char": 0.4932225879869963, "incorrect_loss_per_char": 0.7348733593719174, "correct_loss_per_token": 1.874245834350586, "incorrect_loss_per_token": 2.92262382996388, "correct_loss_uncond": -21.52847671508789, "incorrect_loss_uncond": -15.407552083333334}, "model_output": [{"sum_logits": -17.84515380859375, "num_tokens": 6, "num_tokens_all": 394, "is_greedy": false, "sum_logits_uncond": -33.26704406738281, "logits_per_token": -2.9741923014322915, "logits_per_char": -0.7435480753580729, "num_chars": 24}, {"sum_logits": -30.246784210205078, "num_tokens": 12, "num_tokens_all": 400, "is_greedy": false, "sum_logits_uncond": -45.54985046386719, "logits_per_token": -2.5205653508504233, "logits_per_char": -0.6874269138682972, "num_chars": 44}, {"sum_logits": -42.550479888916016, "num_tokens": 13, "num_tokens_all": 401, "is_greedy": false, "sum_logits_uncond": -58.048179626464844, "logits_per_token": -3.2731138376089244, "logits_per_char": -0.7736450888893821, "num_chars": 55}, {"sum_logits": -18.74245834350586, "num_tokens": 10, "num_tokens_all": 398, "is_greedy": false, "sum_logits_uncond": -40.27093505859375, "logits_per_token": -1.874245834350586, "logits_per_char": -0.4932225879869963, "num_chars": 38}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 871, "native_id": 11514, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 213.5147247314453, "incorrect_loss_raw": 74.85035959879558, "correct_loss_per_char": 0.9447554191656872, "incorrect_loss_per_char": 0.6218274986486618, "correct_loss_per_token": 3.6188936395160223, "incorrect_loss_per_token": 2.3602050925578086, "correct_loss_uncond": -19.994491577148438, "incorrect_loss_uncond": -28.08489990234375}, "model_output": [{"sum_logits": -213.5147247314453, "num_tokens": 59, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -233.50921630859375, "logits_per_token": -3.6188936395160223, "logits_per_char": -0.9447554191656872, "num_chars": 226}, {"sum_logits": -53.0053596496582, "num_tokens": 23, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -76.81230926513672, "logits_per_token": -2.3045808543329653, "logits_per_char": -0.5761452135832413, "num_chars": 92}, {"sum_logits": -58.65658950805664, "num_tokens": 29, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -97.36309814453125, "logits_per_token": -2.0226410175191947, "logits_per_char": -0.5924908031116832, "num_chars": 99}, {"sum_logits": -112.88912963867188, "num_tokens": 41, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -134.63037109375, "logits_per_token": -2.7533934058212655, "logits_per_char": -0.6968464792510609, "num_chars": 162}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 872, "native_id": 21286, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 34.43442916870117, "incorrect_loss_raw": 92.63951365152995, "correct_loss_per_char": 0.3957980364218526, "incorrect_loss_per_char": 0.5810237839540529, "correct_loss_per_token": 1.4971490942913552, "incorrect_loss_per_token": 2.165810567473049, "correct_loss_uncond": -39.23587417602539, "incorrect_loss_uncond": -21.2123540242513}, "model_output": [{"sum_logits": -34.43442916870117, "num_tokens": 23, "num_tokens_all": 440, "is_greedy": false, "sum_logits_uncond": -73.67030334472656, "logits_per_token": -1.4971490942913552, "logits_per_char": -0.3957980364218526, "num_chars": 87}, {"sum_logits": -72.90731048583984, "num_tokens": 47, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -100.15845489501953, "logits_per_token": -1.5512193720391456, "logits_per_char": -0.455670690536499, "num_chars": 160}, {"sum_logits": -109.00523376464844, "num_tokens": 37, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -119.93421173095703, "logits_per_token": -2.9460873990445524, "logits_per_char": -0.7569807900322808, "num_chars": 144}, {"sum_logits": -96.00599670410156, "num_tokens": 48, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -121.46293640136719, "logits_per_token": -2.000124931335449, "logits_per_char": -0.5304198712933788, "num_chars": 181}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 873, "native_id": 11731, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 53.95458221435547, "incorrect_loss_raw": 65.13318125406902, "correct_loss_per_char": 0.5620268980662028, "incorrect_loss_per_char": 0.573431572918431, "correct_loss_per_token": 2.45248100974343, "incorrect_loss_per_token": 2.6280353274440493, "correct_loss_uncond": -22.62206268310547, "incorrect_loss_uncond": -20.172581990559895}, "model_output": [{"sum_logits": -75.88800811767578, "num_tokens": 26, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -91.0661849975586, "logits_per_token": -2.91876954298753, "logits_per_char": -0.6169756757534617, "num_chars": 123}, {"sum_logits": -75.33869934082031, "num_tokens": 30, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -98.07177734375, "logits_per_token": -2.5112899780273437, "logits_per_char": -0.6125097507383765, "num_chars": 123}, {"sum_logits": -44.17283630371094, "num_tokens": 18, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -66.77932739257812, "logits_per_token": -2.454046461317274, "logits_per_char": -0.4908092922634549, "num_chars": 90}, {"sum_logits": -53.95458221435547, "num_tokens": 22, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -76.57664489746094, "logits_per_token": -2.45248100974343, "logits_per_char": -0.5620268980662028, "num_chars": 96}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 874, "native_id": 45249, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 28.82851219177246, "incorrect_loss_raw": 40.41508706410726, "correct_loss_per_char": 0.6704305160877316, "incorrect_loss_per_char": 0.7852253718682897, "correct_loss_per_token": 2.882851219177246, "incorrect_loss_per_token": 3.325661487680263, "correct_loss_uncond": -19.46095848083496, "incorrect_loss_uncond": -14.231078147888184}, "model_output": [{"sum_logits": -28.82851219177246, "num_tokens": 10, "num_tokens_all": 401, "is_greedy": false, "sum_logits_uncond": -48.28947067260742, "logits_per_token": -2.882851219177246, "logits_per_char": -0.6704305160877316, "num_chars": 43}, {"sum_logits": -27.88192367553711, "num_tokens": 6, "num_tokens_all": 397, "is_greedy": false, "sum_logits_uncond": -39.656185150146484, "logits_per_token": -4.646987279256185, "logits_per_char": -1.1617468198140461, "num_chars": 24}, {"sum_logits": -13.924952507019043, "num_tokens": 9, "num_tokens_all": 400, "is_greedy": false, "sum_logits_uncond": -34.41255187988281, "logits_per_token": -1.547216945224338, "logits_per_char": -0.3664461186057643, "num_chars": 38}, {"sum_logits": -79.43838500976562, "num_tokens": 21, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -89.86975860595703, "logits_per_token": -3.782780238560268, "logits_per_char": -0.8274831771850586, "num_chars": 96}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 875, "native_id": 46170, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 65.1805191040039, "incorrect_loss_raw": 129.5297368367513, "correct_loss_per_char": 0.5173057071746342, "incorrect_loss_per_char": 0.6779324222202708, "correct_loss_per_token": 2.3278756822858537, "incorrect_loss_per_token": 2.787382591295542, "correct_loss_uncond": -32.99505615234375, "incorrect_loss_uncond": -28.130348205566406}, "model_output": [{"sum_logits": -173.05519104003906, "num_tokens": 53, "num_tokens_all": 490, "is_greedy": false, "sum_logits_uncond": -201.52833557128906, "logits_per_token": -3.265192283774322, "logits_per_char": -0.7974893596315165, "num_chars": 217}, {"sum_logits": -110.63182067871094, "num_tokens": 40, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -143.83551025390625, "logits_per_token": -2.7657955169677733, "logits_per_char": -0.646969711571409, "num_chars": 171}, {"sum_logits": -65.1805191040039, "num_tokens": 28, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -98.17557525634766, "logits_per_token": -2.3278756822858537, "logits_per_char": -0.5173057071746342, "num_chars": 126}, {"sum_logits": -104.9021987915039, "num_tokens": 45, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -127.61640930175781, "logits_per_token": -2.331159973144531, "logits_per_char": -0.5893381954578871, "num_chars": 178}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 876, "native_id": 41310, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 152.56219482421875, "incorrect_loss_raw": 122.86204020182292, "correct_loss_per_char": 0.46798219271232744, "incorrect_loss_per_char": 0.5291139296634112, "correct_loss_per_token": 2.4606805616809475, "incorrect_loss_per_token": 2.485048151935364, "correct_loss_uncond": -28.565444946289062, "incorrect_loss_uncond": -14.825241088867188}, "model_output": [{"sum_logits": -90.77081298828125, "num_tokens": 43, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -113.92904663085938, "logits_per_token": -2.1109491392623547, "logits_per_char": -0.449360460338026, "num_chars": 202}, {"sum_logits": -184.20660400390625, "num_tokens": 69, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -194.55999755859375, "logits_per_token": -2.6696609275928442, "logits_per_char": -0.5774501692912422, "num_chars": 319}, {"sum_logits": -152.56219482421875, "num_tokens": 62, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -181.1276397705078, "logits_per_token": -2.4606805616809475, "logits_per_char": -0.46798219271232744, "num_chars": 326}, {"sum_logits": -93.60870361328125, "num_tokens": 35, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -104.57279968261719, "logits_per_token": -2.674534388950893, "logits_per_char": -0.5605311593609655, "num_chars": 167}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 877, "native_id": 28926, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 62.115047454833984, "incorrect_loss_raw": 124.56817626953125, "correct_loss_per_char": 0.48151199577390685, "incorrect_loss_per_char": 0.6294720765487613, "correct_loss_per_token": 1.941095232963562, "incorrect_loss_per_token": 3.034722406991056, "correct_loss_uncond": -28.942058563232422, "incorrect_loss_uncond": -21.187937418619793}, "model_output": [{"sum_logits": -90.54301452636719, "num_tokens": 34, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -111.99723815917969, "logits_per_token": -2.6630298390108, "logits_per_char": -0.6287709342108833, "num_chars": 144}, {"sum_logits": -179.77194213867188, "num_tokens": 56, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -200.91964721679688, "logits_per_token": -3.2102132524762834, "logits_per_char": -0.6093964140293961, "num_chars": 295}, {"sum_logits": -103.38957214355469, "num_tokens": 32, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -124.35145568847656, "logits_per_token": -3.230924129486084, "logits_per_char": -0.6502488814060043, "num_chars": 159}, {"sum_logits": -62.115047454833984, "num_tokens": 32, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -91.0571060180664, "logits_per_token": -1.941095232963562, "logits_per_char": -0.48151199577390685, "num_chars": 129}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 878, "native_id": 32910, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 70.55619812011719, "incorrect_loss_raw": 99.50991821289062, "correct_loss_per_char": 0.4465582159501088, "incorrect_loss_per_char": 0.6632907574494639, "correct_loss_per_token": 2.138066609700521, "incorrect_loss_per_token": 2.8597654445196987, "correct_loss_uncond": -22.640380859375, "incorrect_loss_uncond": -22.339459737141926}, "model_output": [{"sum_logits": -138.92771911621094, "num_tokens": 50, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -165.6234893798828, "logits_per_token": -2.7785543823242187, "logits_per_char": -0.5886767759161481, "num_chars": 236}, {"sum_logits": -84.25402069091797, "num_tokens": 25, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -103.92993927001953, "logits_per_token": -3.3701608276367185, "logits_per_char": -0.8260198106952742, "num_chars": 102}, {"sum_logits": -75.34801483154297, "num_tokens": 31, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -95.99470520019531, "logits_per_token": -2.4305811235981603, "logits_per_char": -0.5751756857369692, "num_chars": 131}, {"sum_logits": -70.55619812011719, "num_tokens": 33, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -93.19657897949219, "logits_per_token": -2.138066609700521, "logits_per_char": -0.4465582159501088, "num_chars": 158}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 879, "native_id": 11862, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 20.491477966308594, "incorrect_loss_raw": 45.122659047444664, "correct_loss_per_char": 0.7066026884933998, "incorrect_loss_per_char": 0.5814077227092157, "correct_loss_per_token": 4.098295593261719, "incorrect_loss_per_token": 2.914898705860925, "correct_loss_uncond": -15.3779296875, "incorrect_loss_uncond": -22.501725514729817}, "model_output": [{"sum_logits": -26.643688201904297, "num_tokens": 9, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -41.67227554321289, "logits_per_token": -2.9604098002115884, "logits_per_char": -0.6055383682250977, "num_chars": 44}, {"sum_logits": -83.23284149169922, "num_tokens": 21, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -108.41470336914062, "logits_per_token": -3.9634686424618675, "logits_per_char": -0.8160082499186198, "num_chars": 102}, {"sum_logits": -25.49144744873047, "num_tokens": 14, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -52.78617477416992, "logits_per_token": -1.8208176749093192, "logits_per_char": -0.32267654998392997, "num_chars": 79}, {"sum_logits": -20.491477966308594, "num_tokens": 5, "num_tokens_all": 402, "is_greedy": false, "sum_logits_uncond": -35.869407653808594, "logits_per_token": -4.098295593261719, "logits_per_char": -0.7066026884933998, "num_chars": 29}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 880, "native_id": 5533, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 100.7186279296875, "incorrect_loss_raw": 78.60727437337239, "correct_loss_per_char": 0.5995156424386161, "incorrect_loss_per_char": 0.5926688987341288, "correct_loss_per_token": 3.1474571228027344, "incorrect_loss_per_token": 2.647346364044221, "correct_loss_uncond": -11.082374572753906, "incorrect_loss_uncond": -17.6996332804362}, "model_output": [{"sum_logits": -66.99825286865234, "num_tokens": 22, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -75.92704010009766, "logits_per_token": -3.0453751303932886, "logits_per_char": -0.6978984673817953, "num_chars": 96}, {"sum_logits": -100.7186279296875, "num_tokens": 32, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -111.8010025024414, "logits_per_token": -3.1474571228027344, "logits_per_char": -0.5995156424386161, "num_chars": 168}, {"sum_logits": -71.29515075683594, "num_tokens": 29, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -92.27552795410156, "logits_per_token": -2.458453474373653, "logits_per_char": -0.5442377920369156, "num_chars": 131}, {"sum_logits": -97.5284194946289, "num_tokens": 40, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -120.71815490722656, "logits_per_token": -2.4382104873657227, "logits_per_char": -0.5358704367836753, "num_chars": 182}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 881, "native_id": 45813, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 91.91905212402344, "incorrect_loss_raw": 109.07238260904948, "correct_loss_per_char": 0.5639205651780579, "incorrect_loss_per_char": 0.5823778712791056, "correct_loss_per_token": 2.9651307136781755, "incorrect_loss_per_token": 2.8123390539153745, "correct_loss_uncond": -16.987937927246094, "incorrect_loss_uncond": -27.757003784179688}, "model_output": [{"sum_logits": -122.417236328125, "num_tokens": 42, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -146.49185180664062, "logits_per_token": -2.9146961030505953, "logits_per_char": -0.591387615111715, "num_chars": 207}, {"sum_logits": -55.11357116699219, "num_tokens": 27, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -94.23434448242188, "logits_per_token": -2.041243376555266, "logits_per_char": -0.38812374061262106, "num_chars": 142}, {"sum_logits": -91.91905212402344, "num_tokens": 31, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -108.90699005126953, "logits_per_token": -2.9651307136781755, "logits_per_char": -0.5639205651780579, "num_chars": 163}, {"sum_logits": -149.68634033203125, "num_tokens": 43, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -169.761962890625, "logits_per_token": -3.4810776821402616, "logits_per_char": -0.7676222581129808, "num_chars": 195}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 882, "native_id": 46093, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 108.2283706665039, "incorrect_loss_raw": 71.56657409667969, "correct_loss_per_char": 0.668076362138913, "incorrect_loss_per_char": 0.5213766256690526, "correct_loss_per_token": 3.0063436296251087, "incorrect_loss_per_token": 2.198711787544281, "correct_loss_uncond": -31.511314392089844, "incorrect_loss_uncond": -20.18765131632487}, "model_output": [{"sum_logits": -94.9923095703125, "num_tokens": 40, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -119.36808776855469, "logits_per_token": -2.3748077392578124, "logits_per_char": -0.620864768433415, "num_chars": 153}, {"sum_logits": -77.02926635742188, "num_tokens": 39, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -97.89785766601562, "logits_per_token": -1.9751093937800481, "logits_per_char": -0.41637441274282094, "num_chars": 185}, {"sum_logits": -108.2283706665039, "num_tokens": 36, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -139.73968505859375, "logits_per_token": -3.0063436296251087, "logits_per_char": -0.668076362138913, "num_chars": 162}, {"sum_logits": -42.67814636230469, "num_tokens": 19, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -57.99673080444336, "logits_per_token": -2.2462182295949837, "logits_per_char": -0.5268906958309221, "num_chars": 81}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 883, "native_id": 33915, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 88.61306762695312, "incorrect_loss_raw": 94.25383758544922, "correct_loss_per_char": 0.6816389817457933, "incorrect_loss_per_char": 0.6558783879387575, "correct_loss_per_token": 2.769158363342285, "incorrect_loss_per_token": 2.82319859939431, "correct_loss_uncond": -25.93433380126953, "incorrect_loss_uncond": -14.122342427571615}, "model_output": [{"sum_logits": -88.61306762695312, "num_tokens": 32, "num_tokens_all": 438, "is_greedy": false, "sum_logits_uncond": -114.54740142822266, "logits_per_token": -2.769158363342285, "logits_per_char": -0.6816389817457933, "num_chars": 130}, {"sum_logits": -134.1126708984375, "num_tokens": 44, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -141.27511596679688, "logits_per_token": -3.0480152476917612, "logits_per_char": -0.6574150534237132, "num_chars": 204}, {"sum_logits": -82.0654525756836, "num_tokens": 31, "num_tokens_all": 437, "is_greedy": false, "sum_logits_uncond": -96.86474609375, "logits_per_token": -2.6472726637317288, "logits_per_char": -0.5620921409293397, "num_chars": 146}, {"sum_logits": -66.58338928222656, "num_tokens": 24, "num_tokens_all": 430, "is_greedy": false, "sum_logits_uncond": -86.98867797851562, "logits_per_token": -2.77430788675944, "logits_per_char": -0.7481279694632198, "num_chars": 89}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 884, "native_id": 23460, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 48.498538970947266, "incorrect_loss_raw": 67.1333516438802, "correct_loss_per_char": 0.381878259613758, "incorrect_loss_per_char": 0.5209724659911538, "correct_loss_per_token": 1.732090677533831, "incorrect_loss_per_token": 2.3174187840239973, "correct_loss_uncond": -24.821086883544922, "incorrect_loss_uncond": -21.823699951171875}, "model_output": [{"sum_logits": -66.21813201904297, "num_tokens": 34, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -82.72080993652344, "logits_per_token": -1.947592118207146, "logits_per_char": -0.4138633251190186, "num_chars": 160}, {"sum_logits": -68.93559265136719, "num_tokens": 23, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -89.58905029296875, "logits_per_token": -2.9971996804942256, "logits_per_char": -0.6825306203105662, "num_chars": 101}, {"sum_logits": -48.498538970947266, "num_tokens": 28, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -73.31962585449219, "logits_per_token": -1.732090677533831, "logits_per_char": -0.381878259613758, "num_chars": 127}, {"sum_logits": -66.24633026123047, "num_tokens": 33, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -94.56129455566406, "logits_per_token": -2.00746455337062, "logits_per_char": -0.46652345254387656, "num_chars": 142}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 885, "native_id": 23696, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 25.864431381225586, "incorrect_loss_raw": 27.20359230041504, "correct_loss_per_char": 0.6466107845306397, "incorrect_loss_per_char": 0.8742952457582108, "correct_loss_per_token": 2.873825709025065, "incorrect_loss_per_token": 3.60051214919131, "correct_loss_uncond": -24.464067459106445, "incorrect_loss_uncond": -9.461609522501627}, "model_output": [{"sum_logits": -16.15847396850586, "num_tokens": 5, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -23.725242614746094, "logits_per_token": -3.231694793701172, "logits_per_char": -0.7694511413574219, "num_chars": 21}, {"sum_logits": -37.204612731933594, "num_tokens": 13, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -51.85348892211914, "logits_per_token": -2.861893287071815, "logits_per_char": -0.6764475042169744, "num_chars": 55}, {"sum_logits": -28.247690200805664, "num_tokens": 6, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -34.416873931884766, "logits_per_token": -4.707948366800944, "logits_per_char": -1.176987091700236, "num_chars": 24}, {"sum_logits": -25.864431381225586, "num_tokens": 9, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -50.32849884033203, "logits_per_token": -2.873825709025065, "logits_per_char": -0.6466107845306397, "num_chars": 40}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 886, "native_id": 39798, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 14.19117546081543, "incorrect_loss_raw": 39.750806172688804, "correct_loss_per_char": 0.4893508779591528, "incorrect_loss_per_char": 0.9108017239019676, "correct_loss_per_token": 2.02731078011649, "incorrect_loss_per_token": 3.8456567064278855, "correct_loss_uncond": -21.093027114868164, "incorrect_loss_uncond": -16.330827077229817}, "model_output": [{"sum_logits": -55.285282135009766, "num_tokens": 11, "num_tokens_all": 401, "is_greedy": false, "sum_logits_uncond": -70.23207092285156, "logits_per_token": -5.025934739546343, "logits_per_char": -1.3163162413097562, "num_chars": 42}, {"sum_logits": -34.44413757324219, "num_tokens": 9, "num_tokens_all": 399, "is_greedy": false, "sum_logits_uncond": -52.39495849609375, "logits_per_token": -3.82712639702691, "logits_per_char": -0.8010264551916788, "num_chars": 43}, {"sum_logits": -29.522998809814453, "num_tokens": 11, "num_tokens_all": 401, "is_greedy": false, "sum_logits_uncond": -45.61787033081055, "logits_per_token": -2.683908982710405, "logits_per_char": -0.6150624752044678, "num_chars": 48}, {"sum_logits": -14.19117546081543, "num_tokens": 7, "num_tokens_all": 397, "is_greedy": false, "sum_logits_uncond": -35.284202575683594, "logits_per_token": -2.02731078011649, "logits_per_char": -0.4893508779591528, "num_chars": 29}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 887, "native_id": 12269, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 85.26888275146484, "incorrect_loss_raw": 77.10035451253255, "correct_loss_per_char": 0.7681881328960797, "incorrect_loss_per_char": 0.8466230677631806, "correct_loss_per_token": 3.410755310058594, "incorrect_loss_per_token": 3.5922719589387526, "correct_loss_uncond": -12.093368530273438, "incorrect_loss_uncond": -15.908724466959635}, "model_output": [{"sum_logits": -57.00041198730469, "num_tokens": 18, "num_tokens_all": 429, "is_greedy": false, "sum_logits_uncond": -79.12840270996094, "logits_per_token": -3.1666895548502603, "logits_per_char": -0.8028227040465449, "num_chars": 71}, {"sum_logits": -91.76618194580078, "num_tokens": 22, "num_tokens_all": 433, "is_greedy": false, "sum_logits_uncond": -99.15284729003906, "logits_per_token": -4.17119008844549, "logits_per_char": -0.986733139202159, "num_chars": 93}, {"sum_logits": -82.53446960449219, "num_tokens": 24, "num_tokens_all": 435, "is_greedy": false, "sum_logits_uncond": -100.74598693847656, "logits_per_token": -3.438936233520508, "logits_per_char": -0.750313360040838, "num_chars": 110}, {"sum_logits": -85.26888275146484, "num_tokens": 25, "num_tokens_all": 436, "is_greedy": false, "sum_logits_uncond": -97.36225128173828, "logits_per_token": -3.410755310058594, "logits_per_char": -0.7681881328960797, "num_chars": 111}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 888, "native_id": 29085, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 30.284133911132812, "incorrect_loss_raw": 50.98074467976888, "correct_loss_per_char": 0.45200199867362406, "incorrect_loss_per_char": 0.9901380478453686, "correct_loss_per_token": 2.3295487623948317, "incorrect_loss_per_token": 4.665306462181939, "correct_loss_uncond": -24.50722885131836, "incorrect_loss_uncond": -21.60123570760091}, "model_output": [{"sum_logits": -49.0887451171875, "num_tokens": 18, "num_tokens_all": 424, "is_greedy": false, "sum_logits_uncond": -78.56436157226562, "logits_per_token": -2.7271525065104165, "logits_per_char": -0.5914306640625, "num_chars": 83}, {"sum_logits": -68.51676940917969, "num_tokens": 10, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -82.7554931640625, "logits_per_token": -6.851676940917969, "logits_per_char": -1.5571993047540837, "num_chars": 44}, {"sum_logits": -35.33671951293945, "num_tokens": 8, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -56.42608642578125, "logits_per_token": -4.417089939117432, "logits_per_char": -0.8217841747195221, "num_chars": 43}, {"sum_logits": -30.284133911132812, "num_tokens": 13, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -54.79136276245117, "logits_per_token": -2.3295487623948317, "logits_per_char": -0.45200199867362406, "num_chars": 67}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 889, "native_id": 24163, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 31.55562973022461, "incorrect_loss_raw": 28.63908513387044, "correct_loss_per_char": 0.5173054054135182, "incorrect_loss_per_char": 0.743811813783852, "correct_loss_per_token": 2.1037086486816405, "incorrect_loss_per_token": 2.9791717364158465, "correct_loss_uncond": -34.37947463989258, "incorrect_loss_uncond": -17.42568842569987}, "model_output": [{"sum_logits": -26.48446273803711, "num_tokens": 11, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -50.594539642333984, "logits_per_token": -2.4076784307306465, "logits_per_char": -0.6305824461437407, "num_chars": 42}, {"sum_logits": -31.55562973022461, "num_tokens": 15, "num_tokens_all": 413, "is_greedy": false, "sum_logits_uncond": -65.93510437011719, "logits_per_token": -2.1037086486816405, "logits_per_char": -0.5173054054135182, "num_chars": 61}, {"sum_logits": -37.740821838378906, "num_tokens": 11, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -49.63383865356445, "logits_per_token": -3.4309838034889917, "logits_per_char": -0.9435205459594727, "num_chars": 40}, {"sum_logits": -21.691970825195312, "num_tokens": 7, "num_tokens_all": 405, "is_greedy": false, "sum_logits_uncond": -37.9659423828125, "logits_per_token": -3.098852975027902, "logits_per_char": -0.6573324492483428, "num_chars": 33}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 890, "native_id": 37028, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 19.884553909301758, "incorrect_loss_raw": 49.50103251139323, "correct_loss_per_char": 0.39769107818603516, "incorrect_loss_per_char": 0.6903308904974566, "correct_loss_per_token": 1.6570461591084797, "incorrect_loss_per_token": 3.0971543946965183, "correct_loss_uncond": -37.54849815368652, "incorrect_loss_uncond": -30.99158986409505}, "model_output": [{"sum_logits": -47.64312744140625, "num_tokens": 18, "num_tokens_all": 430, "is_greedy": false, "sum_logits_uncond": -80.06695556640625, "logits_per_token": -2.6468404134114585, "logits_per_char": -0.48615436164700254, "num_chars": 98}, {"sum_logits": -19.884553909301758, "num_tokens": 12, "num_tokens_all": 424, "is_greedy": false, "sum_logits_uncond": -57.43305206298828, "logits_per_token": -1.6570461591084797, "logits_per_char": -0.39769107818603516, "num_chars": 50}, {"sum_logits": -44.399757385253906, "num_tokens": 17, "num_tokens_all": 429, "is_greedy": false, "sum_logits_uncond": -66.20984649658203, "logits_per_token": -2.6117504344267, "logits_per_char": -0.5766202257825183, "num_chars": 77}, {"sum_logits": -56.46021270751953, "num_tokens": 14, "num_tokens_all": 426, "is_greedy": false, "sum_logits_uncond": -95.20106506347656, "logits_per_token": -4.032872336251395, "logits_per_char": -1.0082180840628487, "num_chars": 56}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 891, "native_id": 32587, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 30.749435424804688, "incorrect_loss_raw": 52.820556640625, "correct_loss_per_char": 0.504089105324667, "incorrect_loss_per_char": 0.7127019137481191, "correct_loss_per_token": 2.7954032204367896, "incorrect_loss_per_token": 3.3783833185831704, "correct_loss_uncond": -17.596351623535156, "incorrect_loss_uncond": -15.878456115722656}, "model_output": [{"sum_logits": -58.060752868652344, "num_tokens": 16, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -74.6191635131836, "logits_per_token": -3.6287970542907715, "logits_per_char": -0.6597812825983221, "num_chars": 88}, {"sum_logits": -55.51094055175781, "num_tokens": 15, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -72.58881378173828, "logits_per_token": -3.7007293701171875, "logits_per_char": -0.8953377508348034, "num_chars": 62}, {"sum_logits": -30.749435424804688, "num_tokens": 11, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -48.345787048339844, "logits_per_token": -2.7954032204367896, "logits_per_char": -0.504089105324667, "num_chars": 61}, {"sum_logits": -44.889976501464844, "num_tokens": 16, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -58.889060974121094, "logits_per_token": -2.8056235313415527, "logits_per_char": -0.5829867078112317, "num_chars": 77}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 892, "native_id": 31285, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 46.89306640625, "incorrect_loss_raw": 40.417466481526695, "correct_loss_per_char": 0.6011931590544872, "incorrect_loss_per_char": 0.8590792229062035, "correct_loss_per_token": 2.6051703559027777, "incorrect_loss_per_token": 3.537294988278989, "correct_loss_uncond": -25.885299682617188, "incorrect_loss_uncond": -11.382526397705078}, "model_output": [{"sum_logits": -49.35649108886719, "num_tokens": 16, "num_tokens_all": 404, "is_greedy": false, "sum_logits_uncond": -55.96965408325195, "logits_per_token": -3.084780693054199, "logits_per_char": -0.7834363664899554, "num_chars": 63}, {"sum_logits": -41.51969909667969, "num_tokens": 10, "num_tokens_all": 398, "is_greedy": false, "sum_logits_uncond": -53.27202606201172, "logits_per_token": -4.151969909667969, "logits_per_char": -1.186277117047991, "num_chars": 35}, {"sum_logits": -46.89306640625, "num_tokens": 18, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -72.77836608886719, "logits_per_token": -2.6051703559027777, "logits_per_char": -0.6011931590544872, "num_chars": 78}, {"sum_logits": -30.376209259033203, "num_tokens": 9, "num_tokens_all": 397, "is_greedy": false, "sum_logits_uncond": -46.15829849243164, "logits_per_token": -3.3751343621148004, "logits_per_char": -0.6075241851806641, "num_chars": 50}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 893, "native_id": 33142, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 106.77081298828125, "incorrect_loss_raw": 107.39805094401042, "correct_loss_per_char": 0.5771395296663852, "incorrect_loss_per_char": 0.5931618873063567, "correct_loss_per_token": 2.6041661704458843, "incorrect_loss_per_token": 2.8992426493758434, "correct_loss_uncond": -38.435150146484375, "incorrect_loss_uncond": -15.87756093343099}, "model_output": [{"sum_logits": -107.30726623535156, "num_tokens": 45, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -124.59105682373047, "logits_per_token": -2.3846059163411457, "logits_per_char": -0.4605462070186762, "num_chars": 233}, {"sum_logits": -96.60958099365234, "num_tokens": 31, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -109.17782592773438, "logits_per_token": -3.1164380965694303, "logits_per_char": -0.5750570297241211, "num_chars": 168}, {"sum_logits": -106.77081298828125, "num_tokens": 41, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -145.20596313476562, "logits_per_token": -2.6041661704458843, "logits_per_char": -0.5771395296663852, "num_chars": 185}, {"sum_logits": -118.27730560302734, "num_tokens": 37, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -136.05795288085938, "logits_per_token": -3.196683935216955, "logits_per_char": -0.7438824251762726, "num_chars": 159}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 894, "native_id": 20910, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 66.54736328125, "incorrect_loss_raw": 97.4135233561198, "correct_loss_per_char": 0.5786727241847827, "incorrect_loss_per_char": 0.5643034592350674, "correct_loss_per_token": 2.3766915457589284, "incorrect_loss_per_token": 2.6144068610702544, "correct_loss_uncond": -26.37250518798828, "incorrect_loss_uncond": -25.463577270507812}, "model_output": [{"sum_logits": -104.34580993652344, "num_tokens": 41, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -126.423095703125, "logits_per_token": -2.5450197545493523, "logits_per_char": -0.5550309039176778, "num_chars": 188}, {"sum_logits": -95.656494140625, "num_tokens": 37, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -121.77376556396484, "logits_per_token": -2.585310652449324, "logits_per_char": -0.5466085379464286, "num_chars": 175}, {"sum_logits": -92.23826599121094, "num_tokens": 34, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -120.43444061279297, "logits_per_token": -2.7128901762120865, "logits_per_char": -0.5912709358410958, "num_chars": 156}, {"sum_logits": -66.54736328125, "num_tokens": 28, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -92.91986846923828, "logits_per_token": -2.3766915457589284, "logits_per_char": -0.5786727241847827, "num_chars": 115}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 895, "native_id": 26604, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 32.43915939331055, "incorrect_loss_raw": 46.287806828816734, "correct_loss_per_char": 0.5498162609035686, "incorrect_loss_per_char": 0.5691075022109963, "correct_loss_per_token": 2.703263282775879, "incorrect_loss_per_token": 2.6966447658329216, "correct_loss_uncond": -24.122711181640625, "incorrect_loss_uncond": -31.2141056060791}, "model_output": [{"sum_logits": -44.278892517089844, "num_tokens": 14, "num_tokens_all": 424, "is_greedy": false, "sum_logits_uncond": -75.52251434326172, "logits_per_token": -3.1627780369349887, "logits_per_char": -0.6511601840748507, "num_chars": 68}, {"sum_logits": -32.43915939331055, "num_tokens": 12, "num_tokens_all": 422, "is_greedy": false, "sum_logits_uncond": -56.56187057495117, "logits_per_token": -2.703263282775879, "logits_per_char": -0.5498162609035686, "num_chars": 59}, {"sum_logits": -63.60728454589844, "num_tokens": 25, "num_tokens_all": 435, "is_greedy": false, "sum_logits_uncond": -100.0259017944336, "logits_per_token": -2.5442913818359374, "logits_per_char": -0.5483386598784348, "num_chars": 116}, {"sum_logits": -30.977243423461914, "num_tokens": 13, "num_tokens_all": 423, "is_greedy": false, "sum_logits_uncond": -56.95732116699219, "logits_per_token": -2.3828648787278395, "logits_per_char": -0.5078236626797035, "num_chars": 61}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 896, "native_id": 44102, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 81.13245391845703, "incorrect_loss_raw": 90.3901850382487, "correct_loss_per_char": 0.45073585510253905, "incorrect_loss_per_char": 0.5932874624279961, "correct_loss_per_token": 2.2536792755126953, "incorrect_loss_per_token": 2.5570394784924346, "correct_loss_uncond": -24.831878662109375, "incorrect_loss_uncond": -21.023569742838543}, "model_output": [{"sum_logits": -131.94000244140625, "num_tokens": 44, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -144.90499877929688, "logits_per_token": -2.998636419122869, "logits_per_char": -0.6871875127156576, "num_chars": 192}, {"sum_logits": -57.79290008544922, "num_tokens": 31, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -79.98961639404297, "logits_per_token": -1.86428709953062, "logits_per_char": -0.5025469572647758, "num_chars": 115}, {"sum_logits": -81.13245391845703, "num_tokens": 36, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -105.9643325805664, "logits_per_token": -2.2536792755126953, "logits_per_char": -0.45073585510253905, "num_chars": 180}, {"sum_logits": -81.43765258789062, "num_tokens": 29, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -109.34664916992188, "logits_per_token": -2.8081949168238145, "logits_per_char": -0.5901279173035553, "num_chars": 138}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 897, "native_id": 4848, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 35.02825164794922, "incorrect_loss_raw": 45.95522944132487, "correct_loss_per_char": 0.5560039944118924, "incorrect_loss_per_char": 0.6450134693492543, "correct_loss_per_token": 2.5020179748535156, "incorrect_loss_per_token": 2.939039981726444, "correct_loss_uncond": -35.090919494628906, "incorrect_loss_uncond": -21.04572296142578}, "model_output": [{"sum_logits": -81.7547378540039, "num_tokens": 22, "num_tokens_all": 433, "is_greedy": false, "sum_logits_uncond": -99.54960632324219, "logits_per_token": -3.7161244479092685, "logits_per_char": -0.8258054328687263, "num_chars": 99}, {"sum_logits": -23.001663208007812, "num_tokens": 11, "num_tokens_all": 422, "is_greedy": false, "sum_logits_uncond": -51.716896057128906, "logits_per_token": -2.0910602916370737, "logits_per_char": -0.4600332641601563, "num_chars": 50}, {"sum_logits": -33.10928726196289, "num_tokens": 11, "num_tokens_all": 422, "is_greedy": false, "sum_logits_uncond": -49.73635482788086, "logits_per_token": -3.00993520563299, "logits_per_char": -0.6492017110188802, "num_chars": 51}, {"sum_logits": -35.02825164794922, "num_tokens": 14, "num_tokens_all": 425, "is_greedy": false, "sum_logits_uncond": -70.11917114257812, "logits_per_token": -2.5020179748535156, "logits_per_char": -0.5560039944118924, "num_chars": 63}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 898, "native_id": 19326, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 46.155487060546875, "incorrect_loss_raw": 71.09537506103516, "correct_loss_per_char": 0.5842466716524921, "incorrect_loss_per_char": 0.696680539353005, "correct_loss_per_token": 3.077032470703125, "incorrect_loss_per_token": 3.4176754041919417, "correct_loss_uncond": -31.638656616210938, "incorrect_loss_uncond": -29.036603291829426}, "model_output": [{"sum_logits": -70.7253646850586, "num_tokens": 18, "num_tokens_all": 422, "is_greedy": false, "sum_logits_uncond": -107.43760681152344, "logits_per_token": -3.9291869269476996, "logits_per_char": -0.6609847166827906, "num_chars": 107}, {"sum_logits": -46.155487060546875, "num_tokens": 15, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -77.79414367675781, "logits_per_token": -3.077032470703125, "logits_per_char": -0.5842466716524921, "num_chars": 79}, {"sum_logits": -63.52594757080078, "num_tokens": 22, "num_tokens_all": 426, "is_greedy": false, "sum_logits_uncond": -96.86389923095703, "logits_per_token": -2.8875430714000356, "logits_per_char": -0.6617286205291748, "num_chars": 96}, {"sum_logits": -79.0348129272461, "num_tokens": 23, "num_tokens_all": 427, "is_greedy": false, "sum_logits_uncond": -96.09442901611328, "logits_per_token": -3.436296214228091, "logits_per_char": -0.7673282808470494, "num_chars": 103}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 899, "native_id": 1122, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 68.60952758789062, "incorrect_loss_raw": 113.87931823730469, "correct_loss_per_char": 0.48659239424035905, "incorrect_loss_per_char": 1.1020868380577953, "correct_loss_per_token": 2.144047737121582, "incorrect_loss_per_token": 4.073672473021229, "correct_loss_uncond": -46.43565368652344, "incorrect_loss_uncond": -21.437118530273438}, "model_output": [{"sum_logits": -85.3209228515625, "num_tokens": 27, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -103.76097106933594, "logits_per_token": -3.1600341796875, "logits_per_char": -1.1687797650898972, "num_chars": 73}, {"sum_logits": -122.89385986328125, "num_tokens": 33, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -149.44757080078125, "logits_per_token": -3.724056359493371, "logits_per_char": -0.8905352164005887, "num_chars": 138}, {"sum_logits": -133.4231719970703, "num_tokens": 25, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -152.7407684326172, "logits_per_token": -5.336926879882813, "logits_per_char": -1.2469455326829002, "num_chars": 107}, {"sum_logits": -68.60952758789062, "num_tokens": 32, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -115.04518127441406, "logits_per_token": -2.144047737121582, "logits_per_char": -0.48659239424035905, "num_chars": 141}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 900, "native_id": 33600, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 16.419811248779297, "incorrect_loss_raw": 18.592157046000164, "correct_loss_per_char": 0.6315312018761268, "incorrect_loss_per_char": 0.5284079657654596, "correct_loss_per_token": 2.3456873212541853, "incorrect_loss_per_token": 2.449370209376017, "correct_loss_uncond": -18.888416290283203, "incorrect_loss_uncond": -18.533153851826984}, "model_output": [{"sum_logits": -30.373828887939453, "num_tokens": 10, "num_tokens_all": 397, "is_greedy": false, "sum_logits_uncond": -47.072288513183594, "logits_per_token": -3.037382888793945, "logits_per_char": -0.5730911110931972, "num_chars": 53}, {"sum_logits": -16.419811248779297, "num_tokens": 7, "num_tokens_all": 394, "is_greedy": false, "sum_logits_uncond": -35.3082275390625, "logits_per_token": -2.3456873212541853, "logits_per_char": -0.6315312018761268, "num_chars": 26}, {"sum_logits": -10.264009475708008, "num_tokens": 8, "num_tokens_all": 395, "is_greedy": false, "sum_logits_uncond": -32.72582244873047, "logits_per_token": -1.283001184463501, "logits_per_char": -0.35393136123131064, "num_chars": 29}, {"sum_logits": -15.138632774353027, "num_tokens": 5, "num_tokens_all": 392, "is_greedy": false, "sum_logits_uncond": -31.577821731567383, "logits_per_token": -3.0277265548706054, "logits_per_char": -0.6582014249718707, "num_chars": 23}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 901, "native_id": 14138, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 77.52946472167969, "incorrect_loss_raw": 119.6953837076823, "correct_loss_per_char": 0.5100622679057875, "incorrect_loss_per_char": 0.6926492493034849, "correct_loss_per_token": 2.2151275634765626, "incorrect_loss_per_token": 3.0610355393476536, "correct_loss_uncond": -25.2181396484375, "incorrect_loss_uncond": -16.396759033203125}, "model_output": [{"sum_logits": -115.89572143554688, "num_tokens": 41, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -139.09300231933594, "logits_per_token": -2.826724913062119, "logits_per_char": -0.6403078532350656, "num_chars": 181}, {"sum_logits": -77.52946472167969, "num_tokens": 35, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -102.74760437011719, "logits_per_token": -2.2151275634765626, "logits_per_char": -0.5100622679057875, "num_chars": 152}, {"sum_logits": -165.53070068359375, "num_tokens": 45, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -172.08851623535156, "logits_per_token": -3.6784600151909723, "logits_per_char": -0.8402573638761104, "num_chars": 197}, {"sum_logits": -77.65972900390625, "num_tokens": 29, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -97.09490966796875, "logits_per_token": -2.6779216897898706, "logits_per_char": -0.5973825307992788, "num_chars": 130}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 902, "native_id": 21954, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 67.2597427368164, "incorrect_loss_raw": 94.0871073404948, "correct_loss_per_char": 0.5899977433054071, "incorrect_loss_per_char": 0.5667013926773504, "correct_loss_per_token": 2.4911015828450522, "incorrect_loss_per_token": 2.619911192216249, "correct_loss_uncond": -33.003150939941406, "incorrect_loss_uncond": -35.61674499511719}, "model_output": [{"sum_logits": -91.31558990478516, "num_tokens": 32, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -132.3380126953125, "logits_per_token": -2.853612184524536, "logits_per_char": -0.5707224369049072, "num_chars": 160}, {"sum_logits": -121.02020263671875, "num_tokens": 44, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -157.40786743164062, "logits_per_token": -2.750459150834517, "logits_per_char": -0.6369484349300987, "num_chars": 190}, {"sum_logits": -69.92552947998047, "num_tokens": 31, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -99.36567687988281, "logits_per_token": -2.2556622412896927, "logits_per_char": -0.49243330619704556, "num_chars": 142}, {"sum_logits": -67.2597427368164, "num_tokens": 27, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -100.26289367675781, "logits_per_token": -2.4911015828450522, "logits_per_char": -0.5899977433054071, "num_chars": 114}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 903, "native_id": 7225, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 83.8251953125, "incorrect_loss_raw": 95.01834106445312, "correct_loss_per_char": 0.4790011160714286, "incorrect_loss_per_char": 0.5904798509171179, "correct_loss_per_token": 2.0956298828125, "incorrect_loss_per_token": 2.5699688895615336, "correct_loss_uncond": -9.527786254882812, "incorrect_loss_uncond": -16.272005716959637}, "model_output": [{"sum_logits": -83.8251953125, "num_tokens": 40, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -93.35298156738281, "logits_per_token": -2.0956298828125, "logits_per_char": -0.4790011160714286, "num_chars": 175}, {"sum_logits": -68.85043334960938, "num_tokens": 26, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -84.2670669555664, "logits_per_token": -2.6480935903695912, "logits_per_char": -0.6039511697334156, "num_chars": 114}, {"sum_logits": -88.79608154296875, "num_tokens": 34, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -98.07940673828125, "logits_per_token": -2.6116494571461395, "logits_per_char": -0.6253245179082306, "num_chars": 142}, {"sum_logits": -127.40850830078125, "num_tokens": 52, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -151.52456665039062, "logits_per_token": -2.45016362116887, "logits_per_char": -0.5421638651097075, "num_chars": 235}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 904, "native_id": 27419, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 104.27262115478516, "incorrect_loss_raw": 116.31820170084636, "correct_loss_per_char": 0.5697957440152195, "incorrect_loss_per_char": 0.6754267074928477, "correct_loss_per_token": 2.7440163461785567, "incorrect_loss_per_token": 3.1583922431582496, "correct_loss_uncond": -40.85979461669922, "incorrect_loss_uncond": -12.305974324544271}, "model_output": [{"sum_logits": -95.78433227539062, "num_tokens": 35, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -115.86866760253906, "logits_per_token": -2.7366952078683036, "logits_per_char": -0.6260413874208538, "num_chars": 153}, {"sum_logits": -104.27262115478516, "num_tokens": 38, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -145.13241577148438, "logits_per_token": -2.7440163461785567, "logits_per_char": -0.5697957440152195, "num_chars": 183}, {"sum_logits": -114.58291625976562, "num_tokens": 35, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -130.12339782714844, "logits_per_token": -3.273797607421875, "logits_per_char": -0.7073019522207754, "num_chars": 162}, {"sum_logits": -138.5873565673828, "num_tokens": 40, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -139.88046264648438, "logits_per_token": -3.4646839141845702, "logits_per_char": -0.6929367828369141, "num_chars": 200}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 905, "native_id": 19734, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 183.0475616455078, "incorrect_loss_raw": 105.39732869466145, "correct_loss_per_char": 0.6779539320203993, "incorrect_loss_per_char": 0.7972315061624754, "correct_loss_per_token": 3.050792694091797, "incorrect_loss_per_token": 3.3160099534346466, "correct_loss_uncond": -35.836761474609375, "incorrect_loss_uncond": -17.4743169148763}, "model_output": [{"sum_logits": -183.0475616455078, "num_tokens": 60, "num_tokens_all": 499, "is_greedy": false, "sum_logits_uncond": -218.8843231201172, "logits_per_token": -3.050792694091797, "logits_per_char": -0.6779539320203993, "num_chars": 270}, {"sum_logits": -87.06031799316406, "num_tokens": 28, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -100.22476959228516, "logits_per_token": -3.109297071184431, "logits_per_char": -0.6909549047076513, "num_chars": 126}, {"sum_logits": -111.71314239501953, "num_tokens": 33, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -132.49456787109375, "logits_per_token": -3.385246739243016, "logits_per_char": -0.8796310424804688, "num_chars": 127}, {"sum_logits": -117.41852569580078, "num_tokens": 34, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -135.89559936523438, "logits_per_token": -3.4534860498764934, "logits_per_char": -0.8211085712993061, "num_chars": 143}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 906, "native_id": 30600, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 87.46806335449219, "incorrect_loss_raw": 118.11491394042969, "correct_loss_per_char": 0.43516449430095616, "incorrect_loss_per_char": 0.5900831704429689, "correct_loss_per_token": 1.9437347412109376, "incorrect_loss_per_token": 2.9708811372230333, "correct_loss_uncond": -30.7811279296875, "incorrect_loss_uncond": -19.144912719726562}, "model_output": [{"sum_logits": -94.96237182617188, "num_tokens": 37, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -113.15348815917969, "logits_per_token": -2.5665505898965373, "logits_per_char": -0.4655018226773131, "num_chars": 204}, {"sum_logits": -102.59712219238281, "num_tokens": 38, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -124.83241271972656, "logits_per_token": -2.6999242682206, "logits_per_char": -0.554579038877745, "num_chars": 185}, {"sum_logits": -87.46806335449219, "num_tokens": 45, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -118.24919128417969, "logits_per_token": -1.9437347412109376, "logits_per_char": -0.43516449430095616, "num_chars": 201}, {"sum_logits": -156.78524780273438, "num_tokens": 43, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -173.7935791015625, "logits_per_token": -3.6461685535519623, "logits_per_char": -0.7501686497738487, "num_chars": 209}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 907, "native_id": 4046, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 54.29014587402344, "incorrect_loss_raw": 114.64297231038411, "correct_loss_per_char": 0.4208538439846778, "incorrect_loss_per_char": 0.5951736699370824, "correct_loss_per_token": 1.8720739956559806, "incorrect_loss_per_token": 2.6617025714931177, "correct_loss_uncond": -19.07341766357422, "incorrect_loss_uncond": -16.55779774983724}, "model_output": [{"sum_logits": -108.99569702148438, "num_tokens": 41, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -138.2853546142578, "logits_per_token": -2.6584316346703507, "logits_per_char": -0.6055316501193576, "num_chars": 180}, {"sum_logits": -129.5326690673828, "num_tokens": 42, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -136.20303344726562, "logits_per_token": -3.0841111682710194, "logits_per_char": -0.647663345336914, "num_chars": 200}, {"sum_logits": -54.29014587402344, "num_tokens": 29, "num_tokens_all": 446, "is_greedy": false, "sum_logits_uncond": -73.36356353759766, "logits_per_token": -1.8720739956559806, "logits_per_char": -0.4208538439846778, "num_chars": 129}, {"sum_logits": -105.40055084228516, "num_tokens": 47, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -119.11392211914062, "logits_per_token": -2.242564911537982, "logits_per_char": -0.5323260143549755, "num_chars": 198}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 908, "native_id": 15043, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 29.501319885253906, "incorrect_loss_raw": 63.807624181111656, "correct_loss_per_char": 0.4609581232070923, "incorrect_loss_per_char": 0.8287381209336319, "correct_loss_per_token": 1.9667546590169271, "incorrect_loss_per_token": 3.706117649218102, "correct_loss_uncond": -39.83667755126953, "incorrect_loss_uncond": -14.32112948099772}, "model_output": [{"sum_logits": -29.501319885253906, "num_tokens": 15, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -69.33799743652344, "logits_per_token": -1.9667546590169271, "logits_per_char": -0.4609581232070923, "num_chars": 64}, {"sum_logits": -110.5557861328125, "num_tokens": 26, "num_tokens_all": 427, "is_greedy": false, "sum_logits_uncond": -123.39904022216797, "logits_per_token": -4.252145620492788, "logits_per_char": -0.96978759765625, "num_chars": 114}, {"sum_logits": -22.54951286315918, "num_tokens": 7, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -34.62055206298828, "logits_per_token": -3.221358980451311, "logits_per_char": -0.6833185716108843, "num_chars": 33}, {"sum_logits": -58.31757354736328, "num_tokens": 16, "num_tokens_all": 417, "is_greedy": false, "sum_logits_uncond": -76.36666870117188, "logits_per_token": -3.644848346710205, "logits_per_char": -0.8331081935337612, "num_chars": 70}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 909, "native_id": 14143, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 83.83406066894531, "incorrect_loss_raw": 115.39134216308594, "correct_loss_per_char": 0.5862521725101071, "incorrect_loss_per_char": 0.630967350563801, "correct_loss_per_token": 2.39525887625558, "incorrect_loss_per_token": 2.7252636594117803, "correct_loss_uncond": -21.24384307861328, "incorrect_loss_uncond": -18.006312052408855}, "model_output": [{"sum_logits": -95.72737884521484, "num_tokens": 43, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -121.53262329101562, "logits_per_token": -2.226218112679415, "logits_per_char": -0.5631022285012638, "num_chars": 170}, {"sum_logits": -110.39220428466797, "num_tokens": 41, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -125.926025390625, "logits_per_token": -2.692492787430926, "logits_per_char": -0.6532083093767336, "num_chars": 169}, {"sum_logits": -140.054443359375, "num_tokens": 43, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -152.73431396484375, "logits_per_token": -3.257080078125, "logits_per_char": -0.6765915138134058, "num_chars": 207}, {"sum_logits": -83.83406066894531, "num_tokens": 35, "num_tokens_all": 443, "is_greedy": false, "sum_logits_uncond": -105.0779037475586, "logits_per_token": -2.39525887625558, "logits_per_char": -0.5862521725101071, "num_chars": 143}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 910, "native_id": 10826, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.87545108795166, "incorrect_loss_raw": 16.052786509195965, "correct_loss_per_char": 0.2164744167793088, "incorrect_loss_per_char": 0.49915265682747817, "correct_loss_per_token": 0.9861612319946289, "incorrect_loss_per_token": 1.841442716070068, "correct_loss_uncond": -32.565855979919434, "incorrect_loss_uncond": -24.636857350667317}, "model_output": [{"sum_logits": -8.87545108795166, "num_tokens": 9, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -41.441307067871094, "logits_per_token": -0.9861612319946289, "logits_per_char": -0.2164744167793088, "num_chars": 41}, {"sum_logits": -17.449054718017578, "num_tokens": 7, "num_tokens_all": 404, "is_greedy": false, "sum_logits_uncond": -37.69971466064453, "logits_per_token": -2.4927221025739397, "logits_per_char": -0.7270439465840658, "num_chars": 24}, {"sum_logits": -23.673673629760742, "num_tokens": 11, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -48.44502258300781, "logits_per_token": -2.1521521481600674, "logits_per_char": -0.5505505495293196, "num_chars": 43}, {"sum_logits": -7.03563117980957, "num_tokens": 8, "num_tokens_all": 405, "is_greedy": false, "sum_logits_uncond": -35.9241943359375, "logits_per_token": -0.8794538974761963, "logits_per_char": -0.21986347436904907, "num_chars": 32}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 911, "native_id": 940, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 76.25559997558594, "incorrect_loss_raw": 95.83367665608723, "correct_loss_per_char": 0.44078381488778, "incorrect_loss_per_char": 0.5625792581829914, "correct_loss_per_token": 1.8156095232282365, "incorrect_loss_per_token": 2.4545325226012173, "correct_loss_uncond": -29.27136993408203, "incorrect_loss_uncond": -22.35325876871745}, "model_output": [{"sum_logits": -89.16254425048828, "num_tokens": 35, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -102.19049072265625, "logits_per_token": -2.5475012642996653, "logits_per_char": -0.5984063372515992, "num_chars": 149}, {"sum_logits": -76.25559997558594, "num_tokens": 42, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -105.52696990966797, "logits_per_token": -1.8156095232282365, "logits_per_char": -0.44078381488778, "num_chars": 173}, {"sum_logits": -126.61265563964844, "num_tokens": 44, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -151.15550231933594, "logits_per_token": -2.877560355446555, "logits_per_char": -0.649295669946915, "num_chars": 195}, {"sum_logits": -71.725830078125, "num_tokens": 37, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -101.21481323242188, "logits_per_token": -1.9385359480574325, "logits_per_char": -0.4400357673504601, "num_chars": 163}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 912, "native_id": 12684, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 86.23625183105469, "incorrect_loss_raw": 92.62148539225261, "correct_loss_per_char": 0.49561064270721084, "incorrect_loss_per_char": 0.560340131802831, "correct_loss_per_token": 2.395451439751519, "incorrect_loss_per_token": 2.9697516880819737, "correct_loss_uncond": -21.053985595703125, "incorrect_loss_uncond": -14.835067749023438}, "model_output": [{"sum_logits": -121.70020294189453, "num_tokens": 41, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -136.81863403320312, "logits_per_token": -2.968297632729135, "logits_per_char": -0.5686925371116567, "num_chars": 214}, {"sum_logits": -83.66670989990234, "num_tokens": 30, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -97.67904663085938, "logits_per_token": -2.788890329996745, "logits_per_char": -0.5229169368743897, "num_chars": 160}, {"sum_logits": -72.49754333496094, "num_tokens": 23, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -87.87197875976562, "logits_per_token": -3.1520671015200405, "logits_per_char": -0.5894109214224467, "num_chars": 123}, {"sum_logits": -86.23625183105469, "num_tokens": 36, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -107.29023742675781, "logits_per_token": -2.395451439751519, "logits_per_char": -0.49561064270721084, "num_chars": 174}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 913, "native_id": 24654, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 61.350643157958984, "incorrect_loss_raw": 148.00066121419272, "correct_loss_per_char": 0.47930189967155457, "incorrect_loss_per_char": 0.5732037614221009, "correct_loss_per_token": 2.359640121459961, "incorrect_loss_per_token": 2.9897827726421937, "correct_loss_uncond": -26.390186309814453, "incorrect_loss_uncond": -29.707112630208332}, "model_output": [{"sum_logits": -158.40554809570312, "num_tokens": 48, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -199.0663604736328, "logits_per_token": -3.3001155853271484, "logits_per_char": -0.6023024642422172, "num_chars": 263}, {"sum_logits": -167.64529418945312, "num_tokens": 55, "num_tokens_all": 493, "is_greedy": false, "sum_logits_uncond": -179.68975830078125, "logits_per_token": -3.048096257990057, "logits_per_char": -0.6278849969642439, "num_chars": 267}, {"sum_logits": -61.350643157958984, "num_tokens": 26, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -87.74082946777344, "logits_per_token": -2.359640121459961, "logits_per_char": -0.47930189967155457, "num_chars": 128}, {"sum_logits": -117.95114135742188, "num_tokens": 45, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -154.36720275878906, "logits_per_token": -2.621136474609375, "logits_per_char": -0.4894238230598418, "num_chars": 241}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 914, "native_id": 40065, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 131.15451049804688, "incorrect_loss_raw": 95.43432362874348, "correct_loss_per_char": 0.5803296924692339, "incorrect_loss_per_char": 0.6241526941784167, "correct_loss_per_token": 2.851185010827106, "incorrect_loss_per_token": 2.908031135769511, "correct_loss_uncond": -24.848876953125, "incorrect_loss_uncond": -24.052151997884113}, "model_output": [{"sum_logits": -131.15451049804688, "num_tokens": 46, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -156.00338745117188, "logits_per_token": -2.851185010827106, "logits_per_char": -0.5803296924692339, "num_chars": 226}, {"sum_logits": -80.77218627929688, "num_tokens": 31, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -98.01007080078125, "logits_per_token": -2.605554396106351, "logits_per_char": -0.6787578678932511, "num_chars": 119}, {"sum_logits": -82.48487854003906, "num_tokens": 33, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -123.19204711914062, "logits_per_token": -2.4995417739405776, "logits_per_char": -0.5426636746055201, "num_chars": 152}, {"sum_logits": -123.04590606689453, "num_tokens": 34, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -137.25730895996094, "logits_per_token": -3.618997237261604, "logits_per_char": -0.651036540036479, "num_chars": 189}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 915, "native_id": 21475, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 68.6878662109375, "incorrect_loss_raw": 169.71406046549478, "correct_loss_per_char": 0.5539344049269154, "incorrect_loss_per_char": 0.7262874767084276, "correct_loss_per_token": 2.543995044849537, "incorrect_loss_per_token": 3.4150684419664326, "correct_loss_uncond": -20.137039184570312, "incorrect_loss_uncond": -18.933339436848957}, "model_output": [{"sum_logits": -194.53067016601562, "num_tokens": 50, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -218.4652862548828, "logits_per_token": -3.8906134033203124, "logits_per_char": -0.7940027353714924, "num_chars": 245}, {"sum_logits": -193.9720916748047, "num_tokens": 61, "num_tokens_all": 496, "is_greedy": false, "sum_logits_uncond": -207.22657775878906, "logits_per_token": -3.179870355324667, "logits_per_char": -0.6401719197188274, "num_chars": 303}, {"sum_logits": -68.6878662109375, "num_tokens": 27, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -88.82490539550781, "logits_per_token": -2.543995044849537, "logits_per_char": -0.5539344049269154, "num_chars": 124}, {"sum_logits": -120.63941955566406, "num_tokens": 38, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -140.25033569335938, "logits_per_token": -3.1747215672543176, "logits_per_char": -0.7446877750349633, "num_chars": 162}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 916, "native_id": 39704, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 188.73553466796875, "incorrect_loss_raw": 124.88008371988933, "correct_loss_per_char": 0.7016190879850139, "incorrect_loss_per_char": 0.6384441950068954, "correct_loss_per_token": 2.9958021375868054, "incorrect_loss_per_token": 3.0542661590864104, "correct_loss_uncond": -21.798583984375, "incorrect_loss_uncond": -16.093533833821613}, "model_output": [{"sum_logits": -134.5977783203125, "num_tokens": 44, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -150.35113525390625, "logits_per_token": -3.0590404163707388, "logits_per_char": -0.6260361782340116, "num_chars": 215}, {"sum_logits": -188.73553466796875, "num_tokens": 63, "num_tokens_all": 493, "is_greedy": false, "sum_logits_uncond": -210.53411865234375, "logits_per_token": -2.9958021375868054, "logits_per_char": -0.7016190879850139, "num_chars": 269}, {"sum_logits": -52.40038299560547, "num_tokens": 21, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -71.3089599609375, "logits_per_token": -2.49525633312407, "logits_per_char": -0.5758283845670931, "num_chars": 91}, {"sum_logits": -187.64208984375, "num_tokens": 52, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -201.26075744628906, "logits_per_token": -3.608501727764423, "logits_per_char": -0.7134680222195817, "num_chars": 263}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 917, "native_id": 35839, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 94.592529296875, "incorrect_loss_raw": 109.10081354777019, "correct_loss_per_char": 0.7166100704308712, "incorrect_loss_per_char": 0.6145706733306956, "correct_loss_per_token": 2.5565548458614864, "incorrect_loss_per_token": 2.8729369396384037, "correct_loss_uncond": -29.849609375, "incorrect_loss_uncond": -16.720239003499348}, "model_output": [{"sum_logits": -147.9709014892578, "num_tokens": 41, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -165.48092651367188, "logits_per_token": -3.609046377786776, "logits_per_char": -0.7549535790268256, "num_chars": 196}, {"sum_logits": -94.592529296875, "num_tokens": 37, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -124.442138671875, "logits_per_token": -2.5565548458614864, "logits_per_char": -0.7166100704308712, "num_chars": 132}, {"sum_logits": -116.3319320678711, "num_tokens": 41, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -122.11809539794922, "logits_per_token": -2.8373641967773438, "logits_per_char": -0.5965740106044671, "num_chars": 195}, {"sum_logits": -62.99960708618164, "num_tokens": 29, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -89.8641357421875, "logits_per_token": -2.172400244351091, "logits_per_char": -0.49218443036079407, "num_chars": 128}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 918, "native_id": 13220, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 68.86642456054688, "incorrect_loss_raw": 89.8668950398763, "correct_loss_per_char": 0.43041515350341797, "incorrect_loss_per_char": 0.49879329302199227, "correct_loss_per_token": 2.15207576751709, "incorrect_loss_per_token": 2.4665594197203418, "correct_loss_uncond": -30.859085083007812, "incorrect_loss_uncond": -23.451853434244793}, "model_output": [{"sum_logits": -82.39823913574219, "num_tokens": 31, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -95.81669616699219, "logits_per_token": -2.6580077140561995, "logits_per_char": -0.5117903051909453, "num_chars": 161}, {"sum_logits": -68.86642456054688, "num_tokens": 32, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -99.72550964355469, "logits_per_token": -2.15207576751709, "logits_per_char": -0.43041515350341797, "num_chars": 160}, {"sum_logits": -87.5907974243164, "num_tokens": 34, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -104.0533676147461, "logits_per_token": -2.5761999242446003, "logits_per_char": -0.5276554061705807, "num_chars": 166}, {"sum_logits": -99.61164855957031, "num_tokens": 46, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -140.086181640625, "logits_per_token": -2.165470620860224, "logits_per_char": -0.456934167704451, "num_chars": 218}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 919, "native_id": 7286, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 120.12876892089844, "incorrect_loss_raw": 117.49111938476562, "correct_loss_per_char": 0.5775421582735502, "incorrect_loss_per_char": 0.5608389538446109, "correct_loss_per_token": 3.080224844125601, "incorrect_loss_per_token": 2.487099537333689, "correct_loss_uncond": -18.682342529296875, "incorrect_loss_uncond": -15.27228037516276}, "model_output": [{"sum_logits": -88.69149780273438, "num_tokens": 42, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -100.07862091064453, "logits_per_token": -2.111702328636533, "logits_per_char": -0.4820190097974694, "num_chars": 184}, {"sum_logits": -120.12876892089844, "num_tokens": 39, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -138.8111114501953, "logits_per_token": -3.080224844125601, "logits_per_char": -0.5775421582735502, "num_chars": 208}, {"sum_logits": -118.40060424804688, "num_tokens": 43, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -129.67828369140625, "logits_per_token": -2.753502424373183, "logits_per_char": -0.6434815448263417, "num_chars": 184}, {"sum_logits": -145.38125610351562, "num_tokens": 56, "num_tokens_all": 495, "is_greedy": false, "sum_logits_uncond": -168.53329467773438, "logits_per_token": -2.5960938589913503, "logits_per_char": -0.5570163069100216, "num_chars": 261}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 920, "native_id": 46393, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 40.23927688598633, "incorrect_loss_raw": 36.4029115041097, "correct_loss_per_char": 1.1835081437054802, "incorrect_loss_per_char": 1.2479451029207542, "correct_loss_per_token": 4.023927688598633, "incorrect_loss_per_token": 4.9204720643462325, "correct_loss_uncond": -13.337566375732422, "incorrect_loss_uncond": -3.529207229614258}, "model_output": [{"sum_logits": -40.23927688598633, "num_tokens": 10, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -53.57684326171875, "logits_per_token": -4.023927688598633, "logits_per_char": -1.1835081437054802, "num_chars": 34}, {"sum_logits": -44.94873809814453, "num_tokens": 7, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -40.23874282836914, "logits_per_token": -6.421248299734933, "logits_per_char": -1.549956486142915, "num_chars": 29}, {"sum_logits": -21.603029251098633, "num_tokens": 6, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -33.06915283203125, "logits_per_token": -3.6005048751831055, "logits_per_char": -0.9392621413521145, "num_chars": 23}, {"sum_logits": -42.65696716308594, "num_tokens": 9, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -46.488460540771484, "logits_per_token": -4.73966301812066, "logits_per_char": -1.2546166812672335, "num_chars": 34}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 921, "native_id": 14554, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 92.75592041015625, "incorrect_loss_raw": 135.31672159830728, "correct_loss_per_char": 0.6142776186103063, "incorrect_loss_per_char": 0.7094702213529076, "correct_loss_per_token": 2.506916767842061, "incorrect_loss_per_token": 3.139816268282193, "correct_loss_uncond": -47.087921142578125, "incorrect_loss_uncond": -21.8213628133138}, "model_output": [{"sum_logits": -82.63995361328125, "num_tokens": 37, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -105.99080657958984, "logits_per_token": -2.2335122598184123, "logits_per_char": -0.526369131294785, "num_chars": 157}, {"sum_logits": -172.8040771484375, "num_tokens": 42, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -191.70706176757812, "logits_per_token": -4.114382789248512, "logits_per_char": -0.9240859740558155, "num_chars": 187}, {"sum_logits": -150.50613403320312, "num_tokens": 49, "num_tokens_all": 496, "is_greedy": false, "sum_logits_uncond": -173.7163848876953, "logits_per_token": -3.0715537557796555, "logits_per_char": -0.6779555587081222, "num_chars": 222}, {"sum_logits": -92.75592041015625, "num_tokens": 37, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -139.84384155273438, "logits_per_token": -2.506916767842061, "logits_per_char": -0.6142776186103063, "num_chars": 151}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 922, "native_id": 50114, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 75.65264129638672, "incorrect_loss_raw": 123.35882822672527, "correct_loss_per_char": 0.5645719499730352, "incorrect_loss_per_char": 0.5480066481688807, "correct_loss_per_token": 1.891316032409668, "incorrect_loss_per_token": 2.5589881945742037, "correct_loss_uncond": -24.322471618652344, "incorrect_loss_uncond": -28.764874776204426}, "model_output": [{"sum_logits": -75.65264129638672, "num_tokens": 40, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -99.97511291503906, "logits_per_token": -1.891316032409668, "logits_per_char": -0.5645719499730352, "num_chars": 134}, {"sum_logits": -137.19119262695312, "num_tokens": 52, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -165.6581573486328, "logits_per_token": -2.6382921659029446, "logits_per_char": -0.5444094945514013, "num_chars": 252}, {"sum_logits": -112.406005859375, "num_tokens": 42, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -137.70263671875, "logits_per_token": -2.676333472842262, "logits_per_char": -0.5735000298947704, "num_chars": 196}, {"sum_logits": -120.47928619384766, "num_tokens": 51, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -153.01031494140625, "logits_per_token": -2.362338944977405, "logits_per_char": -0.5261104200604702, "num_chars": 229}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 923, "native_id": 4664, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 28.913694381713867, "incorrect_loss_raw": 32.98397572835287, "correct_loss_per_char": 0.6571294177662242, "incorrect_loss_per_char": 0.6107141451402144, "correct_loss_per_token": 2.891369438171387, "incorrect_loss_per_token": 2.7562948758970793, "correct_loss_uncond": -27.8227596282959, "incorrect_loss_uncond": -22.762872060139973}, "model_output": [{"sum_logits": -28.913694381713867, "num_tokens": 10, "num_tokens_all": 407, "is_greedy": false, "sum_logits_uncond": -56.736454010009766, "logits_per_token": -2.891369438171387, "logits_per_char": -0.6571294177662242, "num_chars": 44}, {"sum_logits": -32.067081451416016, "num_tokens": 11, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -55.146705627441406, "logits_per_token": -2.9151892228560015, "logits_per_char": -0.6680641969045004, "num_chars": 48}, {"sum_logits": -34.32651138305664, "num_tokens": 13, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -57.544498443603516, "logits_per_token": -2.6405008756197414, "logits_per_char": -0.5721085230509441, "num_chars": 60}, {"sum_logits": -32.55833435058594, "num_tokens": 12, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -54.549339294433594, "logits_per_token": -2.7131945292154946, "logits_per_char": -0.5919697154651988, "num_chars": 55}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 924, "native_id": 8941, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 27.8962345123291, "incorrect_loss_raw": 71.17557144165039, "correct_loss_per_char": 0.5072042638605291, "incorrect_loss_per_char": 0.7609454993290082, "correct_loss_per_token": 2.5360213193026455, "incorrect_loss_per_token": 3.1763418272020574, "correct_loss_uncond": -20.98310661315918, "incorrect_loss_uncond": -28.479122161865234}, "model_output": [{"sum_logits": -100.71392822265625, "num_tokens": 27, "num_tokens_all": 442, "is_greedy": false, "sum_logits_uncond": -128.4740447998047, "logits_per_token": -3.730145489728009, "logits_per_char": -0.8323465142368285, "num_chars": 121}, {"sum_logits": -27.8962345123291, "num_tokens": 11, "num_tokens_all": 426, "is_greedy": false, "sum_logits_uncond": -48.87934112548828, "logits_per_token": -2.5360213193026455, "logits_per_char": -0.5072042638605291, "num_chars": 55}, {"sum_logits": -73.4459228515625, "num_tokens": 22, "num_tokens_all": 437, "is_greedy": false, "sum_logits_uncond": -101.60940551757812, "logits_per_token": -3.3384510387073862, "logits_per_char": -0.8540223587390988, "num_chars": 86}, {"sum_logits": -39.36686325073242, "num_tokens": 16, "num_tokens_all": 431, "is_greedy": false, "sum_logits_uncond": -68.88063049316406, "logits_per_token": -2.4604289531707764, "logits_per_char": -0.5964676250110973, "num_chars": 66}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 925, "native_id": 17663, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 114.67261505126953, "incorrect_loss_raw": 76.986083984375, "correct_loss_per_char": 0.8132809578104222, "incorrect_loss_per_char": 0.511173521357917, "correct_loss_per_token": 3.6991166145570817, "incorrect_loss_per_token": 2.170581648852011, "correct_loss_uncond": -28.50566864013672, "incorrect_loss_uncond": -36.40739949544271}, "model_output": [{"sum_logits": -91.78851318359375, "num_tokens": 39, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -128.66159057617188, "logits_per_token": -2.3535516200921474, "logits_per_char": -0.5562940192945076, "num_chars": 165}, {"sum_logits": -114.67261505126953, "num_tokens": 31, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -143.17828369140625, "logits_per_token": -3.6991166145570817, "logits_per_char": -0.8132809578104222, "num_chars": 141}, {"sum_logits": -66.27820587158203, "num_tokens": 34, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -118.30720520019531, "logits_per_token": -1.949358996223001, "logits_per_char": -0.460265318552653, "num_chars": 144}, {"sum_logits": -72.89153289794922, "num_tokens": 33, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -93.21165466308594, "logits_per_token": -2.2088343302408853, "logits_per_char": -0.5169612262265902, "num_chars": 141}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 926, "native_id": 16012, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 101.92146301269531, "incorrect_loss_raw": 85.1589126586914, "correct_loss_per_char": 0.7228472554091866, "incorrect_loss_per_char": 0.6713734086682783, "correct_loss_per_token": 2.8311517503526478, "incorrect_loss_per_token": 2.7295677050116924, "correct_loss_uncond": -27.626068115234375, "incorrect_loss_uncond": -34.030626932779946}, "model_output": [{"sum_logits": -59.758140563964844, "num_tokens": 25, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -104.53590393066406, "logits_per_token": -2.390325622558594, "logits_per_char": -0.5151563841721107, "num_chars": 116}, {"sum_logits": -101.92146301269531, "num_tokens": 36, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -129.5475311279297, "logits_per_token": -2.8311517503526478, "logits_per_char": -0.7228472554091866, "num_chars": 141}, {"sum_logits": -60.966827392578125, "num_tokens": 20, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -87.60055541992188, "logits_per_token": -3.0483413696289063, "logits_per_char": -0.8351620190764126, "num_chars": 73}, {"sum_logits": -134.75177001953125, "num_tokens": 49, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -165.43215942382812, "logits_per_token": -2.7500361228475767, "logits_per_char": -0.6638018227563116, "num_chars": 203}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 927, "native_id": 50463, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 118.08901977539062, "incorrect_loss_raw": 84.19977060953777, "correct_loss_per_char": 0.5046539306640625, "incorrect_loss_per_char": 0.53761328505758, "correct_loss_per_token": 2.409979995416135, "incorrect_loss_per_token": 2.2423011283190366, "correct_loss_uncond": -15.6346435546875, "incorrect_loss_uncond": -18.377176920572918}, "model_output": [{"sum_logits": -46.968849182128906, "num_tokens": 22, "num_tokens_all": 436, "is_greedy": false, "sum_logits_uncond": -65.31037902832031, "logits_per_token": -2.1349476900967685, "logits_per_char": -0.5161411998036144, "num_chars": 91}, {"sum_logits": -119.5177230834961, "num_tokens": 49, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -137.34898376464844, "logits_per_token": -2.4391372057856344, "logits_per_char": -0.558494033100449, "num_chars": 214}, {"sum_logits": -86.11273956298828, "num_tokens": 40, "num_tokens_all": 454, "is_greedy": false, "sum_logits_uncond": -105.07147979736328, "logits_per_token": -2.152818489074707, "logits_per_char": -0.5382046222686767, "num_chars": 160}, {"sum_logits": -118.08901977539062, "num_tokens": 49, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -133.72366333007812, "logits_per_token": -2.409979995416135, "logits_per_char": -0.5046539306640625, "num_chars": 234}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 928, "native_id": 19891, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 100.7564697265625, "incorrect_loss_raw": 112.04578653971355, "correct_loss_per_char": 0.5388046509441845, "incorrect_loss_per_char": 0.6435346547759351, "correct_loss_per_token": 2.8787562779017857, "incorrect_loss_per_token": 3.047170229925625, "correct_loss_uncond": -16.599075317382812, "incorrect_loss_uncond": -13.762402852376303}, "model_output": [{"sum_logits": -100.7564697265625, "num_tokens": 35, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -117.35554504394531, "logits_per_token": -2.8787562779017857, "logits_per_char": -0.5388046509441845, "num_chars": 187}, {"sum_logits": -57.27314758300781, "num_tokens": 22, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -76.6368179321289, "logits_per_token": -2.6033248901367188, "logits_per_char": -0.6363683064778646, "num_chars": 90}, {"sum_logits": -145.9488983154297, "num_tokens": 40, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -161.4989013671875, "logits_per_token": -3.648722457885742, "logits_per_char": -0.7334115493237673, "num_chars": 199}, {"sum_logits": -132.91531372070312, "num_tokens": 46, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -139.28884887695312, "logits_per_token": -2.8894633417544155, "logits_per_char": -0.5608241085261735, "num_chars": 237}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 929, "native_id": 31286, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 86.9563217163086, "incorrect_loss_raw": 127.79915873209636, "correct_loss_per_char": 0.37972192889217726, "incorrect_loss_per_char": 0.594089866237335, "correct_loss_per_token": 1.7391264343261719, "incorrect_loss_per_token": 3.1300890317568175, "correct_loss_uncond": -28.49346160888672, "incorrect_loss_uncond": -26.553665161132812}, "model_output": [{"sum_logits": -79.86695861816406, "num_tokens": 26, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -111.0078125, "logits_per_token": -3.071806100698618, "logits_per_char": -0.5396416122848923, "num_chars": 148}, {"sum_logits": -161.4256591796875, "num_tokens": 55, "num_tokens_all": 494, "is_greedy": false, "sum_logits_uncond": -185.57791137695312, "logits_per_token": -2.9350119850852274, "logits_per_char": -0.5785865920418907, "num_chars": 279}, {"sum_logits": -142.1048583984375, "num_tokens": 42, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -166.47274780273438, "logits_per_token": -3.383449009486607, "logits_per_char": -0.6640413943852219, "num_chars": 214}, {"sum_logits": -86.9563217163086, "num_tokens": 50, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -115.44978332519531, "logits_per_token": -1.7391264343261719, "logits_per_char": -0.37972192889217726, "num_chars": 229}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 930, "native_id": 31057, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 21.693580627441406, "incorrect_loss_raw": 42.35309092203776, "correct_loss_per_char": 0.5045018750567769, "incorrect_loss_per_char": 0.6751851277802121, "correct_loss_per_token": 2.1693580627441404, "incorrect_loss_per_token": 3.0429234758478585, "correct_loss_uncond": -27.703567504882812, "incorrect_loss_uncond": -23.17468516031901}, "model_output": [{"sum_logits": -42.719032287597656, "num_tokens": 12, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -59.01549530029297, "logits_per_token": -3.5599193572998047, "logits_per_char": -0.8543806457519532, "num_chars": 50}, {"sum_logits": -48.20772933959961, "num_tokens": 14, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -76.683837890625, "logits_per_token": -3.4434092385428294, "logits_per_char": -0.7195183483522329, "num_chars": 67}, {"sum_logits": -36.132511138916016, "num_tokens": 17, "num_tokens_all": 413, "is_greedy": false, "sum_logits_uncond": -60.883995056152344, "logits_per_token": -2.1254418317009423, "logits_per_char": -0.4516563892364502, "num_chars": 80}, {"sum_logits": -21.693580627441406, "num_tokens": 10, "num_tokens_all": 406, "is_greedy": false, "sum_logits_uncond": -49.39714813232422, "logits_per_token": -2.1693580627441404, "logits_per_char": -0.5045018750567769, "num_chars": 43}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 931, "native_id": 29112, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 70.85763549804688, "incorrect_loss_raw": 100.60648091634114, "correct_loss_per_char": 0.43739281171633876, "incorrect_loss_per_char": 0.5335173299466084, "correct_loss_per_token": 2.285730177356351, "incorrect_loss_per_token": 2.6451041150843992, "correct_loss_uncond": -14.993255615234375, "incorrect_loss_uncond": -20.101476033528645}, "model_output": [{"sum_logits": -70.85763549804688, "num_tokens": 31, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -85.85089111328125, "logits_per_token": -2.285730177356351, "logits_per_char": -0.43739281171633876, "num_chars": 162}, {"sum_logits": -103.62411499023438, "num_tokens": 43, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -123.294921875, "logits_per_token": -2.409863139307776, "logits_per_char": -0.512990668268487, "num_chars": 202}, {"sum_logits": -95.13455200195312, "num_tokens": 33, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -114.05339813232422, "logits_per_token": -2.882865212180398, "logits_per_char": -0.5696679760595995, "num_chars": 167}, {"sum_logits": -103.06077575683594, "num_tokens": 39, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -124.77555084228516, "logits_per_token": -2.642583993765024, "logits_per_char": -0.5178933455117384, "num_chars": 199}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 932, "native_id": 19223, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 33.926910400390625, "incorrect_loss_raw": 58.95556767781576, "correct_loss_per_char": 0.6058376857212612, "incorrect_loss_per_char": 0.8188032228078562, "correct_loss_per_token": 3.3926910400390624, "incorrect_loss_per_token": 3.938993386089257, "correct_loss_uncond": -21.740550994873047, "incorrect_loss_uncond": -21.088115692138672}, "model_output": [{"sum_logits": -36.51683044433594, "num_tokens": 9, "num_tokens_all": 395, "is_greedy": false, "sum_logits_uncond": -60.29039764404297, "logits_per_token": -4.057425604926215, "logits_per_char": -0.7452414376395089, "num_chars": 49}, {"sum_logits": -53.13057327270508, "num_tokens": 14, "num_tokens_all": 400, "is_greedy": false, "sum_logits_uncond": -75.56334686279297, "logits_per_token": -3.795040948050363, "logits_per_char": -0.8301652073860168, "num_chars": 64}, {"sum_logits": -33.926910400390625, "num_tokens": 10, "num_tokens_all": 396, "is_greedy": false, "sum_logits_uncond": -55.66746139526367, "logits_per_token": -3.3926910400390624, "logits_per_char": -0.6058376857212612, "num_chars": 56}, {"sum_logits": -87.21929931640625, "num_tokens": 22, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -104.27730560302734, "logits_per_token": -3.9645136052911933, "logits_per_char": -0.8810030233980429, "num_chars": 99}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 933, "native_id": 12147, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 72.21697235107422, "incorrect_loss_raw": 134.62852478027344, "correct_loss_per_char": 0.4980480851798222, "incorrect_loss_per_char": 0.6339170405715103, "correct_loss_per_token": 2.407232411702474, "incorrect_loss_per_token": 2.941177823250754, "correct_loss_uncond": -39.47527313232422, "incorrect_loss_uncond": -26.185508728027344}, "model_output": [{"sum_logits": -94.56065368652344, "num_tokens": 38, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -122.8493881225586, "logits_per_token": -2.4884382549085116, "logits_per_char": -0.5224345507542731, "num_chars": 181}, {"sum_logits": -72.21697235107422, "num_tokens": 30, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -111.69224548339844, "logits_per_token": -2.407232411702474, "logits_per_char": -0.4980480851798222, "num_chars": 145}, {"sum_logits": -211.01980590820312, "num_tokens": 69, "num_tokens_all": 501, "is_greedy": false, "sum_logits_uncond": -239.2880096435547, "logits_per_token": -3.058258056640625, "logits_per_char": -0.6918682160924693, "num_chars": 305}, {"sum_logits": -98.30511474609375, "num_tokens": 30, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -120.30470275878906, "logits_per_token": -3.276837158203125, "logits_per_char": -0.6874483548677884, "num_chars": 143}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 934, "native_id": 38957, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 106.03461456298828, "incorrect_loss_raw": 81.70117060343425, "correct_loss_per_char": 0.6059120832170759, "incorrect_loss_per_char": 0.6668840455960342, "correct_loss_per_token": 2.256055628999751, "incorrect_loss_per_token": 2.6240859864007278, "correct_loss_uncond": -33.838279724121094, "incorrect_loss_uncond": -20.333667755126953}, "model_output": [{"sum_logits": -106.03461456298828, "num_tokens": 47, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -139.87289428710938, "logits_per_token": -2.256055628999751, "logits_per_char": -0.6059120832170759, "num_chars": 175}, {"sum_logits": -70.74454498291016, "num_tokens": 43, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -98.75877380371094, "logits_per_token": -1.6452219763467477, "logits_per_char": -0.38870629111489097, "num_chars": 182}, {"sum_logits": -120.4083023071289, "num_tokens": 29, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -136.9530029296875, "logits_per_token": -4.1520104243837554, "logits_per_char": -1.0380026060959389, "num_chars": 116}, {"sum_logits": -53.95066452026367, "num_tokens": 26, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -70.39273834228516, "logits_per_token": -2.0750255584716797, "logits_per_char": -0.5739432395772731, "num_chars": 94}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 935, "native_id": 26680, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 26.89370346069336, "incorrect_loss_raw": 62.77865473429362, "correct_loss_per_char": 0.672342586517334, "incorrect_loss_per_char": 0.7440796254956114, "correct_loss_per_token": 2.444882132790305, "incorrect_loss_per_token": 3.3415579618567115, "correct_loss_uncond": -23.787246704101562, "incorrect_loss_uncond": -22.18233871459961}, "model_output": [{"sum_logits": -26.89370346069336, "num_tokens": 11, "num_tokens_all": 417, "is_greedy": false, "sum_logits_uncond": -50.68095016479492, "logits_per_token": -2.444882132790305, "logits_per_char": -0.672342586517334, "num_chars": 40}, {"sum_logits": -79.39411163330078, "num_tokens": 21, "num_tokens_all": 427, "is_greedy": false, "sum_logits_uncond": -107.83600616455078, "logits_per_token": -3.7806719825381325, "logits_per_char": -0.8629794742750085, "num_chars": 92}, {"sum_logits": -51.70231246948242, "num_tokens": 16, "num_tokens_all": 422, "is_greedy": false, "sum_logits_uncond": -69.07511138916016, "logits_per_token": -3.2313945293426514, "logits_per_char": -0.7603281245512121, "num_chars": 68}, {"sum_logits": -57.239540100097656, "num_tokens": 19, "num_tokens_all": 425, "is_greedy": false, "sum_logits_uncond": -77.97186279296875, "logits_per_token": -3.01260737368935, "logits_per_char": -0.6089312776606134, "num_chars": 94}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 936, "native_id": 15409, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 73.28950500488281, "incorrect_loss_raw": 181.31986490885416, "correct_loss_per_char": 0.6007336475810067, "incorrect_loss_per_char": 0.8295414160540598, "correct_loss_per_token": 2.714426111291956, "incorrect_loss_per_token": 3.741904448231869, "correct_loss_uncond": -26.430763244628906, "incorrect_loss_uncond": -15.848704020182291}, "model_output": [{"sum_logits": -174.44451904296875, "num_tokens": 43, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -188.85183715820312, "logits_per_token": -4.0568492800690406, "logits_per_char": -0.95324873793972, "num_chars": 183}, {"sum_logits": -166.5586395263672, "num_tokens": 51, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -183.23715209960938, "logits_per_token": -3.265855676987592, "logits_per_char": -0.6826173751080622, "num_chars": 244}, {"sum_logits": -73.28950500488281, "num_tokens": 27, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -99.72026824951172, "logits_per_token": -2.714426111291956, "logits_per_char": -0.6007336475810067, "num_chars": 122}, {"sum_logits": -202.95643615722656, "num_tokens": 52, "num_tokens_all": 480, "is_greedy": false, "sum_logits_uncond": -219.41671752929688, "logits_per_token": -3.9030083876389723, "logits_per_char": -0.8527581351143974, "num_chars": 238}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 937, "native_id": 34076, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 61.86299133300781, "incorrect_loss_raw": 49.79783630371094, "correct_loss_per_char": 0.5474601002921046, "incorrect_loss_per_char": 0.8126294897060201, "correct_loss_per_token": 2.6896952753481655, "incorrect_loss_per_token": 3.8354332994072986, "correct_loss_uncond": -32.3338623046875, "incorrect_loss_uncond": -23.29796854654948}, "model_output": [{"sum_logits": -35.759056091308594, "num_tokens": 9, "num_tokens_all": 413, "is_greedy": false, "sum_logits_uncond": -47.086090087890625, "logits_per_token": -3.9732284545898438, "logits_per_char": -0.8939764022827148, "num_chars": 40}, {"sum_logits": -61.86299133300781, "num_tokens": 23, "num_tokens_all": 427, "is_greedy": false, "sum_logits_uncond": -94.19685363769531, "logits_per_token": -2.6896952753481655, "logits_per_char": -0.5474601002921046, "num_chars": 113}, {"sum_logits": -64.61840057373047, "num_tokens": 21, "num_tokens_all": 425, "is_greedy": false, "sum_logits_uncond": -101.76896667480469, "logits_per_token": -3.0770666939871654, "logits_per_char": -0.6527111169063684, "num_chars": 99}, {"sum_logits": -49.01605224609375, "num_tokens": 11, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -70.43235778808594, "logits_per_token": -4.456004749644887, "logits_per_char": -0.8912009499289772, "num_chars": 55}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 938, "native_id": 23730, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 78.24266815185547, "incorrect_loss_raw": 51.3080940246582, "correct_loss_per_char": 0.5882907379838757, "incorrect_loss_per_char": 0.4731920530145819, "correct_loss_per_token": 2.37098994399562, "incorrect_loss_per_token": 1.9004288234842512, "correct_loss_uncond": -32.868316650390625, "incorrect_loss_uncond": -36.02411778767904}, "model_output": [{"sum_logits": -50.40571594238281, "num_tokens": 30, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -88.3216323852539, "logits_per_token": -1.6801905314127603, "logits_per_char": -0.4582337812943892, "num_chars": 110}, {"sum_logits": -63.92039489746094, "num_tokens": 33, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -103.99420166015625, "logits_per_token": -1.9369816635594224, "logits_per_char": -0.5113631591796876, "num_chars": 125}, {"sum_logits": -78.24266815185547, "num_tokens": 33, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -111.1109848022461, "logits_per_token": -2.37098994399562, "logits_per_char": -0.5882907379838757, "num_chars": 133}, {"sum_logits": -39.59817123413086, "num_tokens": 19, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -69.68080139160156, "logits_per_token": -2.0841142754805717, "logits_per_char": -0.4499792185696689, "num_chars": 88}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 939, "native_id": 8753, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 65.25687408447266, "incorrect_loss_raw": 84.3932622273763, "correct_loss_per_char": 0.4078554630279541, "incorrect_loss_per_char": 0.5982888055565828, "correct_loss_per_token": 1.763699299580342, "incorrect_loss_per_token": 2.8337987937901956, "correct_loss_uncond": -47.0582275390625, "incorrect_loss_uncond": -19.492586771647137}, "model_output": [{"sum_logits": -83.25694274902344, "num_tokens": 34, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -112.92718505859375, "logits_per_token": -2.4487336102653954, "logits_per_char": -0.54774304440147, "num_chars": 152}, {"sum_logits": -85.0475082397461, "num_tokens": 36, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -106.20076751708984, "logits_per_token": -2.3624307844373913, "logits_per_char": -0.515439443877249, "num_chars": 165}, {"sum_logits": -65.25687408447266, "num_tokens": 37, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -112.31510162353516, "logits_per_token": -1.763699299580342, "logits_per_char": -0.4078554630279541, "num_chars": 160}, {"sum_logits": -84.87533569335938, "num_tokens": 23, "num_tokens_all": 442, "is_greedy": false, "sum_logits_uncond": -92.52959442138672, "logits_per_token": -3.690231986667799, "logits_per_char": -0.7316839283910291, "num_chars": 116}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 940, "native_id": 9866, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 59.67041778564453, "incorrect_loss_raw": 41.549168268839516, "correct_loss_per_char": 0.5188731981360394, "incorrect_loss_per_char": 0.6149246170659869, "correct_loss_per_token": 2.21001547354239, "incorrect_loss_per_token": 2.9260488409990213, "correct_loss_uncond": -35.437416076660156, "incorrect_loss_uncond": -25.9616756439209}, "model_output": [{"sum_logits": -26.87232780456543, "num_tokens": 13, "num_tokens_all": 441, "is_greedy": false, "sum_logits_uncond": -59.556312561035156, "logits_per_token": -2.0671021388127255, "logits_per_char": -0.4714443474485163, "num_chars": 57}, {"sum_logits": -59.67041778564453, "num_tokens": 27, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -95.10783386230469, "logits_per_token": -2.21001547354239, "logits_per_char": -0.5188731981360394, "num_chars": 115}, {"sum_logits": -40.46684265136719, "num_tokens": 14, "num_tokens_all": 442, "is_greedy": false, "sum_logits_uncond": -56.259498596191406, "logits_per_token": -2.890488760811942, "logits_per_char": -0.6744473775227865, "num_chars": 60}, {"sum_logits": -57.30833435058594, "num_tokens": 15, "num_tokens_all": 443, "is_greedy": false, "sum_logits_uncond": -86.71672058105469, "logits_per_token": -3.8205556233723956, "logits_per_char": -0.6988821262266578, "num_chars": 82}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 941, "native_id": 21727, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 122.76876831054688, "incorrect_loss_raw": 143.3169148763021, "correct_loss_per_char": 0.5874103746916118, "incorrect_loss_per_char": 0.6566793333288853, "correct_loss_per_token": 2.4553753662109377, "incorrect_loss_per_token": 2.7127740150952975, "correct_loss_uncond": -25.4095458984375, "incorrect_loss_uncond": -12.751439412434896}, "model_output": [{"sum_logits": -146.8865203857422, "num_tokens": 47, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -161.1375732421875, "logits_per_token": -3.125245114590259, "logits_per_char": -0.8115277369378021, "num_chars": 181}, {"sum_logits": -137.23899841308594, "num_tokens": 54, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -147.5174560546875, "logits_per_token": -2.5414629335756653, "logits_per_char": -0.6019254316363418, "num_chars": 228}, {"sum_logits": -122.76876831054688, "num_tokens": 50, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -148.17831420898438, "logits_per_token": -2.4553753662109377, "logits_per_char": -0.5874103746916118, "num_chars": 209}, {"sum_logits": -145.82522583007812, "num_tokens": 59, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -159.55003356933594, "logits_per_token": -2.4716139971199684, "logits_per_char": -0.5565848314125119, "num_chars": 262}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 942, "native_id": 1517, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 25.534618377685547, "incorrect_loss_raw": 22.352285385131836, "correct_loss_per_char": 0.9119506563459124, "incorrect_loss_per_char": 0.6286831548891714, "correct_loss_per_token": 3.6478026253836497, "incorrect_loss_per_token": 2.5520688548232573, "correct_loss_uncond": -24.362186431884766, "incorrect_loss_uncond": -26.42197863260905}, "model_output": [{"sum_logits": -32.17664337158203, "num_tokens": 11, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -53.86664962768555, "logits_per_token": -2.9251493974165483, "logits_per_char": -0.7661105564662388, "num_chars": 42}, {"sum_logits": -14.102500915527344, "num_tokens": 8, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -43.18669891357422, "logits_per_token": -1.762812614440918, "logits_per_char": -0.4273485125917377, "num_chars": 33}, {"sum_logits": -20.777711868286133, "num_tokens": 7, "num_tokens_all": 407, "is_greedy": false, "sum_logits_uncond": -49.26944351196289, "logits_per_token": -2.9682445526123047, "logits_per_char": -0.6925903956095377, "num_chars": 30}, {"sum_logits": -25.534618377685547, "num_tokens": 7, "num_tokens_all": 407, "is_greedy": false, "sum_logits_uncond": -49.89680480957031, "logits_per_token": -3.6478026253836497, "logits_per_char": -0.9119506563459124, "num_chars": 28}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 943, "native_id": 25238, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 79.74424743652344, "incorrect_loss_raw": 115.52076466878255, "correct_loss_per_char": 0.5655620385569038, "incorrect_loss_per_char": 0.7219303395199943, "correct_loss_per_token": 2.0447242932441907, "incorrect_loss_per_token": 3.3327290772675746, "correct_loss_uncond": -56.83488464355469, "incorrect_loss_uncond": -27.30450693766276}, "model_output": [{"sum_logits": -103.33219909667969, "num_tokens": 40, "num_tokens_all": 501, "is_greedy": false, "sum_logits_uncond": -148.6036376953125, "logits_per_token": -2.5833049774169923, "logits_per_char": -0.5708961276059651, "num_chars": 181}, {"sum_logits": -133.373779296875, "num_tokens": 30, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -148.78363037109375, "logits_per_token": -4.445792643229167, "logits_per_char": -0.9879539207175926, "num_chars": 135}, {"sum_logits": -79.74424743652344, "num_tokens": 39, "num_tokens_all": 500, "is_greedy": false, "sum_logits_uncond": -136.57913208007812, "logits_per_token": -2.0447242932441907, "logits_per_char": -0.5655620385569038, "num_chars": 141}, {"sum_logits": -109.85631561279297, "num_tokens": 37, "num_tokens_all": 498, "is_greedy": false, "sum_logits_uncond": -131.0885467529297, "logits_per_token": -2.9690896111565666, "logits_per_char": -0.6069409702364252, "num_chars": 181}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 944, "native_id": 6407, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.919634819030762, "incorrect_loss_raw": 41.38204574584961, "correct_loss_per_char": 0.49690903150118315, "incorrect_loss_per_char": 0.7109778184157151, "correct_loss_per_token": 1.8456621170043945, "incorrect_loss_per_token": 2.972773701536889, "correct_loss_uncond": -29.208638191223145, "incorrect_loss_uncond": -24.984210968017578}, "model_output": [{"sum_logits": -12.919634819030762, "num_tokens": 7, "num_tokens_all": 405, "is_greedy": false, "sum_logits_uncond": -42.128273010253906, "logits_per_token": -1.8456621170043945, "logits_per_char": -0.49690903150118315, "num_chars": 26}, {"sum_logits": -21.62636947631836, "num_tokens": 12, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -55.98284912109375, "logits_per_token": -1.8021974563598633, "logits_per_char": -0.4805859883626302, "num_chars": 45}, {"sum_logits": -76.15643310546875, "num_tokens": 17, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -96.60377502441406, "logits_per_token": -4.479790182674632, "logits_per_char": -0.9763645269931891, "num_chars": 78}, {"sum_logits": -26.36333465576172, "num_tokens": 10, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -46.51214599609375, "logits_per_token": -2.636333465576172, "logits_per_char": -0.6759829398913261, "num_chars": 39}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 945, "native_id": 3028, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 120.92185974121094, "incorrect_loss_raw": 117.36630503336589, "correct_loss_per_char": 0.48368743896484373, "incorrect_loss_per_char": 0.5606653645104597, "correct_loss_per_token": 2.418437194824219, "incorrect_loss_per_token": 2.8643001735719857, "correct_loss_uncond": -21.549942016601562, "incorrect_loss_uncond": -43.01134999593099}, "model_output": [{"sum_logits": -177.30148315429688, "num_tokens": 54, "num_tokens_all": 493, "is_greedy": false, "sum_logits_uncond": -231.031005859375, "logits_per_token": -3.283360799153646, "logits_per_char": -0.6898890395108828, "num_chars": 257}, {"sum_logits": -104.99828338623047, "num_tokens": 40, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -149.45774841308594, "logits_per_token": -2.6249570846557617, "logits_per_char": -0.4468012058988531, "num_chars": 235}, {"sum_logits": -69.79914855957031, "num_tokens": 26, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -100.64421081542969, "logits_per_token": -2.6845826369065504, "logits_per_char": -0.5453058481216431, "num_chars": 128}, {"sum_logits": -120.92185974121094, "num_tokens": 50, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -142.4718017578125, "logits_per_token": -2.418437194824219, "logits_per_char": -0.48368743896484373, "num_chars": 250}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 946, "native_id": 40220, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 80.98249816894531, "incorrect_loss_raw": 133.82433064778647, "correct_loss_per_char": 0.5784464154924666, "incorrect_loss_per_char": 0.6983939953630055, "correct_loss_per_token": 2.6994166056315105, "incorrect_loss_per_token": 3.378262038870105, "correct_loss_uncond": -23.273056030273438, "incorrect_loss_uncond": -5.8149973551432295}, "model_output": [{"sum_logits": -142.742431640625, "num_tokens": 41, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -146.54945373535156, "logits_per_token": -3.4815227229420733, "logits_per_char": -0.6701522612235915, "num_chars": 213}, {"sum_logits": -139.03158569335938, "num_tokens": 43, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -147.60629272460938, "logits_per_token": -3.2332926905432413, "logits_per_char": -0.6951579284667969, "num_chars": 200}, {"sum_logits": -80.98249816894531, "num_tokens": 30, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -104.25555419921875, "logits_per_token": -2.6994166056315105, "logits_per_char": -0.5784464154924666, "num_chars": 140}, {"sum_logits": -119.698974609375, "num_tokens": 35, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -124.76223754882812, "logits_per_token": -3.419970703125, "logits_per_char": -0.7298717963986281, "num_chars": 164}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 947, "native_id": 43317, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 76.27598571777344, "incorrect_loss_raw": 79.6879399617513, "correct_loss_per_char": 0.630380047254326, "incorrect_loss_per_char": 0.6622189801858195, "correct_loss_per_token": 3.3163472051205845, "incorrect_loss_per_token": 3.0829881465426716, "correct_loss_uncond": -23.44140625, "incorrect_loss_uncond": -16.591206868489582}, "model_output": [{"sum_logits": -91.53685760498047, "num_tokens": 34, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -107.9671401977539, "logits_per_token": -2.6922605177935433, "logits_per_char": -0.526073894281497, "num_chars": 174}, {"sum_logits": -76.27598571777344, "num_tokens": 23, "num_tokens_all": 444, "is_greedy": false, "sum_logits_uncond": -99.71739196777344, "logits_per_token": -3.3163472051205845, "logits_per_char": -0.630380047254326, "num_chars": 121}, {"sum_logits": -72.09901428222656, "num_tokens": 22, "num_tokens_all": 443, "is_greedy": false, "sum_logits_uncond": -92.46060180664062, "logits_per_token": -3.277227921919389, "logits_per_char": -0.7282728715376421, "num_chars": 99}, {"sum_logits": -75.42794799804688, "num_tokens": 23, "num_tokens_all": 444, "is_greedy": false, "sum_logits_uncond": -88.40969848632812, "logits_per_token": -3.2794759999150815, "logits_per_char": -0.7323101747383192, "num_chars": 103}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 948, "native_id": 15004, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 31.945812225341797, "incorrect_loss_raw": 34.27021725972494, "correct_loss_per_char": 0.37583308500402113, "incorrect_loss_per_char": 0.7966520611284557, "correct_loss_per_token": 1.8791654250201058, "incorrect_loss_per_token": 3.3716099891827738, "correct_loss_uncond": -42.31572341918945, "incorrect_loss_uncond": -16.799645105997723}, "model_output": [{"sum_logits": -31.945812225341797, "num_tokens": 17, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -74.26153564453125, "logits_per_token": -1.8791654250201058, "logits_per_char": -0.37583308500402113, "num_chars": 85}, {"sum_logits": -43.45476150512695, "num_tokens": 11, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -60.617637634277344, "logits_per_token": -3.95043286410245, "logits_per_char": -0.9876082160256126, "num_chars": 44}, {"sum_logits": -38.89226531982422, "num_tokens": 12, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -54.457908630371094, "logits_per_token": -3.2410221099853516, "logits_per_char": -0.7202271355523003, "num_chars": 54}, {"sum_logits": -20.463624954223633, "num_tokens": 7, "num_tokens_all": 405, "is_greedy": false, "sum_logits_uncond": -38.13404083251953, "logits_per_token": -2.923374993460519, "logits_per_char": -0.6821208318074544, "num_chars": 30}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 949, "native_id": 14925, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 26.948780059814453, "incorrect_loss_raw": 51.12583033243815, "correct_loss_per_char": 0.41459661630483774, "incorrect_loss_per_char": 0.7161703814838764, "correct_loss_per_token": 2.2457316716512046, "incorrect_loss_per_token": 3.0169632485038353, "correct_loss_uncond": -42.88869857788086, "incorrect_loss_uncond": -19.497419993082683}, "model_output": [{"sum_logits": -68.8280029296875, "num_tokens": 19, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -78.32868957519531, "logits_per_token": -3.6225264699835527, "logits_per_char": -0.9056316174958882, "num_chars": 76}, {"sum_logits": -34.56486511230469, "num_tokens": 15, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -56.865386962890625, "logits_per_token": -2.3043243408203127, "logits_per_char": -0.5486486525762648, "num_chars": 63}, {"sum_logits": -26.948780059814453, "num_tokens": 12, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -69.83747863769531, "logits_per_token": -2.2457316716512046, "logits_per_char": -0.41459661630483774, "num_chars": 65}, {"sum_logits": -49.984622955322266, "num_tokens": 16, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -76.67567443847656, "logits_per_token": -3.1240389347076416, "logits_per_char": -0.694230874379476, "num_chars": 72}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 950, "native_id": 38880, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 105.30369567871094, "incorrect_loss_raw": 84.84741465250652, "correct_loss_per_char": 0.5428025550449017, "incorrect_loss_per_char": 0.5961604537079909, "correct_loss_per_token": 2.448923155318859, "incorrect_loss_per_token": 2.6109608079148487, "correct_loss_uncond": -29.144393920898438, "incorrect_loss_uncond": -10.71441396077474}, "model_output": [{"sum_logits": -82.88221740722656, "num_tokens": 25, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -90.7795181274414, "logits_per_token": -3.3152886962890626, "logits_per_char": -0.7400197982788086, "num_chars": 112}, {"sum_logits": -105.30369567871094, "num_tokens": 43, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -134.44808959960938, "logits_per_token": -2.448923155318859, "logits_per_char": -0.5428025550449017, "num_chars": 194}, {"sum_logits": -87.92664337158203, "num_tokens": 39, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -99.14282989501953, "logits_per_token": -2.2545293172200522, "logits_per_char": -0.552997757053975, "num_chars": 159}, {"sum_logits": -83.73338317871094, "num_tokens": 37, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -96.76313781738281, "logits_per_token": -2.263064410235431, "logits_per_char": -0.495463805791189, "num_chars": 169}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 951, "native_id": 21221, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 49.381134033203125, "incorrect_loss_raw": 108.97458140055339, "correct_loss_per_char": 0.4572327225296586, "incorrect_loss_per_char": 0.6958320080131729, "correct_loss_per_token": 2.0575472513834634, "incorrect_loss_per_token": 2.938937897869419, "correct_loss_uncond": -28.657257080078125, "incorrect_loss_uncond": -24.3944574991862}, "model_output": [{"sum_logits": -135.08499145507812, "num_tokens": 42, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -159.1945343017578, "logits_per_token": -3.216309320359003, "logits_per_char": -0.7463259196413156, "num_chars": 181}, {"sum_logits": -111.89553833007812, "num_tokens": 32, "num_tokens_all": 443, "is_greedy": false, "sum_logits_uncond": -125.66456604003906, "logits_per_token": -3.4967355728149414, "logits_per_char": -0.8476934721975615, "num_chars": 132}, {"sum_logits": -49.381134033203125, "num_tokens": 24, "num_tokens_all": 435, "is_greedy": false, "sum_logits_uncond": -78.03839111328125, "logits_per_token": -2.0575472513834634, "logits_per_char": -0.4572327225296586, "num_chars": 108}, {"sum_logits": -79.9432144165039, "num_tokens": 38, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -115.24801635742188, "logits_per_token": -2.103768800434313, "logits_per_char": -0.4934766322006414, "num_chars": 162}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 952, "native_id": 20186, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 75.58289337158203, "incorrect_loss_raw": 138.89826456705728, "correct_loss_per_char": 0.45807814164595173, "incorrect_loss_per_char": 0.695605168882223, "correct_loss_per_token": 1.9380229069636419, "incorrect_loss_per_token": 3.4138390098134757, "correct_loss_uncond": -13.026679992675781, "incorrect_loss_uncond": -21.674397786458332}, "model_output": [{"sum_logits": -75.58289337158203, "num_tokens": 39, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -88.60957336425781, "logits_per_token": -1.9380229069636419, "logits_per_char": -0.45807814164595173, "num_chars": 165}, {"sum_logits": -132.1270751953125, "num_tokens": 35, "num_tokens_all": 481, "is_greedy": false, "sum_logits_uncond": -158.43948364257812, "logits_per_token": -3.775059291294643, "logits_per_char": -0.734039306640625, "num_chars": 180}, {"sum_logits": -135.33804321289062, "num_tokens": 42, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -153.0474853515625, "logits_per_token": -3.2223343622116816, "logits_per_char": -0.7123054905941612, "num_chars": 190}, {"sum_logits": -149.22967529296875, "num_tokens": 46, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -170.23101806640625, "logits_per_token": -3.244123375934103, "logits_per_char": -0.640470709411883, "num_chars": 233}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 953, "native_id": 35360, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 84.36226654052734, "incorrect_loss_raw": 101.20288594563802, "correct_loss_per_char": 0.6025876181466239, "incorrect_loss_per_char": 0.8238213657376465, "correct_loss_per_token": 2.6363208293914795, "incorrect_loss_per_token": 3.5839002581610195, "correct_loss_uncond": -18.06352996826172, "incorrect_loss_uncond": -11.004158020019531}, "model_output": [{"sum_logits": -84.36226654052734, "num_tokens": 32, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -102.42579650878906, "logits_per_token": -2.6363208293914795, "logits_per_char": -0.6025876181466239, "num_chars": 140}, {"sum_logits": -100.35128784179688, "num_tokens": 23, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -119.15723419189453, "logits_per_token": -4.363099471382473, "logits_per_char": -0.9838361553117341, "num_chars": 102}, {"sum_logits": -116.11289978027344, "num_tokens": 40, "num_tokens_all": 491, "is_greedy": false, "sum_logits_uncond": -131.82192993164062, "logits_per_token": -2.902822494506836, "logits_per_char": -0.749115482453377, "num_chars": 155}, {"sum_logits": -87.14447021484375, "num_tokens": 25, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -85.6419677734375, "logits_per_token": -3.48577880859375, "logits_per_char": -0.7385124594478284, "num_chars": 118}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 954, "native_id": 14141, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 54.01493453979492, "incorrect_loss_raw": 47.0248769124349, "correct_loss_per_char": 0.6587187138999381, "incorrect_loss_per_char": 0.7562036865069025, "correct_loss_per_token": 3.3759334087371826, "incorrect_loss_per_token": 3.4318636107066323, "correct_loss_uncond": -23.624347686767578, "incorrect_loss_uncond": -27.28101348876953}, "model_output": [{"sum_logits": -50.673004150390625, "num_tokens": 12, "num_tokens_all": 407, "is_greedy": false, "sum_logits_uncond": -70.37348175048828, "logits_per_token": -4.222750345865886, "logits_per_char": -1.0556875864664714, "num_chars": 48}, {"sum_logits": -54.01493453979492, "num_tokens": 16, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -77.6392822265625, "logits_per_token": -3.3759334087371826, "logits_per_char": -0.6587187138999381, "num_chars": 82}, {"sum_logits": -43.05487823486328, "num_tokens": 16, "num_tokens_all": 411, "is_greedy": false, "sum_logits_uncond": -78.59490966796875, "logits_per_token": -2.690929889678955, "logits_per_char": -0.47313053005344263, "num_chars": 91}, {"sum_logits": -47.34674835205078, "num_tokens": 14, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -73.94927978515625, "logits_per_token": -3.381910596575056, "logits_per_char": -0.7397929430007935, "num_chars": 64}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 955, "native_id": 114, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 26.795433044433594, "incorrect_loss_raw": 22.297977447509766, "correct_loss_per_char": 0.6231496056845022, "incorrect_loss_per_char": 1.1504966808371224, "correct_loss_per_token": 2.679543304443359, "incorrect_loss_per_token": 3.999484434581938, "correct_loss_uncond": -22.326156616210938, "incorrect_loss_uncond": -11.985208511352539}, "model_output": [{"sum_logits": -16.538230895996094, "num_tokens": 5, "num_tokens_all": 389, "is_greedy": false, "sum_logits_uncond": -28.23090934753418, "logits_per_token": -3.3076461791992187, "logits_per_char": -0.972837111529182, "num_chars": 17}, {"sum_logits": -26.199871063232422, "num_tokens": 5, "num_tokens_all": 389, "is_greedy": false, "sum_logits_uncond": -33.5797004699707, "logits_per_token": -5.2399742126464846, "logits_per_char": -1.7466580708821615, "num_chars": 15}, {"sum_logits": -26.795433044433594, "num_tokens": 10, "num_tokens_all": 394, "is_greedy": false, "sum_logits_uncond": -49.12158966064453, "logits_per_token": -2.679543304443359, "logits_per_char": -0.6231496056845022, "num_chars": 43}, {"sum_logits": -24.15583038330078, "num_tokens": 7, "num_tokens_all": 391, "is_greedy": false, "sum_logits_uncond": -41.03894805908203, "logits_per_token": -3.4508329119001115, "logits_per_char": -0.7319948601000237, "num_chars": 33}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 956, "native_id": 41055, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 101.16206359863281, "incorrect_loss_raw": 117.36509958902995, "correct_loss_per_char": 0.5881515325501908, "incorrect_loss_per_char": 0.6034432715112036, "correct_loss_per_token": 2.734109826990076, "incorrect_loss_per_token": 2.9062182512786787, "correct_loss_uncond": -25.346176147460938, "incorrect_loss_uncond": -22.83763885498047}, "model_output": [{"sum_logits": -73.3213119506836, "num_tokens": 30, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -88.47857666015625, "logits_per_token": -2.444043731689453, "logits_per_char": -0.4700084099402794, "num_chars": 156}, {"sum_logits": -101.16206359863281, "num_tokens": 37, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -126.50823974609375, "logits_per_token": -2.734109826990076, "logits_per_char": -0.5881515325501908, "num_chars": 172}, {"sum_logits": -190.0363311767578, "num_tokens": 53, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -224.29336547851562, "logits_per_token": -3.585591154278449, "logits_per_char": -0.8121210734049479, "num_chars": 234}, {"sum_logits": -88.73765563964844, "num_tokens": 33, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -107.83627319335938, "logits_per_token": -2.6890198678681343, "logits_per_char": -0.5282003311883836, "num_chars": 168}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 957, "native_id": 2809, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 138.02651977539062, "incorrect_loss_raw": 133.52582550048828, "correct_loss_per_char": 0.4710802722709578, "incorrect_loss_per_char": 0.6232565475027266, "correct_loss_per_token": 2.509573086825284, "incorrect_loss_per_token": 2.99587459958493, "correct_loss_uncond": -15.387786865234375, "incorrect_loss_uncond": -10.519831339518229}, "model_output": [{"sum_logits": -213.10107421875, "num_tokens": 64, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -225.54010009765625, "logits_per_token": -3.3297042846679688, "logits_per_char": -0.6874228200604838, "num_chars": 310}, {"sum_logits": -138.02651977539062, "num_tokens": 55, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -153.414306640625, "logits_per_token": -2.509573086825284, "logits_per_char": -0.4710802722709578, "num_chars": 293}, {"sum_logits": -70.50206756591797, "num_tokens": 24, "num_tokens_all": 443, "is_greedy": false, "sum_logits_uncond": -83.69833374023438, "logits_per_token": -2.9375861485799155, "logits_per_char": -0.5974751488637116, "num_chars": 118}, {"sum_logits": -116.97433471679688, "num_tokens": 43, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -122.8985366821289, "logits_per_token": -2.720333365506904, "logits_per_char": -0.5848716735839844, "num_chars": 200}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 958, "native_id": 47452, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 108.92459869384766, "incorrect_loss_raw": 86.99410756429036, "correct_loss_per_char": 0.4756532694054483, "incorrect_loss_per_char": 0.5925122973741659, "correct_loss_per_token": 2.3679260585619057, "incorrect_loss_per_token": 2.781307584186918, "correct_loss_uncond": -26.90868377685547, "incorrect_loss_uncond": -22.82349141438802}, "model_output": [{"sum_logits": -108.92459869384766, "num_tokens": 46, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -135.83328247070312, "logits_per_token": -2.3679260585619057, "logits_per_char": -0.4756532694054483, "num_chars": 229}, {"sum_logits": -65.28448486328125, "num_tokens": 26, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -89.60225677490234, "logits_per_token": -2.5109417255108175, "logits_per_char": -0.5627972833041487, "num_chars": 116}, {"sum_logits": -55.653038024902344, "num_tokens": 21, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -72.64761352539062, "logits_per_token": -2.6501446678524925, "logits_per_char": -0.5510201784643797, "num_chars": 101}, {"sum_logits": -140.0447998046875, "num_tokens": 44, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -167.2029266357422, "logits_per_token": -3.1828363591974433, "logits_per_char": -0.6637194303539692, "num_chars": 211}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 959, "native_id": 30343, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 29.578210830688477, "incorrect_loss_raw": 40.186676025390625, "correct_loss_per_char": 0.6572935740152995, "incorrect_loss_per_char": 0.9983950788922445, "correct_loss_per_token": 2.464850902557373, "incorrect_loss_per_token": 4.343409084964108, "correct_loss_uncond": -27.798776626586914, "incorrect_loss_uncond": -9.97140375773112}, "model_output": [{"sum_logits": -35.376869201660156, "num_tokens": 10, "num_tokens_all": 395, "is_greedy": false, "sum_logits_uncond": -48.67332077026367, "logits_per_token": -3.5376869201660157, "logits_per_char": -0.8040197545831854, "num_chars": 44}, {"sum_logits": -33.66087341308594, "num_tokens": 7, "num_tokens_all": 392, "is_greedy": false, "sum_logits_uncond": -41.49085235595703, "logits_per_token": -4.808696201869419, "logits_per_char": -1.1607197728650323, "num_chars": 29}, {"sum_logits": -29.578210830688477, "num_tokens": 12, "num_tokens_all": 397, "is_greedy": false, "sum_logits_uncond": -57.37698745727539, "logits_per_token": -2.464850902557373, "logits_per_char": -0.6572935740152995, "num_chars": 45}, {"sum_logits": -51.52228546142578, "num_tokens": 11, "num_tokens_all": 396, "is_greedy": false, "sum_logits_uncond": -60.31006622314453, "logits_per_token": -4.68384413285689, "logits_per_char": -1.0304457092285155, "num_chars": 50}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 960, "native_id": 22761, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 108.23873138427734, "incorrect_loss_raw": 146.44447580973306, "correct_loss_per_char": 0.4312300055150492, "incorrect_loss_per_char": 0.8260324248556895, "correct_loss_per_token": 2.302951731580369, "incorrect_loss_per_token": 3.5754475392556753, "correct_loss_uncond": -36.02373504638672, "incorrect_loss_uncond": -16.307477315266926}, "model_output": [{"sum_logits": -125.15824127197266, "num_tokens": 50, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -143.292724609375, "logits_per_token": -2.503164825439453, "logits_per_char": -0.49469660581807373, "num_chars": 253}, {"sum_logits": -153.63648986816406, "num_tokens": 33, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -165.53787231445312, "logits_per_token": -4.655651208126184, "logits_per_char": -1.2002850770950317, "num_chars": 128}, {"sum_logits": -160.5386962890625, "num_tokens": 45, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -179.42526245117188, "logits_per_token": -3.567526584201389, "logits_per_char": -0.7831155916539634, "num_chars": 205}, {"sum_logits": -108.23873138427734, "num_tokens": 47, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -144.26246643066406, "logits_per_token": -2.302951731580369, "logits_per_char": -0.4312300055150492, "num_chars": 251}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 961, "native_id": 10040, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 98.36854553222656, "incorrect_loss_raw": 159.42131551106772, "correct_loss_per_char": 0.43914529255458284, "incorrect_loss_per_char": 0.7236764743230203, "correct_loss_per_token": 2.1384466420049253, "incorrect_loss_per_token": 3.3638717725081144, "correct_loss_uncond": -23.701248168945312, "incorrect_loss_uncond": -8.172108968098959}, "model_output": [{"sum_logits": -142.87579345703125, "num_tokens": 42, "num_tokens_all": 482, "is_greedy": false, "sum_logits_uncond": -145.813232421875, "logits_per_token": -3.4018046061197915, "logits_per_char": -0.66453857421875, "num_chars": 215}, {"sum_logits": -98.36854553222656, "num_tokens": 46, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -122.06979370117188, "logits_per_token": -2.1384466420049253, "logits_per_char": -0.43914529255458284, "num_chars": 224}, {"sum_logits": -148.704345703125, "num_tokens": 46, "num_tokens_all": 486, "is_greedy": false, "sum_logits_uncond": -166.7696075439453, "logits_per_token": -3.2327031674592392, "logits_per_char": -0.7253870522103658, "num_chars": 205}, {"sum_logits": -186.68380737304688, "num_tokens": 54, "num_tokens_all": 494, "is_greedy": false, "sum_logits_uncond": -190.1974334716797, "logits_per_token": -3.4571075439453125, "logits_per_char": -0.7811037965399451, "num_chars": 239}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 962, "native_id": 37244, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 51.82373809814453, "incorrect_loss_raw": 74.35857772827148, "correct_loss_per_char": 0.43186448415120443, "incorrect_loss_per_char": 0.5373595180651184, "correct_loss_per_token": 1.7870254516601562, "incorrect_loss_per_token": 2.3229993111746654, "correct_loss_uncond": -32.971900939941406, "incorrect_loss_uncond": -24.10093053181966}, "model_output": [{"sum_logits": -74.07215881347656, "num_tokens": 36, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -100.04987335205078, "logits_per_token": -2.0575599670410156, "logits_per_char": -0.5004875595505173, "num_chars": 148}, {"sum_logits": -57.241886138916016, "num_tokens": 25, "num_tokens_all": 441, "is_greedy": false, "sum_logits_uncond": -80.17018127441406, "logits_per_token": -2.289675445556641, "logits_per_char": -0.5451608203706287, "num_chars": 105}, {"sum_logits": -51.82373809814453, "num_tokens": 29, "num_tokens_all": 445, "is_greedy": false, "sum_logits_uncond": -84.79563903808594, "logits_per_token": -1.7870254516601562, "logits_per_char": -0.43186448415120443, "num_chars": 120}, {"sum_logits": -91.76168823242188, "num_tokens": 35, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -115.1584701538086, "logits_per_token": -2.6217625209263393, "logits_per_char": -0.566430174274209, "num_chars": 162}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 963, "native_id": 43303, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 45.260780334472656, "incorrect_loss_raw": 57.644125620524086, "correct_loss_per_char": 0.38034269188632486, "incorrect_loss_per_char": 0.4670797320434848, "correct_loss_per_token": 1.6164564405168806, "incorrect_loss_per_token": 1.9979633974969833, "correct_loss_uncond": -30.274551391601562, "incorrect_loss_uncond": -24.77555211385091}, "model_output": [{"sum_logits": -45.260780334472656, "num_tokens": 28, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -75.53533172607422, "logits_per_token": -1.6164564405168806, "logits_per_char": -0.38034269188632486, "num_chars": 119}, {"sum_logits": -64.74720764160156, "num_tokens": 31, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -91.46112060546875, "logits_per_token": -2.088619601341986, "logits_per_char": -0.4726073550481866, "num_chars": 137}, {"sum_logits": -49.86100387573242, "num_tokens": 30, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -72.91744995117188, "logits_per_token": -1.662033462524414, "logits_per_char": -0.3835461836594802, "num_chars": 130}, {"sum_logits": -58.32416534423828, "num_tokens": 26, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -82.88046264648438, "logits_per_token": -2.2432371286245494, "logits_per_char": -0.5450856574227877, "num_chars": 107}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 964, "native_id": 4274, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 120.64976501464844, "incorrect_loss_raw": 73.40686289469402, "correct_loss_per_char": 0.6316741623803583, "incorrect_loss_per_char": 0.7286146629028537, "correct_loss_per_token": 2.5670162769074136, "incorrect_loss_per_token": 2.8513892616321836, "correct_loss_uncond": -37.61796569824219, "incorrect_loss_uncond": -21.006197611490887}, "model_output": [{"sum_logits": -91.39537811279297, "num_tokens": 40, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -125.37791442871094, "logits_per_token": -2.284884452819824, "logits_per_char": -0.5676731559800805, "num_chars": 161}, {"sum_logits": -52.548431396484375, "num_tokens": 17, "num_tokens_all": 449, "is_greedy": false, "sum_logits_uncond": -71.70884704589844, "logits_per_token": -3.0910841997931984, "logits_per_char": -0.7101139377903294, "num_chars": 74}, {"sum_logits": -76.27677917480469, "num_tokens": 24, "num_tokens_all": 456, "is_greedy": false, "sum_logits_uncond": -86.15242004394531, "logits_per_token": -3.178199132283529, "logits_per_char": -0.908056894938151, "num_chars": 84}, {"sum_logits": -120.64976501464844, "num_tokens": 47, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -158.26773071289062, "logits_per_token": -2.5670162769074136, "logits_per_char": -0.6316741623803583, "num_chars": 191}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 965, "native_id": 7057, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 62.94908142089844, "incorrect_loss_raw": 97.25639343261719, "correct_loss_per_char": 0.4141386935585423, "incorrect_loss_per_char": 0.6423501878441297, "correct_loss_per_token": 1.701326524889147, "incorrect_loss_per_token": 2.7930162234071987, "correct_loss_uncond": -19.060287475585938, "incorrect_loss_uncond": -24.828521728515625}, "model_output": [{"sum_logits": -100.98724365234375, "num_tokens": 34, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -120.76805877685547, "logits_per_token": -2.9702130485983456, "logits_per_char": -0.6823462408942145, "num_chars": 148}, {"sum_logits": -108.16998291015625, "num_tokens": 44, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -134.57473754882812, "logits_per_token": -2.4584087025035513, "logits_per_char": -0.6043015805036662, "num_chars": 179}, {"sum_logits": -62.94908142089844, "num_tokens": 37, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -82.00936889648438, "logits_per_token": -1.701326524889147, "logits_per_char": -0.4141386935585423, "num_chars": 152}, {"sum_logits": -82.61195373535156, "num_tokens": 28, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -110.91194915771484, "logits_per_token": -2.950426919119699, "logits_per_char": -0.6404027421345082, "num_chars": 129}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 966, "native_id": 12463, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 98.24691772460938, "incorrect_loss_raw": 92.41837310791016, "correct_loss_per_char": 0.5225899878968584, "incorrect_loss_per_char": 0.6045174954883087, "correct_loss_per_token": 2.4561729431152344, "incorrect_loss_per_token": 2.571384158508512, "correct_loss_uncond": -21.639488220214844, "incorrect_loss_uncond": -18.660171508789062}, "model_output": [{"sum_logits": -98.24691772460938, "num_tokens": 40, "num_tokens_all": 493, "is_greedy": false, "sum_logits_uncond": -119.88640594482422, "logits_per_token": -2.4561729431152344, "logits_per_char": -0.5225899878968584, "num_chars": 188}, {"sum_logits": -71.74971008300781, "num_tokens": 41, "num_tokens_all": 494, "is_greedy": false, "sum_logits_uncond": -88.40455627441406, "logits_per_token": -1.7499929288538492, "logits_per_char": -0.44843568801879885, "num_chars": 160}, {"sum_logits": -97.07836151123047, "num_tokens": 32, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -121.84407806396484, "logits_per_token": -3.033698797225952, "logits_per_char": -0.6788696609176956, "num_chars": 143}, {"sum_logits": -108.42704772949219, "num_tokens": 37, "num_tokens_all": 490, "is_greedy": false, "sum_logits_uncond": -122.98699951171875, "logits_per_token": -2.930460749445735, "logits_per_char": -0.6862471375284316, "num_chars": 158}], "label": 0, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 967, "native_id": 14877, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 17.39543914794922, "incorrect_loss_raw": 38.694037119547524, "correct_loss_per_char": 0.4141771225702195, "incorrect_loss_per_char": 0.6904361975820441, "correct_loss_per_token": 1.9328265719943576, "incorrect_loss_per_token": 2.9543443139343424, "correct_loss_uncond": -25.500469207763672, "incorrect_loss_uncond": -20.94406255086263}, "model_output": [{"sum_logits": -54.288856506347656, "num_tokens": 19, "num_tokens_all": 433, "is_greedy": false, "sum_logits_uncond": -78.15103912353516, "logits_per_token": -2.8573082371761926, "logits_per_char": -0.6786107063293457, "num_chars": 80}, {"sum_logits": -17.39543914794922, "num_tokens": 9, "num_tokens_all": 423, "is_greedy": false, "sum_logits_uncond": -42.89590835571289, "logits_per_token": -1.9328265719943576, "logits_per_char": -0.4141771225702195, "num_chars": 42}, {"sum_logits": -42.57952880859375, "num_tokens": 11, "num_tokens_all": 425, "is_greedy": false, "sum_logits_uncond": -58.0081787109375, "logits_per_token": -3.8708662553267046, "logits_per_char": -0.8870735168457031, "num_chars": 48}, {"sum_logits": -19.213726043701172, "num_tokens": 9, "num_tokens_all": 423, "is_greedy": false, "sum_logits_uncond": -42.75508117675781, "logits_per_token": -2.1348584493001304, "logits_per_char": -0.5056243695710835, "num_chars": 38}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 968, "native_id": 18164, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 19.971412658691406, "incorrect_loss_raw": 30.021381378173828, "correct_loss_per_char": 0.5706117902483259, "incorrect_loss_per_char": 0.5155140992311797, "correct_loss_per_token": 2.8530589512416293, "incorrect_loss_per_token": 2.2709375691210103, "correct_loss_uncond": -26.624927520751953, "incorrect_loss_uncond": -34.68954086303711}, "model_output": [{"sum_logits": -39.50432586669922, "num_tokens": 17, "num_tokens_all": 418, "is_greedy": false, "sum_logits_uncond": -85.53654479980469, "logits_per_token": -2.3237838745117188, "logits_per_char": -0.5197937614039371, "num_chars": 76}, {"sum_logits": -19.971412658691406, "num_tokens": 7, "num_tokens_all": 408, "is_greedy": false, "sum_logits_uncond": -46.59634017944336, "logits_per_token": -2.8530589512416293, "logits_per_char": -0.5706117902483259, "num_chars": 35}, {"sum_logits": -33.015316009521484, "num_tokens": 13, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -64.3904800415039, "logits_per_token": -2.5396396930401144, "logits_per_char": -0.4784828407177027, "num_chars": 69}, {"sum_logits": -17.54450225830078, "num_tokens": 9, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -44.20574188232422, "logits_per_token": -1.949389139811198, "logits_per_char": -0.5482656955718994, "num_chars": 32}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 969, "native_id": 50515, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 68.54122161865234, "incorrect_loss_raw": 99.19934844970703, "correct_loss_per_char": 0.49667551897574164, "incorrect_loss_per_char": 0.7371321443149689, "correct_loss_per_token": 2.077006715716738, "incorrect_loss_per_token": 2.997833218558469, "correct_loss_uncond": -24.191383361816406, "incorrect_loss_uncond": -18.047719319661457}, "model_output": [{"sum_logits": -95.78143310546875, "num_tokens": 33, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -108.73957061767578, "logits_per_token": -2.9024676698626894, "logits_per_char": -0.7311559779043416, "num_chars": 131}, {"sum_logits": -87.30269622802734, "num_tokens": 30, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -108.02491760253906, "logits_per_token": -2.910089874267578, "logits_per_char": -0.7461768908378406, "num_chars": 117}, {"sum_logits": -68.54122161865234, "num_tokens": 33, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -92.73260498046875, "logits_per_token": -2.077006715716738, "logits_per_char": -0.49667551897574164, "num_chars": 138}, {"sum_logits": -114.513916015625, "num_tokens": 36, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -134.97671508789062, "logits_per_token": -3.180942111545139, "logits_per_char": -0.7340635642027243, "num_chars": 156}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 970, "native_id": 11673, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 135.98121643066406, "incorrect_loss_raw": 163.1369832356771, "correct_loss_per_char": 0.5417578343851158, "incorrect_loss_per_char": 0.8162625293137177, "correct_loss_per_token": 2.5181706746419272, "incorrect_loss_per_token": 3.731187202935645, "correct_loss_uncond": -29.134567260742188, "incorrect_loss_uncond": -14.568781534830729}, "model_output": [{"sum_logits": -150.08831787109375, "num_tokens": 36, "num_tokens_all": 455, "is_greedy": false, "sum_logits_uncond": -158.5569610595703, "logits_per_token": -4.169119940863715, "logits_per_char": -0.9499260624752769, "num_chars": 158}, {"sum_logits": -135.98121643066406, "num_tokens": 54, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -165.11578369140625, "logits_per_token": -2.5181706746419272, "logits_per_char": -0.5417578343851158, "num_chars": 251}, {"sum_logits": -141.92919921875, "num_tokens": 45, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -158.13018798828125, "logits_per_token": -3.1539822048611112, "logits_per_char": -0.6694773548054245, "num_chars": 212}, {"sum_logits": -197.3934326171875, "num_tokens": 51, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -216.43014526367188, "logits_per_token": -3.8704594630821076, "logits_per_char": -0.8293841706604517, "num_chars": 238}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 971, "native_id": 16594, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 79.32784271240234, "incorrect_loss_raw": 133.98013051350912, "correct_loss_per_char": 0.48667388167118003, "incorrect_loss_per_char": 0.7016464955852076, "correct_loss_per_token": 2.4038740215879497, "incorrect_loss_per_token": 2.78999462093957, "correct_loss_uncond": -27.28321075439453, "incorrect_loss_uncond": -26.234878540039062}, "model_output": [{"sum_logits": -177.39984130859375, "num_tokens": 64, "num_tokens_all": 495, "is_greedy": false, "sum_logits_uncond": -209.47918701171875, "logits_per_token": -2.7718725204467773, "logits_per_char": -0.6075337031116225, "num_chars": 292}, {"sum_logits": -83.92504119873047, "num_tokens": 29, "num_tokens_all": 460, "is_greedy": false, "sum_logits_uncond": -104.29776763916016, "logits_per_token": -2.8939669378872575, "logits_per_char": -0.8148062252303929, "num_chars": 103}, {"sum_logits": -79.32784271240234, "num_tokens": 33, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -106.61105346679688, "logits_per_token": -2.4038740215879497, "logits_per_char": -0.48667388167118003, "num_chars": 163}, {"sum_logits": -140.61550903320312, "num_tokens": 52, "num_tokens_all": 483, "is_greedy": false, "sum_logits_uncond": -166.86807250976562, "logits_per_token": -2.7041444044846754, "logits_per_char": -0.6825995584136074, "num_chars": 206}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 972, "native_id": 17591, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 51.084320068359375, "incorrect_loss_raw": 102.26207987467448, "correct_loss_per_char": 0.37017623237941577, "incorrect_loss_per_char": 0.6512711760682871, "correct_loss_per_token": 1.8920118543836806, "incorrect_loss_per_token": 2.846620643179023, "correct_loss_uncond": -33.88336944580078, "incorrect_loss_uncond": -25.740503946940105}, "model_output": [{"sum_logits": -137.8607177734375, "num_tokens": 36, "num_tokens_all": 479, "is_greedy": false, "sum_logits_uncond": -158.93605041503906, "logits_per_token": -3.829464382595486, "logits_per_char": -0.9129848859168046, "num_chars": 151}, {"sum_logits": -51.084320068359375, "num_tokens": 27, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -84.96768951416016, "logits_per_token": -1.8920118543836806, "logits_per_char": -0.37017623237941577, "num_chars": 138}, {"sum_logits": -101.84457397460938, "num_tokens": 44, "num_tokens_all": 487, "is_greedy": false, "sum_logits_uncond": -131.0394744873047, "logits_per_token": -2.3146494085138496, "logits_per_char": -0.5041810592802445, "num_chars": 202}, {"sum_logits": -67.08094787597656, "num_tokens": 28, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -94.0322265625, "logits_per_token": -2.3957481384277344, "logits_per_char": -0.5366475830078125, "num_chars": 125}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 973, "native_id": 27450, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 128.9547119140625, "incorrect_loss_raw": 137.65865071614584, "correct_loss_per_char": 0.5057047526041667, "incorrect_loss_per_char": 0.701599134767803, "correct_loss_per_token": 2.8656602647569445, "incorrect_loss_per_token": 3.374739443258998, "correct_loss_uncond": -28.467941284179688, "incorrect_loss_uncond": -18.690338134765625}, "model_output": [{"sum_logits": -129.26821899414062, "num_tokens": 41, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -150.60699462890625, "logits_per_token": -3.1528833901009907, "logits_per_char": -0.6561838527621352, "num_chars": 197}, {"sum_logits": -128.9547119140625, "num_tokens": 45, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -157.4226531982422, "logits_per_token": -2.8656602647569445, "logits_per_char": -0.5057047526041667, "num_chars": 255}, {"sum_logits": -144.94691467285156, "num_tokens": 45, "num_tokens_all": 492, "is_greedy": false, "sum_logits_uncond": -166.33477783203125, "logits_per_token": -3.2210425482855904, "logits_per_char": -0.7105240915335861, "num_chars": 204}, {"sum_logits": -138.7608184814453, "num_tokens": 37, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -152.10519409179688, "logits_per_token": -3.750292391390414, "logits_per_char": -0.7380894600076878, "num_chars": 188}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 974, "native_id": 33638, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 133.8068389892578, "incorrect_loss_raw": 114.63009897867839, "correct_loss_per_char": 0.719391607469128, "incorrect_loss_per_char": 0.7538587350350943, "correct_loss_per_token": 2.573208442101112, "incorrect_loss_per_token": 3.224912537468805, "correct_loss_uncond": -28.80950927734375, "incorrect_loss_uncond": -30.0438969930013}, "model_output": [{"sum_logits": -164.3602294921875, "num_tokens": 36, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -184.92205810546875, "logits_per_token": -4.565561930338542, "logits_per_char": -1.0402546170391613, "num_chars": 158}, {"sum_logits": -105.0191879272461, "num_tokens": 40, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -143.385986328125, "logits_per_token": -2.6254796981811523, "logits_per_char": -0.6105766739956168, "num_chars": 172}, {"sum_logits": -133.8068389892578, "num_tokens": 52, "num_tokens_all": 489, "is_greedy": false, "sum_logits_uncond": -162.61634826660156, "logits_per_token": -2.573208442101112, "logits_per_char": -0.719391607469128, "num_chars": 186}, {"sum_logits": -74.51087951660156, "num_tokens": 30, "num_tokens_all": 467, "is_greedy": false, "sum_logits_uncond": -105.71394348144531, "logits_per_token": -2.4836959838867188, "logits_per_char": -0.6107449140705046, "num_chars": 122}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 975, "native_id": 31475, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 72.25425720214844, "incorrect_loss_raw": 83.72125244140625, "correct_loss_per_char": 0.5088327971982284, "incorrect_loss_per_char": 0.6813817068467486, "correct_loss_per_token": 1.9014278211091693, "incorrect_loss_per_token": 3.048785848378025, "correct_loss_uncond": -22.444122314453125, "incorrect_loss_uncond": -10.490275065104166}, "model_output": [{"sum_logits": -68.88517761230469, "num_tokens": 26, "num_tokens_all": 439, "is_greedy": false, "sum_logits_uncond": -82.95317840576172, "logits_per_token": -2.649429908165565, "logits_per_char": -0.5938377380371094, "num_chars": 116}, {"sum_logits": -80.74455261230469, "num_tokens": 23, "num_tokens_all": 436, "is_greedy": false, "sum_logits_uncond": -93.7599105834961, "logits_per_token": -3.5106327222741167, "logits_per_char": -0.7145535629407495, "num_chars": 113}, {"sum_logits": -101.53402709960938, "num_tokens": 34, "num_tokens_all": 447, "is_greedy": false, "sum_logits_uncond": -105.92149353027344, "logits_per_token": -2.9862949146943936, "logits_per_char": -0.7357538195623867, "num_chars": 138}, {"sum_logits": -72.25425720214844, "num_tokens": 38, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -94.69837951660156, "logits_per_token": -1.9014278211091693, "logits_per_char": -0.5088327971982284, "num_chars": 142}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 976, "native_id": 26468, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 88.40308380126953, "incorrect_loss_raw": 137.33682250976562, "correct_loss_per_char": 0.39642638475905617, "incorrect_loss_per_char": 0.5844297757661511, "correct_loss_per_token": 2.210077095031738, "incorrect_loss_per_token": 3.0922405382683404, "correct_loss_uncond": -25.602500915527344, "incorrect_loss_uncond": -13.256065368652344}, "model_output": [{"sum_logits": -150.6162109375, "num_tokens": 49, "num_tokens_all": 493, "is_greedy": false, "sum_logits_uncond": -163.07977294921875, "logits_per_token": -3.0738002232142856, "logits_per_char": -0.5537360696231618, "num_chars": 272}, {"sum_logits": -88.40308380126953, "num_tokens": 40, "num_tokens_all": 484, "is_greedy": false, "sum_logits_uncond": -114.00558471679688, "logits_per_token": -2.210077095031738, "logits_per_char": -0.39642638475905617, "num_chars": 223}, {"sum_logits": -157.63522338867188, "num_tokens": 41, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -164.60208129882812, "logits_per_token": -3.844761546065168, "logits_per_char": -0.7542355186060855, "num_chars": 209}, {"sum_logits": -103.759033203125, "num_tokens": 44, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -124.09680938720703, "logits_per_token": -2.3581598455255683, "logits_per_char": -0.445317739069206, "num_chars": 233}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 977, "native_id": 41583, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 71.92292022705078, "incorrect_loss_raw": 98.83735656738281, "correct_loss_per_char": 0.5490299253973342, "incorrect_loss_per_char": 0.641016528860062, "correct_loss_per_token": 2.5686757223946706, "incorrect_loss_per_token": 2.77850054043196, "correct_loss_uncond": -14.735252380371094, "incorrect_loss_uncond": -15.840937296549479}, "model_output": [{"sum_logits": -103.28915405273438, "num_tokens": 32, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -120.80628967285156, "logits_per_token": -3.227786064147949, "logits_per_char": -0.7484721308169158, "num_chars": 138}, {"sum_logits": -115.11874389648438, "num_tokens": 42, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -129.87991333007812, "logits_per_token": -2.7409224737258184, "logits_per_char": -0.650388383595957, "num_chars": 177}, {"sum_logits": -78.10417175292969, "num_tokens": 33, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -93.34867858886719, "logits_per_token": -2.366793083422112, "logits_per_char": -0.5241890721673134, "num_chars": 149}, {"sum_logits": -71.92292022705078, "num_tokens": 28, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -86.65817260742188, "logits_per_token": -2.5686757223946706, "logits_per_char": -0.5490299253973342, "num_chars": 131}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 978, "native_id": 15578, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 91.78787994384766, "incorrect_loss_raw": 104.75240325927734, "correct_loss_per_char": 0.46124562785853096, "incorrect_loss_per_char": 0.6105420802772231, "correct_loss_per_token": 2.2946969985961916, "incorrect_loss_per_token": 2.8973038005220033, "correct_loss_uncond": -19.694496154785156, "incorrect_loss_uncond": -24.014193216959637}, "model_output": [{"sum_logits": -107.55513000488281, "num_tokens": 34, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -122.69966125488281, "logits_per_token": -3.1633861766142, "logits_per_char": -0.6939040645476311, "num_chars": 155}, {"sum_logits": -116.448974609375, "num_tokens": 43, "num_tokens_all": 477, "is_greedy": false, "sum_logits_uncond": -147.91085815429688, "logits_per_token": -2.7081156885901163, "logits_per_char": -0.5736402690116995, "num_chars": 203}, {"sum_logits": -90.25310516357422, "num_tokens": 32, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -115.68927001953125, "logits_per_token": -2.8204095363616943, "logits_per_char": -0.5640819072723389, "num_chars": 160}, {"sum_logits": -91.78787994384766, "num_tokens": 40, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -111.48237609863281, "logits_per_token": -2.2946969985961916, "logits_per_char": -0.46124562785853096, "num_chars": 199}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 979, "native_id": 1806, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 100.26756286621094, "incorrect_loss_raw": 87.48901621500652, "correct_loss_per_char": 0.6004045680611433, "incorrect_loss_per_char": 0.6645551434176976, "correct_loss_per_token": 2.5066890716552734, "incorrect_loss_per_token": 2.557735477435498, "correct_loss_uncond": -19.848304748535156, "incorrect_loss_uncond": -16.164708455403645}, "model_output": [{"sum_logits": -99.65238952636719, "num_tokens": 39, "num_tokens_all": 444, "is_greedy": false, "sum_logits_uncond": -115.77035522460938, "logits_per_token": -2.555189475035056, "logits_per_char": -0.6733269562592378, "num_chars": 148}, {"sum_logits": -100.26756286621094, "num_tokens": 40, "num_tokens_all": 445, "is_greedy": false, "sum_logits_uncond": -120.1158676147461, "logits_per_token": -2.5066890716552734, "logits_per_char": -0.6004045680611433, "num_chars": 167}, {"sum_logits": -68.61830139160156, "num_tokens": 26, "num_tokens_all": 431, "is_greedy": false, "sum_logits_uncond": -86.69535064697266, "logits_per_token": -2.6391654381385217, "logits_per_char": -0.6661971008893356, "num_chars": 103}, {"sum_logits": -94.19635772705078, "num_tokens": 38, "num_tokens_all": 443, "is_greedy": false, "sum_logits_uncond": -108.49546813964844, "logits_per_token": -2.4788515191329155, "logits_per_char": -0.6541413731045194, "num_chars": 144}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 980, "native_id": 1400, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 128.05108642578125, "incorrect_loss_raw": 131.4884516398112, "correct_loss_per_char": 0.6434727961094535, "incorrect_loss_per_char": 0.7946560312405534, "correct_loss_per_token": 2.613287478077168, "incorrect_loss_per_token": 3.344987151277587, "correct_loss_uncond": -24.439178466796875, "incorrect_loss_uncond": -16.236801147460938}, "model_output": [{"sum_logits": -123.0073471069336, "num_tokens": 45, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -141.13421630859375, "logits_per_token": -2.733496602376302, "logits_per_char": -0.7736311138800855, "num_chars": 159}, {"sum_logits": -128.05108642578125, "num_tokens": 49, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -152.49026489257812, "logits_per_token": -2.613287478077168, "logits_per_char": -0.6434727961094535, "num_chars": 199}, {"sum_logits": -192.35365295410156, "num_tokens": 44, "num_tokens_all": 464, "is_greedy": false, "sum_logits_uncond": -205.45663452148438, "logits_per_token": -4.371673930775035, "logits_per_char": -1.001841942469279, "num_chars": 192}, {"sum_logits": -79.10435485839844, "num_tokens": 27, "num_tokens_all": 447, "is_greedy": false, "sum_logits_uncond": -96.58490753173828, "logits_per_token": -2.9297909206814237, "logits_per_char": -0.6084950373722957, "num_chars": 130}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 981, "native_id": 273, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 84.85969543457031, "incorrect_loss_raw": 78.83100255330403, "correct_loss_per_char": 0.40995021949067784, "incorrect_loss_per_char": 0.5157707379767589, "correct_loss_per_token": 1.8447759877080503, "incorrect_loss_per_token": 2.4546455949628387, "correct_loss_uncond": -41.25163269042969, "incorrect_loss_uncond": -25.047809600830078}, "model_output": [{"sum_logits": -64.77726745605469, "num_tokens": 28, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -87.29335021972656, "logits_per_token": -2.3134738377162387, "logits_per_char": -0.5398105621337891, "num_chars": 120}, {"sum_logits": -61.96950912475586, "num_tokens": 34, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -87.08734893798828, "logits_per_token": -1.8226326213163488, "logits_per_char": -0.34237297858981136, "num_chars": 181}, {"sum_logits": -109.74623107910156, "num_tokens": 34, "num_tokens_all": 457, "is_greedy": false, "sum_logits_uncond": -137.2557373046875, "logits_per_token": -3.2278303258559284, "logits_per_char": -0.6651286732066761, "num_chars": 165}, {"sum_logits": -84.85969543457031, "num_tokens": 46, "num_tokens_all": 469, "is_greedy": false, "sum_logits_uncond": -126.111328125, "logits_per_token": -1.8447759877080503, "logits_per_char": -0.40995021949067784, "num_chars": 207}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 982, "native_id": 9316, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 140.0171661376953, "incorrect_loss_raw": 163.45416768391928, "correct_loss_per_char": 0.48448846414427443, "incorrect_loss_per_char": 0.7551502274462359, "correct_loss_per_token": 2.2583413893176663, "incorrect_loss_per_token": 3.7325813029628763, "correct_loss_uncond": -14.111419677734375, "incorrect_loss_uncond": -4.2304331461588545}, "model_output": [{"sum_logits": -172.74917602539062, "num_tokens": 49, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -178.98114013671875, "logits_per_token": -3.525493388273278, "logits_per_char": -0.7712016786847796, "num_chars": 224}, {"sum_logits": -84.68894958496094, "num_tokens": 21, "num_tokens_all": 447, "is_greedy": false, "sum_logits_uncond": -97.73033142089844, "logits_per_token": -4.032807123093378, "logits_per_char": -0.7989523545751032, "num_chars": 106}, {"sum_logits": -232.92437744140625, "num_tokens": 64, "num_tokens_all": 490, "is_greedy": false, "sum_logits_uncond": -226.3423309326172, "logits_per_token": -3.6394433975219727, "logits_per_char": -0.6952966490788246, "num_chars": 335}, {"sum_logits": -140.0171661376953, "num_tokens": 62, "num_tokens_all": 488, "is_greedy": false, "sum_logits_uncond": -154.1285858154297, "logits_per_token": -2.2583413893176663, "logits_per_char": -0.48448846414427443, "num_chars": 289}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 983, "native_id": 2646, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 55.72770309448242, "incorrect_loss_raw": 29.47027015686035, "correct_loss_per_char": 0.6057359032008959, "incorrect_loss_per_char": 1.0567248282814519, "correct_loss_per_token": 3.0959835052490234, "incorrect_loss_per_token": 4.467815215247017, "correct_loss_uncond": -20.26473617553711, "incorrect_loss_uncond": -7.452866236368815}, "model_output": [{"sum_logits": -37.1451530456543, "num_tokens": 7, "num_tokens_all": 392, "is_greedy": false, "sum_logits_uncond": -42.6705322265625, "logits_per_token": -5.306450435093471, "logits_per_char": -1.3757464090983074, "num_chars": 27}, {"sum_logits": -55.72770309448242, "num_tokens": 18, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -75.99243927001953, "logits_per_token": -3.0959835052490234, "logits_per_char": -0.6057359032008959, "num_chars": 92}, {"sum_logits": -28.748483657836914, "num_tokens": 8, "num_tokens_all": 393, "is_greedy": false, "sum_logits_uncond": -43.63599395751953, "logits_per_token": -3.5935604572296143, "logits_per_char": -0.6685693873915561, "num_chars": 43}, {"sum_logits": -22.517173767089844, "num_tokens": 5, "num_tokens_all": 390, "is_greedy": false, "sum_logits_uncond": -24.46288299560547, "logits_per_token": -4.503434753417968, "logits_per_char": -1.125858688354492, "num_chars": 20}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 984, "native_id": 25764, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 126.66412353515625, "incorrect_loss_raw": 61.673292795817055, "correct_loss_per_char": 0.594667246643926, "incorrect_loss_per_char": 0.46026922014689947, "correct_loss_per_token": 2.4836102653952206, "incorrect_loss_per_token": 2.0418082039672414, "correct_loss_uncond": -14.2529296875, "incorrect_loss_uncond": -17.332647959391277}, "model_output": [{"sum_logits": -46.86298751831055, "num_tokens": 31, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -64.66226959228516, "logits_per_token": -1.5117092747842111, "logits_per_char": -0.3277131994287451, "num_chars": 143}, {"sum_logits": -126.66412353515625, "num_tokens": 51, "num_tokens_all": 478, "is_greedy": false, "sum_logits_uncond": -140.91705322265625, "logits_per_token": -2.4836102653952206, "logits_per_char": -0.594667246643926, "num_chars": 213}, {"sum_logits": -74.66402435302734, "num_tokens": 36, "num_tokens_all": 463, "is_greedy": false, "sum_logits_uncond": -94.39569854736328, "logits_per_token": -2.074000676472982, "logits_per_char": -0.49121068653307465, "num_chars": 152}, {"sum_logits": -63.49286651611328, "num_tokens": 25, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -77.95985412597656, "logits_per_token": -2.539714660644531, "logits_per_char": -0.5618837744788786, "num_chars": 113}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 985, "native_id": 40325, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 182.38003540039062, "incorrect_loss_raw": 73.3409423828125, "correct_loss_per_char": 0.7827469330488868, "incorrect_loss_per_char": 0.6323847933324771, "correct_loss_per_token": 2.989836645908043, "incorrect_loss_per_token": 2.5314526139644156, "correct_loss_uncond": -22.015243530273438, "incorrect_loss_uncond": -16.76391092936198}, "model_output": [{"sum_logits": -121.40489196777344, "num_tokens": 38, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -137.95101928710938, "logits_per_token": -3.194865578099301, "logits_per_char": -0.7987163945248252, "num_chars": 152}, {"sum_logits": -182.38003540039062, "num_tokens": 61, "num_tokens_all": 476, "is_greedy": false, "sum_logits_uncond": -204.39527893066406, "logits_per_token": -2.989836645908043, "logits_per_char": -0.7827469330488868, "num_chars": 233}, {"sum_logits": -49.82878112792969, "num_tokens": 24, "num_tokens_all": 439, "is_greedy": false, "sum_logits_uncond": -67.66946411132812, "logits_per_token": -2.076199213663737, "logits_per_char": -0.5794044317201127, "num_chars": 86}, {"sum_logits": -48.789154052734375, "num_tokens": 21, "num_tokens_all": 436, "is_greedy": false, "sum_logits_uncond": -64.69407653808594, "logits_per_token": -2.3232930501302085, "logits_per_char": -0.5190335537524934, "num_chars": 94}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 986, "native_id": 7028, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 22.04958152770996, "incorrect_loss_raw": 27.542226155598957, "correct_loss_per_char": 0.36749302546183266, "incorrect_loss_per_char": 0.5917738333737623, "correct_loss_per_token": 1.3780988454818726, "incorrect_loss_per_token": 2.443240192683056, "correct_loss_uncond": -30.35703468322754, "incorrect_loss_uncond": -27.41833750406901}, "model_output": [{"sum_logits": -18.817813873291016, "num_tokens": 11, "num_tokens_all": 404, "is_greedy": false, "sum_logits_uncond": -49.22969055175781, "logits_per_token": -1.710710352117365, "logits_per_char": -0.43762357844862826, "num_chars": 43}, {"sum_logits": -22.04958152770996, "num_tokens": 16, "num_tokens_all": 409, "is_greedy": false, "sum_logits_uncond": -52.4066162109375, "logits_per_token": -1.3780988454818726, "logits_per_char": -0.36749302546183266, "num_chars": 60}, {"sum_logits": -45.71257400512695, "num_tokens": 12, "num_tokens_all": 405, "is_greedy": false, "sum_logits_uncond": -72.17411804199219, "logits_per_token": -3.8093811670939126, "logits_per_char": -0.8963249804926854, "num_chars": 51}, {"sum_logits": -18.096290588378906, "num_tokens": 10, "num_tokens_all": 403, "is_greedy": false, "sum_logits_uncond": -43.477882385253906, "logits_per_token": -1.8096290588378907, "logits_per_char": -0.44137294117997333, "num_chars": 41}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 987, "native_id": 32808, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 66.03411865234375, "incorrect_loss_raw": 112.8713862101237, "correct_loss_per_char": 0.5643941765157585, "incorrect_loss_per_char": 0.8468413172572946, "correct_loss_per_token": 2.5397737943209133, "incorrect_loss_per_token": 3.8548654226609216, "correct_loss_uncond": -20.30193328857422, "incorrect_loss_uncond": -18.322611490885418}, "model_output": [{"sum_logits": -74.3137435913086, "num_tokens": 24, "num_tokens_all": 448, "is_greedy": false, "sum_logits_uncond": -90.78787231445312, "logits_per_token": -3.0964059829711914, "logits_per_char": -0.6192811965942383, "num_chars": 120}, {"sum_logits": -66.03411865234375, "num_tokens": 26, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -86.33605194091797, "logits_per_token": -2.5397737943209133, "logits_per_char": -0.5643941765157585, "num_chars": 117}, {"sum_logits": -156.00933837890625, "num_tokens": 35, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -179.84146118164062, "logits_per_token": -4.45740966796875, "logits_per_char": -1.0685571121842894, "num_chars": 146}, {"sum_logits": -108.29107666015625, "num_tokens": 27, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -122.9526596069336, "logits_per_token": -4.010780617042824, "logits_per_char": -0.8526856429933563, "num_chars": 127}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 988, "native_id": 1818, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 42.17558288574219, "incorrect_loss_raw": 92.04373041788737, "correct_loss_per_char": 0.4847768147786458, "incorrect_loss_per_char": 0.6749670413107003, "correct_loss_per_token": 2.2197675203022205, "incorrect_loss_per_token": 3.1644006617728118, "correct_loss_uncond": -36.157127380371094, "incorrect_loss_uncond": -28.66697057088216}, "model_output": [{"sum_logits": -122.39137268066406, "num_tokens": 36, "num_tokens_all": 437, "is_greedy": false, "sum_logits_uncond": -150.8978271484375, "logits_per_token": -3.3997603522406683, "logits_per_char": -0.6651705036992612, "num_chars": 184}, {"sum_logits": -56.27596664428711, "num_tokens": 24, "num_tokens_all": 425, "is_greedy": false, "sum_logits_uncond": -89.80966186523438, "logits_per_token": -2.344831943511963, "logits_per_char": -0.5862079858779907, "num_chars": 96}, {"sum_logits": -97.46385192871094, "num_tokens": 26, "num_tokens_all": 427, "is_greedy": false, "sum_logits_uncond": -121.42461395263672, "logits_per_token": -3.7486096895658054, "logits_per_char": -0.7735226343548487, "num_chars": 126}, {"sum_logits": -42.17558288574219, "num_tokens": 19, "num_tokens_all": 420, "is_greedy": false, "sum_logits_uncond": -78.33271026611328, "logits_per_token": -2.2197675203022205, "logits_per_char": -0.4847768147786458, "num_chars": 87}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 989, "native_id": 28589, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 99.88478088378906, "incorrect_loss_raw": 94.9987080891927, "correct_loss_per_char": 0.5122296455578926, "incorrect_loss_per_char": 0.6075050081434082, "correct_loss_per_token": 2.62854686536287, "incorrect_loss_per_token": 2.8862679636376534, "correct_loss_uncond": -18.972763061523438, "incorrect_loss_uncond": -13.835088094075521}, "model_output": [{"sum_logits": -70.13056945800781, "num_tokens": 30, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -89.11274719238281, "logits_per_token": -2.3376856486002606, "logits_per_char": -0.43831605911254884, "num_chars": 160}, {"sum_logits": -105.52685546875, "num_tokens": 30, "num_tokens_all": 466, "is_greedy": false, "sum_logits_uncond": -108.9261474609375, "logits_per_token": -3.517561848958333, "logits_per_char": -0.8117450420673077, "num_chars": 130}, {"sum_logits": -99.88478088378906, "num_tokens": 38, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -118.8575439453125, "logits_per_token": -2.62854686536287, "logits_per_char": -0.5122296455578926, "num_chars": 195}, {"sum_logits": -109.33869934082031, "num_tokens": 39, "num_tokens_all": 475, "is_greedy": false, "sum_logits_uncond": -128.46249389648438, "logits_per_token": -2.803556393354367, "logits_per_char": -0.5724539232503681, "num_chars": 191}], "label": 2, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 990, "native_id": 3131, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 12.007740020751953, "incorrect_loss_raw": 32.109673182169594, "correct_loss_per_char": 0.4002580006917318, "incorrect_loss_per_char": 0.7247295642335772, "correct_loss_per_token": 1.7153914315359933, "incorrect_loss_per_token": 3.226241464325876, "correct_loss_uncond": -20.59601593017578, "incorrect_loss_uncond": -20.75459925333659}, "model_output": [{"sum_logits": -34.65558624267578, "num_tokens": 11, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -59.47892761230469, "logits_per_token": -3.150507840243253, "logits_per_char": -0.6931117248535156, "num_chars": 50}, {"sum_logits": -45.161434173583984, "num_tokens": 14, "num_tokens_all": 419, "is_greedy": false, "sum_logits_uncond": -70.3480224609375, "logits_per_token": -3.2258167266845703, "logits_per_char": -0.6947912949782151, "num_chars": 65}, {"sum_logits": -16.511999130249023, "num_tokens": 5, "num_tokens_all": 410, "is_greedy": false, "sum_logits_uncond": -28.765867233276367, "logits_per_token": -3.302399826049805, "logits_per_char": -0.7862856728690011, "num_chars": 21}, {"sum_logits": -12.007740020751953, "num_tokens": 7, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -32.603755950927734, "logits_per_token": -1.7153914315359933, "logits_per_char": -0.4002580006917318, "num_chars": 30}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 991, "native_id": 46897, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 67.77804565429688, "incorrect_loss_raw": 109.14047241210938, "correct_loss_per_char": 0.31234122421334964, "incorrect_loss_per_char": 0.48906580652884485, "correct_loss_per_token": 1.7378986065204327, "incorrect_loss_per_token": 2.403653242649176, "correct_loss_uncond": -20.257827758789062, "incorrect_loss_uncond": -33.31090291341146}, "model_output": [{"sum_logits": -101.87223815917969, "num_tokens": 48, "num_tokens_all": 470, "is_greedy": false, "sum_logits_uncond": -130.68923950195312, "logits_per_token": -2.12233829498291, "logits_per_char": -0.4921364162279212, "num_chars": 207}, {"sum_logits": -108.37177276611328, "num_tokens": 52, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -138.30145263671875, "logits_per_token": -2.084072553194486, "logits_per_char": -0.38025183426706416, "num_chars": 285}, {"sum_logits": -117.17740631103516, "num_tokens": 39, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -158.36343383789062, "logits_per_token": -3.004548879770132, "logits_per_char": -0.594809169091549, "num_chars": 197}, {"sum_logits": -67.77804565429688, "num_tokens": 39, "num_tokens_all": 461, "is_greedy": false, "sum_logits_uncond": -88.03587341308594, "logits_per_token": -1.7378986065204327, "logits_per_char": -0.31234122421334964, "num_chars": 217}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 992, "native_id": 12295, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 14.23524284362793, "incorrect_loss_raw": 31.411584218343098, "correct_loss_per_char": 0.5084015301295689, "incorrect_loss_per_char": 0.8831395848041662, "correct_loss_per_token": 2.0336061205182756, "incorrect_loss_per_token": 4.004084238930354, "correct_loss_uncond": -21.4310245513916, "incorrect_loss_uncond": -9.214935302734375}, "model_output": [{"sum_logits": -35.75697326660156, "num_tokens": 12, "num_tokens_all": 421, "is_greedy": false, "sum_logits_uncond": -49.07135009765625, "logits_per_token": -2.979747772216797, "logits_per_char": -0.7151394653320312, "num_chars": 50}, {"sum_logits": -14.23524284362793, "num_tokens": 7, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -35.66626739501953, "logits_per_token": -2.0336061205182756, "logits_per_char": -0.5084015301295689, "num_chars": 28}, {"sum_logits": -28.498531341552734, "num_tokens": 6, "num_tokens_all": 415, "is_greedy": false, "sum_logits_uncond": -35.253238677978516, "logits_per_token": -4.749755223592122, "logits_per_char": -0.8635918588349314, "num_chars": 33}, {"sum_logits": -29.979248046875, "num_tokens": 7, "num_tokens_all": 416, "is_greedy": false, "sum_logits_uncond": -37.554969787597656, "logits_per_token": -4.282749720982143, "logits_per_char": -1.0706874302455358, "num_chars": 28}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 993, "native_id": 48498, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 87.92649841308594, "incorrect_loss_raw": 121.08386739095052, "correct_loss_per_char": 0.40896045773528344, "incorrect_loss_per_char": 0.6267157186182994, "correct_loss_per_token": 2.1981624603271483, "incorrect_loss_per_token": 2.972567086014132, "correct_loss_uncond": -25.705673217773438, "incorrect_loss_uncond": -13.772994995117188}, "model_output": [{"sum_logits": -159.89402770996094, "num_tokens": 54, "num_tokens_all": 485, "is_greedy": false, "sum_logits_uncond": -167.3109893798828, "logits_per_token": -2.961000513147425, "logits_per_char": -0.592200102629485, "num_chars": 270}, {"sum_logits": -115.31575012207031, "num_tokens": 37, "num_tokens_all": 468, "is_greedy": false, "sum_logits_uncond": -137.4557647705078, "logits_per_token": -3.1166418951910897, "logits_per_char": -0.7162468951681386, "num_chars": 161}, {"sum_logits": -88.04182434082031, "num_tokens": 31, "num_tokens_all": 462, "is_greedy": false, "sum_logits_uncond": -99.8038330078125, "logits_per_token": -2.840058849703881, "logits_per_char": -0.5717001580572748, "num_chars": 154}, {"sum_logits": -87.92649841308594, "num_tokens": 40, "num_tokens_all": 471, "is_greedy": false, "sum_logits_uncond": -113.63217163085938, "logits_per_token": -2.1981624603271483, "logits_per_char": -0.40896045773528344, "num_chars": 215}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 994, "native_id": 49361, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 42.36063003540039, "incorrect_loss_raw": 56.52142842610677, "correct_loss_per_char": 0.5724409464243296, "incorrect_loss_per_char": 0.4774601654251451, "correct_loss_per_token": 2.1180315017700195, "incorrect_loss_per_token": 2.1738085766031285, "correct_loss_uncond": -18.08092498779297, "incorrect_loss_uncond": -20.77947998046875}, "model_output": [{"sum_logits": -32.508235931396484, "num_tokens": 22, "num_tokens_all": 444, "is_greedy": false, "sum_logits_uncond": -65.59426879882812, "logits_per_token": -1.4776470877907493, "logits_per_char": -0.29824069661831637, "num_chars": 109}, {"sum_logits": -42.36063003540039, "num_tokens": 20, "num_tokens_all": 442, "is_greedy": false, "sum_logits_uncond": -60.44155502319336, "logits_per_token": -2.1180315017700195, "logits_per_char": -0.5724409464243296, "num_chars": 74}, {"sum_logits": -57.029239654541016, "num_tokens": 24, "num_tokens_all": 446, "is_greedy": false, "sum_logits_uncond": -72.48384094238281, "logits_per_token": -2.376218318939209, "logits_per_char": -0.5137769338246938, "num_chars": 111}, {"sum_logits": -80.02680969238281, "num_tokens": 30, "num_tokens_all": 452, "is_greedy": false, "sum_logits_uncond": -93.82461547851562, "logits_per_token": -2.667560323079427, "logits_per_char": -0.620362865832425, "num_chars": 129}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 995, "native_id": 25321, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 83.54093933105469, "incorrect_loss_raw": 95.63929748535156, "correct_loss_per_char": 0.5093959715308213, "incorrect_loss_per_char": 0.6239921875471194, "correct_loss_per_token": 2.1420753674629407, "incorrect_loss_per_token": 2.701904123185447, "correct_loss_uncond": -11.458847045898438, "incorrect_loss_uncond": -15.895929972330729}, "model_output": [{"sum_logits": -88.74957275390625, "num_tokens": 34, "num_tokens_all": 445, "is_greedy": false, "sum_logits_uncond": -105.39776611328125, "logits_per_token": -2.610281551585478, "logits_per_char": -0.6384861349201888, "num_chars": 139}, {"sum_logits": -73.84449005126953, "num_tokens": 32, "num_tokens_all": 443, "is_greedy": false, "sum_logits_uncond": -95.17648315429688, "logits_per_token": -2.307640314102173, "logits_per_char": -0.4890363579554274, "num_chars": 151}, {"sum_logits": -124.3238296508789, "num_tokens": 39, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -134.03143310546875, "logits_per_token": -3.18779050386869, "logits_per_char": -0.7444540697657419, "num_chars": 167}, {"sum_logits": -83.54093933105469, "num_tokens": 39, "num_tokens_all": 450, "is_greedy": false, "sum_logits_uncond": -94.99978637695312, "logits_per_token": -2.1420753674629407, "logits_per_char": -0.5093959715308213, "num_chars": 164}], "label": 3, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 996, "native_id": 27234, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 88.89274597167969, "incorrect_loss_raw": 136.00791931152344, "correct_loss_per_char": 0.5772256231927252, "incorrect_loss_per_char": 0.6676618015176251, "correct_loss_per_token": 2.8675079345703125, "incorrect_loss_per_token": 3.016023053851434, "correct_loss_uncond": -17.371498107910156, "incorrect_loss_uncond": -8.464121500651041}, "model_output": [{"sum_logits": -98.40853118896484, "num_tokens": 34, "num_tokens_all": 451, "is_greedy": false, "sum_logits_uncond": -105.23101806640625, "logits_per_token": -2.894368564381319, "logits_per_char": -0.6112331129749369, "num_chars": 161}, {"sum_logits": -88.89274597167969, "num_tokens": 31, "num_tokens_all": 448, "is_greedy": false, "sum_logits_uncond": -106.26424407958984, "logits_per_token": -2.8675079345703125, "logits_per_char": -0.5772256231927252, "num_chars": 154}, {"sum_logits": -105.43587493896484, "num_tokens": 41, "num_tokens_all": 458, "is_greedy": false, "sum_logits_uncond": -117.79183959960938, "logits_per_token": -2.571606705828411, "logits_per_char": -0.6094559245026869, "num_chars": 173}, {"sum_logits": -204.17935180664062, "num_tokens": 57, "num_tokens_all": 474, "is_greedy": false, "sum_logits_uncond": -210.3932647705078, "logits_per_token": -3.5820938913445723, "logits_per_char": -0.7822963670752514, "num_chars": 261}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 997, "native_id": 41984, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 19.70844268798828, "incorrect_loss_raw": 28.2456693649292, "correct_loss_per_char": 0.5326606131888725, "incorrect_loss_per_char": 0.48058287302652997, "correct_loss_per_token": 2.463555335998535, "incorrect_loss_per_token": 2.2167846820972583, "correct_loss_uncond": -17.560592651367188, "incorrect_loss_uncond": -26.617375373840332}, "model_output": [{"sum_logits": -19.82086753845215, "num_tokens": 9, "num_tokens_all": 413, "is_greedy": false, "sum_logits_uncond": -44.08959197998047, "logits_per_token": -2.202318615383572, "logits_per_char": -0.45047426223754883, "num_chars": 44}, {"sum_logits": -19.70844268798828, "num_tokens": 8, "num_tokens_all": 412, "is_greedy": false, "sum_logits_uncond": -37.26903533935547, "logits_per_token": -2.463555335998535, "logits_per_char": -0.5326606131888725, "num_chars": 37}, {"sum_logits": -10.420710563659668, "num_tokens": 10, "num_tokens_all": 414, "is_greedy": false, "sum_logits_uncond": -48.54814147949219, "logits_per_token": -1.0420710563659668, "logits_per_char": -0.2742292253594649, "num_chars": 38}, {"sum_logits": -54.49542999267578, "num_tokens": 16, "num_tokens_all": 420, "is_greedy": false, "sum_logits_uncond": -71.95140075683594, "logits_per_token": -3.4059643745422363, "logits_per_char": -0.7170451314825761, "num_chars": 76}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 998, "native_id": 21416, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 159.11239624023438, "incorrect_loss_raw": 119.37386067708333, "correct_loss_per_char": 0.7838049075873614, "incorrect_loss_per_char": 0.9153580241051665, "correct_loss_per_token": 3.247191760004783, "incorrect_loss_per_token": 4.168105513319023, "correct_loss_uncond": -26.029296875, "incorrect_loss_uncond": -23.474263509114582}, "model_output": [{"sum_logits": -78.24696350097656, "num_tokens": 16, "num_tokens_all": 420, "is_greedy": false, "sum_logits_uncond": -91.1119384765625, "logits_per_token": -4.890435218811035, "logits_per_char": -0.9904678924174248, "num_chars": 79}, {"sum_logits": -159.11239624023438, "num_tokens": 49, "num_tokens_all": 453, "is_greedy": false, "sum_logits_uncond": -185.14169311523438, "logits_per_token": -3.247191760004783, "logits_per_char": -0.7838049075873614, "num_chars": 203}, {"sum_logits": -212.30706787109375, "num_tokens": 55, "num_tokens_all": 459, "is_greedy": false, "sum_logits_uncond": -246.31446838378906, "logits_per_token": -3.860128506747159, "logits_per_char": -0.8665594606983419, "num_chars": 245}, {"sum_logits": -67.56755065917969, "num_tokens": 18, "num_tokens_all": 422, "is_greedy": false, "sum_logits_uncond": -91.11796569824219, "logits_per_token": -3.7537528143988714, "logits_per_char": -0.8890467191997328, "num_chars": 76}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 999, "native_id": 30291, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 54.1335334777832, "incorrect_loss_raw": 118.86724853515625, "correct_loss_per_char": 0.44371748752281315, "incorrect_loss_per_char": 0.6098093386049624, "correct_loss_per_token": 1.9333404813494002, "incorrect_loss_per_token": 2.7904978262874742, "correct_loss_uncond": -34.4997673034668, "incorrect_loss_uncond": -23.80511728922526}, "model_output": [{"sum_logits": -85.455810546875, "num_tokens": 35, "num_tokens_all": 472, "is_greedy": false, "sum_logits_uncond": -110.35094451904297, "logits_per_token": -2.4415945870535714, "logits_per_char": -0.5340988159179687, "num_chars": 160}, {"sum_logits": -54.1335334777832, "num_tokens": 28, "num_tokens_all": 465, "is_greedy": false, "sum_logits_uncond": -88.63330078125, "logits_per_token": -1.9333404813494002, "logits_per_char": -0.44371748752281315, "num_chars": 122}, {"sum_logits": -91.35255432128906, "num_tokens": 36, "num_tokens_all": 473, "is_greedy": false, "sum_logits_uncond": -108.53208923339844, "logits_per_token": -2.5375709533691406, "logits_per_char": -0.5818634033203125, "num_chars": 157}, {"sum_logits": -179.7933807373047, "num_tokens": 53, "num_tokens_all": 490, "is_greedy": false, "sum_logits_uncond": -209.13406372070312, "logits_per_token": -3.392327938439711, "logits_per_char": -0.7134657965766059, "num_chars": 252}], "label": 1, "task_hash": "8312d0c6fac4c6da5cc98a431402ea60", "model_hash": "03418cf8091a9882619950ffb07429a5"}