diff --git "a/evals/core_9mcqa/task-007-csqa-predictions.jsonl" "b/evals/core_9mcqa/task-007-csqa-predictions.jsonl" new file mode 100644--- /dev/null +++ "b/evals/core_9mcqa/task-007-csqa-predictions.jsonl" @@ -0,0 +1,1221 @@ +{"doc_id": 0, "native_id": "1afa02df02c908a558b4036e80242fac", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 1.7067879438400269, "incorrect_loss_raw": 7.031746983528137, "correct_loss_per_char": 0.34135758876800537, "incorrect_loss_per_char": 0.8400718456974217, "correct_loss_per_token": 1.7067879438400269, "incorrect_loss_per_token": 4.38541853427887, "correct_loss_uncond": -9.934908032417297, "incorrect_loss_uncond": -7.927129149436951}, "model_output": [{"sum_logits": -1.7067879438400269, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": true, "sum_logits_uncond": -11.641695976257324, "logits_per_token": -1.7067879438400269, "logits_per_char": -0.34135758876800537, "num_chars": 5}, {"sum_logits": -5.882589817047119, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -13.023554801940918, "logits_per_token": -5.882589817047119, "logits_per_char": -0.7353237271308899, "num_chars": 8}, {"sum_logits": -5.536731719970703, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -16.58877944946289, "logits_per_token": -2.7683658599853516, "logits_per_char": -0.3256901011747472, "num_chars": 17}, {"sum_logits": -4.982244491577148, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.388150215148926, "logits_per_token": -4.982244491577148, "logits_per_char": -0.9964488983154297, "num_chars": 5}, {"sum_logits": -11.725421905517578, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -15.835020065307617, "logits_per_token": -3.9084739685058594, "logits_per_char": -1.3028246561686199, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1, "native_id": "a7ab086045575bb497933726e4e6ad28", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.554998397827148, "incorrect_loss_raw": 14.93218207359314, "correct_loss_per_char": 0.5811537229097806, "incorrect_loss_per_char": 1.0331190730264443, "correct_loss_per_token": 3.777499198913574, "incorrect_loss_per_token": 5.618274390697479, "correct_loss_uncond": -12.634359359741211, "incorrect_loss_uncond": -5.244855165481567}, "model_output": [{"sum_logits": -7.554998397827148, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.18935775756836, "logits_per_token": -3.777499198913574, "logits_per_char": -0.5811537229097806, "num_chars": 13}, {"sum_logits": -14.34862995147705, "num_tokens": 4, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.90337371826172, "logits_per_token": -3.5871574878692627, "logits_per_char": -0.652210452339866, "num_chars": 22}, {"sum_logits": -14.601835250854492, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.989015579223633, "logits_per_token": -7.300917625427246, "logits_per_char": -1.1232180962195764, "num_chars": 13}, {"sum_logits": -15.561826705932617, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -22.85982894897461, "logits_per_token": -7.780913352966309, "logits_per_char": -1.5561826705932618, "num_chars": 10}, {"sum_logits": -15.216436386108398, "num_tokens": 4, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.955930709838867, "logits_per_token": -3.8041090965270996, "logits_per_char": -0.8008650729530736, "num_chars": 19}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 2, "native_id": "b8c0a4703079cf661d7261a60a1bcbff", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.7779548168182373, "incorrect_loss_raw": 10.771382570266724, "correct_loss_per_char": 0.17779548168182374, "incorrect_loss_per_char": 1.2226886768189689, "correct_loss_per_token": 0.8889774084091187, "incorrect_loss_per_token": 7.537142753601074, "correct_loss_uncond": -13.99925446510315, "incorrect_loss_uncond": -3.5869710445404053}, "model_output": [{"sum_logits": -10.2712984085083, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.913461685180664, "logits_per_token": -10.2712984085083, "logits_per_char": -1.4673283440726144, "num_chars": 7}, {"sum_logits": -1.7779548168182373, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": true, "sum_logits_uncond": -15.777209281921387, "logits_per_token": -0.8889774084091187, "logits_per_char": -0.17779548168182374, "num_chars": 10}, {"sum_logits": -6.940313339233398, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.348342895507812, "logits_per_token": -6.940313339233398, "logits_per_char": -0.9914733341761998, "num_chars": 7}, {"sum_logits": -11.161755561828613, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.714458465576172, "logits_per_token": -5.580877780914307, "logits_per_char": -0.7972682544163295, "num_chars": 14}, {"sum_logits": -14.712162971496582, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.457151412963867, "logits_per_token": -7.356081485748291, "logits_per_char": -1.6346847746107314, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 3, "native_id": "e68fb2448fd74e402aae9982aa76e527", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.95412015914917, "incorrect_loss_raw": 18.000744104385376, "correct_loss_per_char": 0.14067238853091285, "incorrect_loss_per_char": 1.7722482413956615, "correct_loss_per_token": 0.98470671971639, "incorrect_loss_per_token": 8.936286687850952, "correct_loss_uncond": -17.125568866729736, "incorrect_loss_uncond": -3.1748077869415283}, "model_output": [{"sum_logits": -2.95412015914917, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.079689025878906, "logits_per_token": -0.98470671971639, "logits_per_char": -0.14067238853091285, "num_chars": 21}, {"sum_logits": -9.012195587158203, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.903332710266113, "logits_per_token": -9.012195587158203, "logits_per_char": -1.5020325978597004, "num_chars": 6}, {"sum_logits": -25.168907165527344, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -28.798595428466797, "logits_per_token": -6.292226791381836, "logits_per_char": -1.2584453582763673, "num_chars": 20}, {"sum_logits": -11.750149726867676, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.5486421585083, "logits_per_token": -11.750149726867676, "logits_per_char": -1.9583582878112793, "num_chars": 6}, {"sum_logits": -26.07172393798828, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -28.451637268066406, "logits_per_token": -8.690574645996094, "logits_per_char": -2.3701567216352983, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 4, "native_id": "2435de612dd69f2012b9e40d6af4ce38", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.991920471191406, "incorrect_loss_raw": 7.7431275844573975, "correct_loss_per_char": 1.1239900588989258, "incorrect_loss_per_char": 0.8207349304168943, "correct_loss_per_token": 4.495960235595703, "incorrect_loss_per_token": 5.377462863922119, "correct_loss_uncond": -9.195199966430664, "incorrect_loss_uncond": -8.914209365844727}, "model_output": [{"sum_logits": -8.991920471191406, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.18712043762207, "logits_per_token": -4.495960235595703, "logits_per_char": -1.1239900588989258, "num_chars": 8}, {"sum_logits": -4.269416809082031, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.157535552978516, "logits_per_token": -4.269416809082031, "logits_per_char": -0.35578473409016925, "num_chars": 12}, {"sum_logits": -7.777775764465332, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.386797904968262, "logits_per_token": -7.777775764465332, "logits_per_char": -1.1111108234950475, "num_chars": 7}, {"sum_logits": -7.226576805114746, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.29250717163086, "logits_per_token": -3.613288402557373, "logits_per_char": -0.5161840575081962, "num_chars": 14}, {"sum_logits": -11.69874095916748, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.79250717163086, "logits_per_token": -5.84937047958374, "logits_per_char": -1.2998601065741644, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 5, "native_id": "a4892551cb4beb279653ae52d0de4c89", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.538084030151367, "incorrect_loss_raw": 13.759393453598022, "correct_loss_per_char": 0.7527202878679548, "incorrect_loss_per_char": 1.5782085471683078, "correct_loss_per_token": 2.634521007537842, "incorrect_loss_per_token": 8.101992666721344, "correct_loss_uncond": -9.502996444702148, "incorrect_loss_uncond": -3.2882988452911377}, "model_output": [{"sum_logits": -17.347625732421875, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.204669952392578, "logits_per_token": -8.673812866210938, "logits_per_char": -1.9275139702690973, "num_chars": 9}, {"sum_logits": -10.58944034576416, "num_tokens": 4, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.18499183654785, "logits_per_token": -2.64736008644104, "logits_per_char": -0.7059626897176107, "num_chars": 15}, {"sum_logits": -10.538084030151367, "num_tokens": 4, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.041080474853516, "logits_per_token": -2.634521007537842, "logits_per_char": -0.7527202878679548, "num_chars": 14}, {"sum_logits": -12.027420043945312, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.80486488342285, "logits_per_token": -6.013710021972656, "logits_per_char": -2.0045700073242188, "num_chars": 6}, {"sum_logits": -15.073087692260742, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.99624252319336, "logits_per_token": -15.073087692260742, "logits_per_char": -1.6747875213623047, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 6, "native_id": "118a9093a30695622363455e4d911866", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.312840938568115, "incorrect_loss_raw": 15.70208740234375, "correct_loss_per_char": 1.044691562652588, "incorrect_loss_per_char": 1.5926167838501208, "correct_loss_per_token": 3.6564204692840576, "incorrect_loss_per_token": 7.045174932479858, "correct_loss_uncond": -9.566626071929932, "incorrect_loss_uncond": -4.428542375564575}, "model_output": [{"sum_logits": -25.22488784790039, "num_tokens": 5, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -32.296730041503906, "logits_per_token": -5.0449775695800785, "logits_per_char": -1.1465858112681995, "num_chars": 22}, {"sum_logits": -7.312840938568115, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.879467010498047, "logits_per_token": -3.6564204692840576, "logits_per_char": -1.044691562652588, "num_chars": 7}, {"sum_logits": -12.622766494750977, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.265960693359375, "logits_per_token": -6.311383247375488, "logits_per_char": -2.1037944157918296, "num_chars": 6}, {"sum_logits": -12.756160736083984, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.5768461227417, "logits_per_token": -12.756160736083984, "logits_per_char": -1.594520092010498, "num_chars": 8}, {"sum_logits": -12.204534530639648, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.38298225402832, "logits_per_token": -4.068178176879883, "logits_per_char": -1.525566816329956, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 7, "native_id": "05ea49b82e8ec519e82d6633936ab8bf", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 14.521194458007812, "incorrect_loss_raw": 13.523855924606323, "correct_loss_per_char": 0.6914854503813244, "incorrect_loss_per_char": 1.3984556445827732, "correct_loss_per_token": 3.630298614501953, "incorrect_loss_per_token": 7.821299076080322, "correct_loss_uncond": -4.683692932128906, "incorrect_loss_uncond": -4.0949835777282715}, "model_output": [{"sum_logits": -18.16378402709961, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.54102325439453, "logits_per_token": -9.081892013549805, "logits_per_char": -1.2974131447928292, "num_chars": 14}, {"sum_logits": -11.589014053344727, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.312774658203125, "logits_per_token": -5.794507026672363, "logits_per_char": -1.1589014053344726, "num_chars": 10}, {"sum_logits": -15.867656707763672, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.27543067932129, "logits_per_token": -7.933828353881836, "logits_per_char": -1.4425142461603337, "num_chars": 11}, {"sum_logits": -14.521194458007812, "num_tokens": 4, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.20488739013672, "logits_per_token": -3.630298614501953, "logits_per_char": -0.6914854503813244, "num_chars": 21}, {"sum_logits": -8.474968910217285, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.346129417419434, "logits_per_token": -8.474968910217285, "logits_per_char": -1.694993782043457, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 8, "native_id": "c0c07ce781653b2a2c01871ba2bcba93", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.636686325073242, "incorrect_loss_raw": 12.341598749160767, "correct_loss_per_char": 0.4040762583414714, "incorrect_loss_per_char": 1.288028220232431, "correct_loss_per_token": 3.636686325073242, "incorrect_loss_per_token": 7.695304572582245, "correct_loss_uncond": -11.12927532196045, "incorrect_loss_uncond": -3.4361984729766846}, "model_output": [{"sum_logits": -3.636686325073242, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.765961647033691, "logits_per_token": -3.636686325073242, "logits_per_char": -0.4040762583414714, "num_chars": 9}, {"sum_logits": -12.30652141571045, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.746564865112305, "logits_per_token": -3.0766303539276123, "logits_per_char": -0.6153260707855225, "num_chars": 20}, {"sum_logits": -9.008869171142578, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -10.810015678405762, "logits_per_token": -9.008869171142578, "logits_per_char": -1.5014781951904297, "num_chars": 6}, {"sum_logits": -9.340433120727539, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.185132026672363, "logits_per_token": -9.340433120727539, "logits_per_char": -1.3343475886753626, "num_chars": 7}, {"sum_logits": -18.7105712890625, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.369476318359375, "logits_per_token": -9.35528564453125, "logits_per_char": -1.7009610262784092, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 9, "native_id": "1d24f406b6828492040b405d3f35119c", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.646801948547363, "incorrect_loss_raw": 10.92227029800415, "correct_loss_per_char": 0.5808502435684204, "incorrect_loss_per_char": 1.3532382579950184, "correct_loss_per_token": 4.646801948547363, "incorrect_loss_per_token": 8.419472575187683, "correct_loss_uncond": -9.757416725158691, "incorrect_loss_uncond": -6.024866580963135}, "model_output": [{"sum_logits": -9.467665672302246, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.619281768798828, "logits_per_token": -9.467665672302246, "logits_per_char": -2.3669164180755615, "num_chars": 4}, {"sum_logits": -13.408204078674316, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.166391372680664, "logits_per_token": -6.704102039337158, "logits_per_char": -1.1173503398895264, "num_chars": 12}, {"sum_logits": -4.646801948547363, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.404218673706055, "logits_per_token": -4.646801948547363, "logits_per_char": -0.5808502435684204, "num_chars": 8}, {"sum_logits": -14.199033737182617, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.151235580444336, "logits_per_token": -14.199033737182617, "logits_per_char": -1.4199033737182618, "num_chars": 10}, {"sum_logits": -6.614177703857422, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.851638793945312, "logits_per_token": -3.307088851928711, "logits_per_char": -0.5087829002967248, "num_chars": 13}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 10, "native_id": "57f92025d860e32c4e780c0d51c1c20c", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 4.706761360168457, "incorrect_loss_raw": 9.293649196624756, "correct_loss_per_char": 0.4706761360168457, "incorrect_loss_per_char": 1.017127777630593, "correct_loss_per_token": 2.3533806800842285, "incorrect_loss_per_token": 6.669461727142334, "correct_loss_uncond": -11.17537784576416, "incorrect_loss_uncond": -7.7734761238098145}, "model_output": [{"sum_logits": -8.377190589904785, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.07653522491455, "logits_per_token": -8.377190589904785, "logits_per_char": -1.3961984316507976, "num_chars": 6}, {"sum_logits": -10.44965648651123, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.730850219726562, "logits_per_token": -5.224828243255615, "logits_per_char": -0.7464040347508022, "num_chars": 14}, {"sum_logits": -10.543843269348145, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.14801788330078, "logits_per_token": -5.271921634674072, "logits_per_char": -0.8110648668729342, "num_chars": 13}, {"sum_logits": -7.803906440734863, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.313097953796387, "logits_per_token": -7.803906440734863, "logits_per_char": -1.1148437772478377, "num_chars": 7}, {"sum_logits": -4.706761360168457, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.882139205932617, "logits_per_token": -2.3533806800842285, "logits_per_char": -0.4706761360168457, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 11, "native_id": "81eb4b2ee66edd8bc91ee944697c4e9f", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.808699607849121, "incorrect_loss_raw": 11.297542572021484, "correct_loss_per_char": 0.4371545098044656, "incorrect_loss_per_char": 1.0987736490037705, "correct_loss_per_token": 2.4043498039245605, "incorrect_loss_per_token": 5.648771286010742, "correct_loss_uncond": -13.812895774841309, "incorrect_loss_uncond": -7.596183776855469}, "model_output": [{"sum_logits": -9.039417266845703, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.705713272094727, "logits_per_token": -4.519708633422852, "logits_per_char": -0.9039417266845703, "num_chars": 10}, {"sum_logits": -11.388179779052734, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.16103744506836, "logits_per_token": -5.694089889526367, "logits_per_char": -1.0352890708229758, "num_chars": 11}, {"sum_logits": -10.133678436279297, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.59561538696289, "logits_per_token": -5.066839218139648, "logits_per_char": -1.1259642706976996, "num_chars": 9}, {"sum_logits": -4.808699607849121, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.62159538269043, "logits_per_token": -2.4043498039245605, "logits_per_char": -0.4371545098044656, "num_chars": 11}, {"sum_logits": -14.628894805908203, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.112539291381836, "logits_per_token": -7.314447402954102, "logits_per_char": -1.3298995278098367, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 12, "native_id": "d807e7ae60976324920c8d29eb42dad6", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 8.246768951416016, "incorrect_loss_raw": 11.584051132202148, "correct_loss_per_char": 0.7497062683105469, "incorrect_loss_per_char": 1.134387970632977, "correct_loss_per_token": 4.123384475708008, "incorrect_loss_per_token": 5.792025566101074, "correct_loss_uncond": -11.824666976928711, "incorrect_loss_uncond": -7.29062032699585}, "model_output": [{"sum_logits": -8.246768951416016, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.071435928344727, "logits_per_token": -4.123384475708008, "logits_per_char": -0.7497062683105469, "num_chars": 11}, {"sum_logits": -9.75509262084961, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.070356369018555, "logits_per_token": -4.877546310424805, "logits_per_char": -1.0838991800944011, "num_chars": 9}, {"sum_logits": -10.246597290039062, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.588817596435547, "logits_per_token": -5.123298645019531, "logits_per_char": -1.0246597290039063, "num_chars": 10}, {"sum_logits": -10.225320816040039, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.049890518188477, "logits_per_token": -5.1126604080200195, "logits_per_char": -0.6390825510025024, "num_chars": 16}, {"sum_logits": -16.109193801879883, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.789621353149414, "logits_per_token": -8.054596900939941, "logits_per_char": -1.789910422431098, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 13, "native_id": "7ea9f721ffc662918bb0c0937a487f04", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.567108154296875, "incorrect_loss_raw": 11.0342276096344, "correct_loss_per_char": 0.5870615641276041, "incorrect_loss_per_char": 1.7622724612553915, "correct_loss_per_token": 3.522369384765625, "incorrect_loss_per_token": 9.335620999336243, "correct_loss_uncond": -10.71513557434082, "incorrect_loss_uncond": -4.272989749908447}, "model_output": [{"sum_logits": -7.933638572692871, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.836627006530762, "logits_per_token": -7.933638572692871, "logits_per_char": -1.5867277145385743, "num_chars": 5}, {"sum_logits": -12.199578285217285, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.158405303955078, "logits_per_token": -12.199578285217285, "logits_per_char": -2.0332630475362143, "num_chars": 6}, {"sum_logits": -13.588852882385254, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.662477493286133, "logits_per_token": -6.794426441192627, "logits_per_char": -1.9412646974836076, "num_chars": 7}, {"sum_logits": -10.567108154296875, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.282243728637695, "logits_per_token": -3.522369384765625, "logits_per_char": -0.5870615641276041, "num_chars": 18}, {"sum_logits": -10.414840698242188, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.571359634399414, "logits_per_token": -10.414840698242188, "logits_per_char": -1.4878343854631697, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 14, "native_id": "fc1d33a2301a30214523c12573f81aba", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.177338600158691, "incorrect_loss_raw": 13.471903800964355, "correct_loss_per_char": 0.5098521444532607, "incorrect_loss_per_char": 1.8662397324092805, "correct_loss_per_token": 4.588669300079346, "incorrect_loss_per_token": 8.22333844502767, "correct_loss_uncond": -10.810812950134277, "incorrect_loss_uncond": -2.0884313583374023}, "model_output": [{"sum_logits": -17.050148010253906, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.651959419250488, "logits_per_token": -8.525074005126953, "logits_per_char": -1.8944608900282118, "num_chars": 9}, {"sum_logits": -8.192338943481445, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.6738920211792, "logits_per_token": -8.192338943481445, "logits_per_char": -0.9102598826090494, "num_chars": 9}, {"sum_logits": -9.177338600158691, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.98815155029297, "logits_per_token": -4.588669300079346, "logits_per_char": -0.5098521444532607, "num_chars": 18}, {"sum_logits": -9.941347122192383, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.669677734375, "logits_per_token": -9.941347122192383, "logits_per_char": -1.9882694244384767, "num_chars": 5}, {"sum_logits": -18.703781127929688, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.245811462402344, "logits_per_token": -6.2345937093098955, "logits_per_char": -2.671968732561384, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 15, "native_id": "3b8e1d236f5169b6c833a994d6d9c39a", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.280614852905273, "incorrect_loss_raw": 9.828603744506836, "correct_loss_per_char": 0.7543735504150391, "incorrect_loss_per_char": 1.4521538416544597, "correct_loss_per_token": 5.280614852905273, "incorrect_loss_per_token": 9.828603744506836, "correct_loss_uncond": -5.778053283691406, "incorrect_loss_uncond": -4.87463903427124}, "model_output": [{"sum_logits": -12.633980751037598, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.932482719421387, "logits_per_token": -12.633980751037598, "logits_per_char": -2.105663458506266, "num_chars": 6}, {"sum_logits": -6.583521842956543, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.005568504333496, "logits_per_token": -6.583521842956543, "logits_per_char": -1.097253640492757, "num_chars": 6}, {"sum_logits": -8.356610298156738, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.022502899169922, "logits_per_token": -8.356610298156738, "logits_per_char": -0.9285122553507487, "num_chars": 9}, {"sum_logits": -5.280614852905273, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.05866813659668, "logits_per_token": -5.280614852905273, "logits_per_char": -0.7543735504150391, "num_chars": 7}, {"sum_logits": -11.740302085876465, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.8524169921875, "logits_per_token": -11.740302085876465, "logits_per_char": -1.6771860122680664, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 16, "native_id": "c5c4166f2ed3c2b3517b79e6848e9ae2", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 19.093116760253906, "incorrect_loss_raw": 18.360082864761353, "correct_loss_per_char": 1.3637940543038505, "incorrect_loss_per_char": 1.3358550782387073, "correct_loss_per_token": 9.546558380126953, "incorrect_loss_per_token": 9.117149511973063, "correct_loss_uncond": -5.078708648681641, "incorrect_loss_uncond": -1.602123737335205}, "model_output": [{"sum_logits": -25.359909057617188, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -26.509384155273438, "logits_per_token": -8.453303019205729, "logits_per_char": -0.975381117600661, "num_chars": 26}, {"sum_logits": -12.806510925292969, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.1416015625, "logits_per_token": -4.268836975097656, "logits_per_char": -1.067209243774414, "num_chars": 12}, {"sum_logits": -23.054906845092773, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -23.027118682861328, "logits_per_token": -11.527453422546387, "logits_per_char": -1.773454372699444, "num_chars": 13}, {"sum_logits": -19.093116760253906, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -24.171825408935547, "logits_per_token": -9.546558380126953, "logits_per_char": -1.3637940543038505, "num_chars": 14}, {"sum_logits": -12.21900463104248, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.170722007751465, "logits_per_token": -12.21900463104248, "logits_per_char": -1.52737557888031, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 17, "native_id": "6dc5b2884737e66543ce65f8dc40c992", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.379046440124512, "incorrect_loss_raw": 10.18774688243866, "correct_loss_per_char": 0.42526976267496747, "incorrect_loss_per_char": 1.183370633042736, "correct_loss_per_token": 3.189523220062256, "incorrect_loss_per_token": 8.81444799900055, "correct_loss_uncond": -13.19222354888916, "incorrect_loss_uncond": -5.119067072868347}, "model_output": [{"sum_logits": -3.051543712615967, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.188199996948242, "logits_per_token": -3.051543712615967, "logits_per_char": -0.4359348160879953, "num_chars": 7}, {"sum_logits": -10.986391067504883, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.940143585205078, "logits_per_token": -5.493195533752441, "logits_per_char": -0.9987628243186257, "num_chars": 11}, {"sum_logits": -12.872365951538086, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.305567741394043, "logits_per_token": -12.872365951538086, "logits_per_char": -2.1453943252563477, "num_chars": 6}, {"sum_logits": -13.840686798095703, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.793344497680664, "logits_per_token": -13.840686798095703, "logits_per_char": -1.1533905665079753, "num_chars": 12}, {"sum_logits": -6.379046440124512, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.571269989013672, "logits_per_token": -3.189523220062256, "logits_per_char": -0.42526976267496747, "num_chars": 15}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 18, "native_id": "8af63d58cc35061dec38e5448c325988", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.6131718158721924, "incorrect_loss_raw": 10.012781381607056, "correct_loss_per_char": 0.29035242398579914, "incorrect_loss_per_char": 1.3779351294986786, "correct_loss_per_token": 1.3065859079360962, "incorrect_loss_per_token": 10.012781381607056, "correct_loss_uncond": -15.488472700119019, "incorrect_loss_uncond": -4.276047945022583}, "model_output": [{"sum_logits": -11.112205505371094, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.415360450744629, "logits_per_token": -11.112205505371094, "logits_per_char": -1.3890256881713867, "num_chars": 8}, {"sum_logits": -11.495563507080078, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.479902267456055, "logits_per_token": -11.495563507080078, "logits_per_char": -1.915927251180013, "num_chars": 6}, {"sum_logits": -8.462060928344727, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.1697416305542, "logits_per_token": -8.462060928344727, "logits_per_char": -1.2088658469063895, "num_chars": 7}, {"sum_logits": -8.981295585632324, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.090312957763672, "logits_per_token": -8.981295585632324, "logits_per_char": -0.9979217317369249, "num_chars": 9}, {"sum_logits": -2.6131718158721924, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.10164451599121, "logits_per_token": -1.3065859079360962, "logits_per_char": -0.29035242398579914, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 19, "native_id": "768fb09deab56046e1565b6a2556ad5c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.595484733581543, "incorrect_loss_raw": 8.794649481773376, "correct_loss_per_char": 0.7993549619402204, "incorrect_loss_per_char": 0.7161622802416484, "correct_loss_per_token": 5.595484733581543, "incorrect_loss_per_token": 4.9518561363220215, "correct_loss_uncond": -7.874720573425293, "incorrect_loss_uncond": -8.41918933391571}, "model_output": [{"sum_logits": -4.436251163482666, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.412413597106934, "logits_per_token": -4.436251163482666, "logits_per_char": -0.4436251163482666, "num_chars": 10}, {"sum_logits": -12.90422248840332, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -19.095705032348633, "logits_per_token": -6.45211124420166, "logits_per_char": -1.07535187403361, "num_chars": 12}, {"sum_logits": -4.693915367126465, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -16.084230422973633, "logits_per_token": -2.3469576835632324, "logits_per_char": -0.4693915367126465, "num_chars": 10}, {"sum_logits": -5.595484733581543, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.470205307006836, "logits_per_token": -5.595484733581543, "logits_per_char": -0.7993549619402204, "num_chars": 7}, {"sum_logits": -13.144208908081055, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -19.26300621032715, "logits_per_token": -6.572104454040527, "logits_per_char": -0.8762805938720704, "num_chars": 15}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 20, "native_id": "cd639cf3ff82f825ace7dd2b087562bd", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.380782127380371, "incorrect_loss_raw": 11.161052465438843, "correct_loss_per_char": 0.9115403039114816, "incorrect_loss_per_char": 1.1032677151940085, "correct_loss_per_token": 6.380782127380371, "incorrect_loss_per_token": 6.388189315795898, "correct_loss_uncond": -7.509042739868164, "incorrect_loss_uncond": -6.267451763153076}, "model_output": [{"sum_logits": -12.009556770324707, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.13845443725586, "logits_per_token": -6.0047783851623535, "logits_per_char": -1.091777888211337, "num_chars": 11}, {"sum_logits": -14.98556900024414, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.613142013549805, "logits_per_token": -7.49278450012207, "logits_per_char": -1.498556900024414, "num_chars": 10}, {"sum_logits": -6.461304664611816, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.586409568786621, "logits_per_token": -6.461304664611816, "logits_per_char": -1.076884110768636, "num_chars": 6}, {"sum_logits": -6.380782127380371, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.889824867248535, "logits_per_token": -6.380782127380371, "logits_per_char": -0.9115403039114816, "num_chars": 7}, {"sum_logits": -11.187779426574707, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.37601089477539, "logits_per_token": -5.5938897132873535, "logits_per_char": -0.7458519617716471, "num_chars": 15}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 21, "native_id": "8d79cc5e4eea11f50fab18fdea20fd4f", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.3816590309143066, "incorrect_loss_raw": 9.257589340209961, "correct_loss_per_char": 0.11513825257619222, "incorrect_loss_per_char": 1.0319146763796758, "correct_loss_per_token": 1.3816590309143066, "incorrect_loss_per_token": 9.257589340209961, "correct_loss_uncond": -13.247670650482178, "incorrect_loss_uncond": -5.08287501335144}, "model_output": [{"sum_logits": -8.385438919067383, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.703721046447754, "logits_per_token": -8.385438919067383, "logits_per_char": -1.0481798648834229, "num_chars": 8}, {"sum_logits": -8.684666633605957, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.325604438781738, "logits_per_token": -8.684666633605957, "logits_per_char": -1.0855833292007446, "num_chars": 8}, {"sum_logits": -1.3816590309143066, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": true, "sum_logits_uncond": -14.629329681396484, "logits_per_token": -1.3816590309143066, "logits_per_char": -0.11513825257619222, "num_chars": 12}, {"sum_logits": -11.08355712890625, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.60936164855957, "logits_per_token": -11.08355712890625, "logits_per_char": -1.0075961026278408, "num_chars": 11}, {"sum_logits": -8.876694679260254, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.723170280456543, "logits_per_token": -8.876694679260254, "logits_per_char": -0.9862994088066949, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 22, "native_id": "e5ad2184e37ae88b2bf46bf6bc0ed2f4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 2.670180320739746, "incorrect_loss_raw": 6.223908424377441, "correct_loss_per_char": 0.17801202138264974, "incorrect_loss_per_char": 0.5101612882954734, "correct_loss_per_token": 0.6675450801849365, "incorrect_loss_per_token": 2.4367928306261697, "correct_loss_uncond": -14.108649253845215, "incorrect_loss_uncond": -11.25068473815918}, "model_output": [{"sum_logits": -2.670180320739746, "num_tokens": 4, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.77882957458496, "logits_per_token": -0.6675450801849365, "logits_per_char": -0.17801202138264974, "num_chars": 15}, {"sum_logits": -4.260153770446777, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.321434020996094, "logits_per_token": -4.260153770446777, "logits_per_char": -0.5325192213058472, "num_chars": 8}, {"sum_logits": -14.027528762817383, "num_tokens": 4, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -22.137086868286133, "logits_per_token": -3.5068821907043457, "logits_per_char": -1.0019663402012415, "num_chars": 14}, {"sum_logits": -2.670180320739746, "num_tokens": 4, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.77882957458496, "logits_per_token": -0.6675450801849365, "logits_per_char": -0.17801202138264974, "num_chars": 15}, {"sum_logits": -3.9377708435058594, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.661022186279297, "logits_per_token": -1.3125902811686199, "logits_per_char": -0.32814757029215497, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 23, "native_id": "b8b287b6277fccd4b7c9c72577177328", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.979841709136963, "incorrect_loss_raw": 8.21179473400116, "correct_loss_per_char": 0.5979841709136963, "incorrect_loss_per_char": 0.9644484115971459, "correct_loss_per_token": 5.979841709136963, "incorrect_loss_per_token": 6.999820828437805, "correct_loss_uncond": -8.476122379302979, "incorrect_loss_uncond": -6.8751150369644165}, "model_output": [{"sum_logits": -9.695791244506836, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.74686050415039, "logits_per_token": -4.847895622253418, "logits_per_char": -0.9695791244506836, "num_chars": 10}, {"sum_logits": -6.090670108795166, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.562989234924316, "logits_per_token": -6.090670108795166, "logits_per_char": -0.8700957298278809, "num_chars": 7}, {"sum_logits": -8.818812370300293, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.015286445617676, "logits_per_token": -8.818812370300293, "logits_per_char": -1.1023515462875366, "num_chars": 8}, {"sum_logits": -5.979841709136963, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.455964088439941, "logits_per_token": -5.979841709136963, "logits_per_char": -0.5979841709136963, "num_chars": 10}, {"sum_logits": -8.241905212402344, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.022502899169922, "logits_per_token": -8.241905212402344, "logits_per_char": -0.9157672458224826, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 24, "native_id": "f646f3e064f06423fc25b98500796cf0", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 2.0522265434265137, "incorrect_loss_raw": 6.203996658325195, "correct_loss_per_char": 0.29317522048950195, "incorrect_loss_per_char": 0.7650665669214158, "correct_loss_per_token": 2.0522265434265137, "incorrect_loss_per_token": 2.9815953572591147, "correct_loss_uncond": -10.663516521453857, "incorrect_loss_uncond": -10.267170429229736}, "model_output": [{"sum_logits": -5.80999755859375, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.405513763427734, "logits_per_token": -2.904998779296875, "logits_per_char": -0.8299996512276786, "num_chars": 7}, {"sum_logits": -10.61941146850586, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.994915008544922, "logits_per_token": -3.5398038228352866, "logits_per_char": -0.8849509557088217, "num_chars": 12}, {"sum_logits": -2.576580047607422, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.078725814819336, "logits_per_token": -2.576580047607422, "logits_per_char": -0.5153160095214844, "num_chars": 5}, {"sum_logits": -5.80999755859375, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.405513763427734, "logits_per_token": -2.904998779296875, "logits_per_char": -0.8299996512276786, "num_chars": 7}, {"sum_logits": -2.0522265434265137, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.715743064880371, "logits_per_token": -2.0522265434265137, "logits_per_char": -0.29317522048950195, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 25, "native_id": "b0f7d7978ac41c465108a92660d70e84", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.2117702960968018, "incorrect_loss_raw": 20.192309617996216, "correct_loss_per_char": 0.12117702960968017, "incorrect_loss_per_char": 1.1317860657002503, "correct_loss_per_token": 1.2117702960968018, "incorrect_loss_per_token": 7.4120266199111935, "correct_loss_uncond": -14.001788854598999, "incorrect_loss_uncond": -3.213024854660034}, "model_output": [{"sum_logits": -17.541141510009766, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.910301208496094, "logits_per_token": -8.770570755004883, "logits_per_char": -1.1694094340006511, "num_chars": 15}, {"sum_logits": -13.098830223083496, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.250640869140625, "logits_per_token": -6.549415111541748, "logits_per_char": -0.9356307302202497, "num_chars": 14}, {"sum_logits": -14.340890884399414, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.08435821533203, "logits_per_token": -7.170445442199707, "logits_per_char": -1.3037173531272195, "num_chars": 11}, {"sum_logits": -1.2117702960968018, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": true, "sum_logits_uncond": -15.2135591506958, "logits_per_token": -1.2117702960968018, "logits_per_char": -0.12117702960968017, "num_chars": 10}, {"sum_logits": -35.78837585449219, "num_tokens": 5, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -37.37603759765625, "logits_per_token": -7.157675170898438, "logits_per_char": -1.1183867454528809, "num_chars": 32}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 26, "native_id": "54075de8b8b89ecef2e4eb4eaee2713d", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.520618438720703, "incorrect_loss_raw": 12.546863317489624, "correct_loss_per_char": 1.1041236877441407, "incorrect_loss_per_char": 1.1640792102604123, "correct_loss_per_token": 5.520618438720703, "incorrect_loss_per_token": 6.33475645383199, "correct_loss_uncond": -8.840645790100098, "incorrect_loss_uncond": -4.314466953277588}, "model_output": [{"sum_logits": -13.901241302490234, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -16.822216033935547, "logits_per_token": -6.950620651245117, "logits_per_char": -1.7376551628112793, "num_chars": 8}, {"sum_logits": -5.520618438720703, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.3612642288208, "logits_per_token": -5.520618438720703, "logits_per_char": -1.1041236877441407, "num_chars": 5}, {"sum_logits": -18.63723373413086, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -18.162675857543945, "logits_per_token": -9.31861686706543, "logits_per_char": -1.331230981009347, "num_chars": 14}, {"sum_logits": -12.86878490447998, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -18.415130615234375, "logits_per_token": -4.289594968159993, "logits_per_char": -0.9899065311138446, "num_chars": 13}, {"sum_logits": -4.780193328857422, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.04529857635498, "logits_per_token": -4.780193328857422, "logits_per_char": -0.5975241661071777, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 27, "native_id": "65435b996ce9d1685bebb74b49c1ba7f", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.952692031860352, "incorrect_loss_raw": 12.246721029281616, "correct_loss_per_char": 0.434543251991272, "incorrect_loss_per_char": 1.1991435575794864, "correct_loss_per_token": 3.476346015930176, "incorrect_loss_per_token": 10.061744451522827, "correct_loss_uncond": -14.929769515991211, "incorrect_loss_uncond": -3.2913119792938232}, "model_output": [{"sum_logits": -6.952692031860352, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -21.882461547851562, "logits_per_token": -3.476346015930176, "logits_per_char": -0.434543251991272, "num_chars": 16}, {"sum_logits": -15.099669456481934, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.02373504638672, "logits_per_token": -15.099669456481934, "logits_per_char": -1.2583057880401611, "num_chars": 12}, {"sum_logits": -5.526080131530762, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.504511833190918, "logits_per_token": -5.526080131530762, "logits_per_char": -0.3947200093950544, "num_chars": 14}, {"sum_logits": -10.881321907043457, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.576016426086426, "logits_per_token": -10.881321907043457, "logits_per_char": -1.5544745581490653, "num_chars": 7}, {"sum_logits": -17.479812622070312, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.047868728637695, "logits_per_token": -8.739906311035156, "logits_per_char": -1.5890738747336648, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 28, "native_id": "9889e5389917d812c09d6e5d382d333d", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.125885009765625, "incorrect_loss_raw": 12.288137674331665, "correct_loss_per_char": 0.2657356262207031, "incorrect_loss_per_char": 1.3674843468508877, "correct_loss_per_token": 1.0629425048828125, "incorrect_loss_per_token": 7.681710958480835, "correct_loss_uncond": -16.242992401123047, "incorrect_loss_uncond": -2.987614870071411}, "model_output": [{"sum_logits": -8.899978637695312, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.135162353515625, "logits_per_token": -4.449989318847656, "logits_per_char": -0.9888865152994791, "num_chars": 9}, {"sum_logits": -2.125885009765625, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.368877410888672, "logits_per_token": -1.0629425048828125, "logits_per_char": -0.2657356262207031, "num_chars": 8}, {"sum_logits": -16.77882957458496, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.27862548828125, "logits_per_token": -8.38941478729248, "logits_per_char": -1.8643143971761067, "num_chars": 9}, {"sum_logits": -11.172605514526367, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.325485229492188, "logits_per_token": -5.586302757263184, "logits_per_char": -0.8594311934251052, "num_chars": 13}, {"sum_logits": -12.30113697052002, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.363737106323242, "logits_per_token": -12.30113697052002, "logits_per_char": -1.75730528150286, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 29, "native_id": "a651ffa44ac5febf0aede6748899b981", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.6769833564758301, "incorrect_loss_raw": 9.679099202156067, "correct_loss_per_char": 0.11283055941263835, "incorrect_loss_per_char": 1.189347118741334, "correct_loss_per_token": 0.6769833564758301, "incorrect_loss_per_token": 7.963875253995259, "correct_loss_uncond": -12.593494892120361, "incorrect_loss_uncond": -5.3188546895980835}, "model_output": [{"sum_logits": -9.418916702270508, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.656285285949707, "logits_per_token": -9.418916702270508, "logits_per_char": -0.8562651547518644, "num_chars": 11}, {"sum_logits": -0.6769833564758301, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": true, "sum_logits_uncond": -13.270478248596191, "logits_per_token": -0.6769833564758301, "logits_per_char": -0.11283055941263835, "num_chars": 6}, {"sum_logits": -12.859109878540039, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.553000450134277, "logits_per_token": -12.859109878540039, "logits_per_char": -1.4287899865044489, "num_chars": 9}, {"sum_logits": -6.147026538848877, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.986200332641602, "logits_per_token": -6.147026538848877, "logits_per_char": -1.5367566347122192, "num_chars": 4}, {"sum_logits": -10.291343688964844, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.796329498291016, "logits_per_token": -3.4304478963216147, "logits_per_char": -0.935576698996804, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 30, "native_id": "bdcfbe2132295d437e4c5701085f19c0", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.095537185668945, "incorrect_loss_raw": 10.566004276275635, "correct_loss_per_char": 1.2993624550955636, "incorrect_loss_per_char": 1.3546611132129789, "correct_loss_per_token": 4.547768592834473, "incorrect_loss_per_token": 6.631838838259379, "correct_loss_uncond": -9.368780136108398, "incorrect_loss_uncond": -5.041405439376831}, "model_output": [{"sum_logits": -7.868618011474609, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -10.994718551635742, "logits_per_token": -7.868618011474609, "logits_per_char": -1.9671545028686523, "num_chars": 4}, {"sum_logits": -9.095537185668945, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -18.464317321777344, "logits_per_token": -4.547768592834473, "logits_per_char": -1.2993624550955636, "num_chars": 7}, {"sum_logits": -14.566225051879883, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -19.524173736572266, "logits_per_token": -4.855408350626628, "logits_per_char": -1.618469450208876, "num_chars": 9}, {"sum_logits": -12.051690101623535, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -18.346126556396484, "logits_per_token": -6.025845050811768, "logits_per_char": -0.8608350072588239, "num_chars": 14}, {"sum_logits": -7.777483940124512, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.564620018005371, "logits_per_token": -7.777483940124512, "logits_per_char": -0.972185492515564, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 31, "native_id": "8d3dc21a53523850ec80771daaa5ff20", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.110344886779785, "incorrect_loss_raw": 10.935851097106934, "correct_loss_per_char": 0.38879311084747314, "incorrect_loss_per_char": 0.8979588537949782, "correct_loss_per_token": 3.110344886779785, "incorrect_loss_per_token": 5.37171995639801, "correct_loss_uncond": -9.913209915161133, "incorrect_loss_uncond": -8.15446662902832}, "model_output": [{"sum_logits": -3.110344886779785, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.023554801940918, "logits_per_token": -3.110344886779785, "logits_per_char": -0.38879311084747314, "num_chars": 8}, {"sum_logits": -9.850316047668457, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -17.251731872558594, "logits_per_token": -4.9251580238342285, "logits_per_char": -0.6566877365112305, "num_chars": 15}, {"sum_logits": -17.172697067260742, "num_tokens": 4, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -27.298786163330078, "logits_per_token": -4.2931742668151855, "logits_per_char": -0.6869078826904297, "num_chars": 25}, {"sum_logits": -8.903687477111816, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -20.710254669189453, "logits_per_token": -4.451843738555908, "logits_per_char": -0.6848990367009089, "num_chars": 13}, {"sum_logits": -7.816703796386719, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -11.10049819946289, "logits_per_token": -7.816703796386719, "logits_per_char": -1.5633407592773438, "num_chars": 5}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 32, "native_id": "a80ee7775e934c423012fe98e20ba28b", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.0680768489837646, "incorrect_loss_raw": 10.791123390197754, "correct_loss_per_char": 0.20680768489837648, "incorrect_loss_per_char": 1.4675545567557926, "correct_loss_per_token": 1.0340384244918823, "incorrect_loss_per_token": 6.9961549838384, "correct_loss_uncond": -13.386968851089478, "incorrect_loss_uncond": -5.4121057987213135}, "model_output": [{"sum_logits": -8.550355911254883, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -15.705713272094727, "logits_per_token": -4.275177955627441, "logits_per_char": -0.8550355911254883, "num_chars": 10}, {"sum_logits": -10.757927894592285, "num_tokens": 3, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -18.625558853149414, "logits_per_token": -3.585975964864095, "logits_per_char": -1.5368468420846122, "num_chars": 7}, {"sum_logits": -7.465487480163574, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -18.410253524780273, "logits_per_token": -3.732743740081787, "logits_per_char": -0.7465487480163574, "num_chars": 10}, {"sum_logits": -16.390722274780273, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -12.071391105651855, "logits_per_token": -16.390722274780273, "logits_per_char": -2.7317870457967124, "num_chars": 6}, {"sum_logits": -2.0680768489837646, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -15.455045700073242, "logits_per_token": -1.0340384244918823, "logits_per_char": -0.20680768489837648, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 33, "native_id": "48a315cfa3ce11f7a9d615bc854331d5", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.899580478668213, "incorrect_loss_raw": 10.292208194732666, "correct_loss_per_char": 0.2785414627620152, "incorrect_loss_per_char": 1.1796265533992223, "correct_loss_per_token": 1.299860159556071, "incorrect_loss_per_token": 6.052990913391113, "correct_loss_uncond": -13.58653974533081, "incorrect_loss_uncond": -5.365783452987671}, "model_output": [{"sum_logits": -9.16920280456543, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.643402099609375, "logits_per_token": -4.584601402282715, "logits_per_char": -0.6549430574689593, "num_chars": 14}, {"sum_logits": -8.285442352294922, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.11477279663086, "logits_per_token": -2.7618141174316406, "logits_per_char": -0.6904535293579102, "num_chars": 12}, {"sum_logits": -10.016908645629883, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.346129417419434, "logits_per_token": -10.016908645629883, "logits_per_char": -2.0033817291259766, "num_chars": 5}, {"sum_logits": -3.899580478668213, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.486120223999023, "logits_per_token": -1.299860159556071, "logits_per_char": -0.2785414627620152, "num_chars": 14}, {"sum_logits": -13.69727897644043, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.52766227722168, "logits_per_token": -6.848639488220215, "logits_per_char": -1.369727897644043, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 34, "native_id": "4acd496cc78d96c2431279a5fd87de7c", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 1.9285566806793213, "incorrect_loss_raw": 7.33378529548645, "correct_loss_per_char": 0.19285566806793214, "incorrect_loss_per_char": 1.191239745188982, "correct_loss_per_token": 1.9285566806793213, "incorrect_loss_per_token": 6.230154633522034, "correct_loss_uncond": -10.80595088005066, "incorrect_loss_uncond": -5.91662073135376}, "model_output": [{"sum_logits": -2.3767318725585938, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.35517406463623, "logits_per_token": -2.3767318725585938, "logits_per_char": -0.18282552865835336, "num_chars": 13}, {"sum_logits": -8.231535911560059, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.15064525604248, "logits_per_token": -8.231535911560059, "logits_per_char": -1.3719226519266765, "num_chars": 6}, {"sum_logits": -8.829045295715332, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.363466262817383, "logits_per_token": -4.414522647857666, "logits_per_char": -0.7357537746429443, "num_chars": 12}, {"sum_logits": -9.897828102111816, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.132338523864746, "logits_per_token": -9.897828102111816, "logits_per_char": -2.474457025527954, "num_chars": 4}, {"sum_logits": -1.9285566806793213, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": true, "sum_logits_uncond": -12.73450756072998, "logits_per_token": -1.9285566806793213, "logits_per_char": -0.19285566806793214, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 35, "native_id": "91e0f4ab62c9d2fd440d73a3f5308d96", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.033224582672119, "incorrect_loss_raw": 17.22345495223999, "correct_loss_per_char": 0.31457653641700745, "incorrect_loss_per_char": 1.9304340311459134, "correct_loss_per_token": 2.5166122913360596, "incorrect_loss_per_token": 10.415625095367432, "correct_loss_uncond": -10.887625217437744, "incorrect_loss_uncond": -2.285707950592041}, "model_output": [{"sum_logits": -36.30842590332031, "num_tokens": 4, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -34.116973876953125, "logits_per_token": -9.077106475830078, "logits_per_char": -3.025702158610026, "num_chars": 12}, {"sum_logits": -10.093416213989258, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -12.349469184875488, "logits_per_token": -10.093416213989258, "logits_per_char": -1.6822360356648762, "num_chars": 6}, {"sum_logits": -5.033224582672119, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -15.920849800109863, "logits_per_token": -2.5166122913360596, "logits_per_char": -0.31457653641700745, "num_chars": 16}, {"sum_logits": -11.328840255737305, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -16.001758575439453, "logits_per_token": -11.328840255737305, "logits_per_char": -1.618405750819615, "num_chars": 7}, {"sum_logits": -11.163137435913086, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -15.568449974060059, "logits_per_token": -11.163137435913086, "logits_per_char": -1.3953921794891357, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 36, "native_id": "b61e849e44db16a581f0b65e28ab95dc", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.5214462280273438, "incorrect_loss_raw": 8.822300434112549, "correct_loss_per_char": 0.7042892456054688, "incorrect_loss_per_char": 1.0558978568939934, "correct_loss_per_token": 3.5214462280273438, "incorrect_loss_per_token": 7.229814767837524, "correct_loss_uncond": -8.733282089233398, "incorrect_loss_uncond": -5.6316001415252686}, "model_output": [{"sum_logits": -8.096725463867188, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.436007499694824, "logits_per_token": -8.096725463867188, "logits_per_char": -1.6193450927734374, "num_chars": 5}, {"sum_logits": -12.739885330200195, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.10889434814453, "logits_per_token": -6.369942665100098, "logits_per_char": -0.849325688680013, "num_chars": 15}, {"sum_logits": -3.5214462280273438, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.254728317260742, "logits_per_token": -3.5214462280273438, "logits_per_char": -0.7042892456054688, "num_chars": 5}, {"sum_logits": -7.225436210632324, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.892474174499512, "logits_per_token": -7.225436210632324, "logits_per_char": -1.0322051729474748, "num_chars": 7}, {"sum_logits": -7.227154731750488, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.378226280212402, "logits_per_token": -7.227154731750488, "logits_per_char": -0.7227154731750488, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 37, "native_id": "ba6bd1bdef02d0ebfe5370f92365ae18", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.0628600120544434, "incorrect_loss_raw": 9.958751559257507, "correct_loss_per_char": 0.23560461631188026, "incorrect_loss_per_char": 1.2426027065231686, "correct_loss_per_token": 3.0628600120544434, "incorrect_loss_per_token": 5.5622652769088745, "correct_loss_uncond": -10.139249324798584, "incorrect_loss_uncond": -7.453142523765564}, "model_output": [{"sum_logits": -17.322654724121094, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.87339973449707, "logits_per_token": -8.661327362060547, "logits_per_char": -2.1653318405151367, "num_chars": 8}, {"sum_logits": -3.0628600120544434, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.202109336853027, "logits_per_token": -3.0628600120544434, "logits_per_char": -0.23560461631188026, "num_chars": 13}, {"sum_logits": -4.663115978240967, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.800455093383789, "logits_per_token": -4.663115978240967, "logits_per_char": -0.9326231956481934, "num_chars": 5}, {"sum_logits": -11.380908012390137, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -22.235506057739258, "logits_per_token": -5.690454006195068, "logits_per_char": -0.9484090010325114, "num_chars": 12}, {"sum_logits": -6.468327522277832, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.738215446472168, "logits_per_token": -3.234163761138916, "logits_per_char": -0.9240467888968331, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 38, "native_id": "dc55d473c22b04877b11d584f9548194", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 16.363706588745117, "incorrect_loss_raw": 10.49249267578125, "correct_loss_per_char": 1.0909137725830078, "incorrect_loss_per_char": 1.3353899121284485, "correct_loss_per_token": 5.454568862915039, "incorrect_loss_per_token": 6.0945388078689575, "correct_loss_uncond": -3.327615737915039, "incorrect_loss_uncond": -4.903733015060425}, "model_output": [{"sum_logits": -16.363706588745117, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.691322326660156, "logits_per_token": -5.454568862915039, "logits_per_char": -1.0909137725830078, "num_chars": 15}, {"sum_logits": -12.193567276000977, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.634092330932617, "logits_per_token": -6.096783638000488, "logits_per_char": -1.1085061160000889, "num_chars": 11}, {"sum_logits": -6.78633975982666, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -11.956701278686523, "logits_per_token": -6.78633975982666, "logits_per_char": -1.696584939956665, "num_chars": 4}, {"sum_logits": -14.395654678344727, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.048660278320312, "logits_per_token": -7.197827339172363, "logits_per_char": -1.3086958798495205, "num_chars": 11}, {"sum_logits": -8.594408988952637, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.945448875427246, "logits_per_token": -4.297204494476318, "logits_per_char": -1.2277727127075195, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 39, "native_id": "113aaea2b1a27a976547f54e531d99bb", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.018421173095703, "incorrect_loss_raw": 8.329325199127197, "correct_loss_per_char": 0.2744019248268821, "incorrect_loss_per_char": 0.9343214631080627, "correct_loss_per_token": 1.5092105865478516, "incorrect_loss_per_token": 5.456262111663818, "correct_loss_uncond": -12.334474563598633, "incorrect_loss_uncond": -8.880033016204834}, "model_output": [{"sum_logits": -11.988191604614258, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.1570987701416, "logits_per_token": -5.994095802307129, "logits_per_char": -1.7125988006591797, "num_chars": 7}, {"sum_logits": -3.018421173095703, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.352895736694336, "logits_per_token": -1.5092105865478516, "logits_per_char": -0.2744019248268821, "num_chars": 11}, {"sum_logits": -4.8404130935668945, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.241388320922852, "logits_per_token": -4.8404130935668945, "logits_per_char": -0.6050516366958618, "num_chars": 8}, {"sum_logits": -10.996313095092773, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -22.352176666259766, "logits_per_token": -5.498156547546387, "logits_per_char": -0.7330875396728516, "num_chars": 15}, {"sum_logits": -5.492383003234863, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.086769104003906, "logits_per_token": -5.492383003234863, "logits_per_char": -0.6865478754043579, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 40, "native_id": "ba640b9634ad6b4ad98b17b4f152e562", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.597764492034912, "incorrect_loss_raw": 13.135165452957153, "correct_loss_per_char": 0.3298882246017456, "incorrect_loss_per_char": 1.4570571385897124, "correct_loss_per_token": 2.199254830678304, "incorrect_loss_per_token": 7.1955660581588745, "correct_loss_uncond": -10.45215654373169, "incorrect_loss_uncond": -6.671010971069336}, "model_output": [{"sum_logits": -9.692070960998535, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.832438468933105, "logits_per_token": -9.692070960998535, "logits_per_char": -1.9384141921997071, "num_chars": 5}, {"sum_logits": -6.597764492034912, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -17.0499210357666, "logits_per_token": -2.199254830678304, "logits_per_char": -0.3298882246017456, "num_chars": 20}, {"sum_logits": -17.242341995239258, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -21.52896499633789, "logits_per_token": -8.621170997619629, "logits_per_char": -1.7242341995239259, "num_chars": 10}, {"sum_logits": -14.004612922668457, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -20.102449417114258, "logits_per_token": -4.668204307556152, "logits_per_char": -1.273146629333496, "num_chars": 11}, {"sum_logits": -11.601635932922363, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -24.760852813720703, "logits_per_token": -5.800817966461182, "logits_per_char": -0.8924335333017203, "num_chars": 13}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 41, "native_id": "750ebdf36a0b3b407be0fe2163e3700b", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.976927280426025, "incorrect_loss_raw": 11.461900174617767, "correct_loss_per_char": 0.49769272804260256, "incorrect_loss_per_char": 0.9245577240231062, "correct_loss_per_token": 2.4884636402130127, "incorrect_loss_per_token": 5.730950087308884, "correct_loss_uncond": -14.025933742523193, "incorrect_loss_uncond": -7.073687136173248}, "model_output": [{"sum_logits": -16.97158432006836, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -20.339109420776367, "logits_per_token": -8.48579216003418, "logits_per_char": -1.131438954671224, "num_chars": 15}, {"sum_logits": -4.976927280426025, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -19.00286102294922, "logits_per_token": -2.4884636402130127, "logits_per_char": -0.49769272804260256, "num_chars": 10}, {"sum_logits": -15.803966522216797, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -18.831424713134766, "logits_per_token": -7.901983261108398, "logits_per_char": -1.3169972101847331, "num_chars": 12}, {"sum_logits": -3.0406148433685303, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -17.746070861816406, "logits_per_token": -1.5203074216842651, "logits_per_char": -0.3378460937076145, "num_chars": 9}, {"sum_logits": -10.031435012817383, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -17.225744247436523, "logits_per_token": -5.015717506408691, "logits_per_char": -0.911948637528853, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 42, "native_id": "8f01273422a370a8dbda6bf473a395a0", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.8158252239227295, "incorrect_loss_raw": 8.710116386413574, "correct_loss_per_char": 0.4022607462746756, "incorrect_loss_per_char": 1.0429601574700975, "correct_loss_per_token": 2.8158252239227295, "incorrect_loss_per_token": 8.710116386413574, "correct_loss_uncond": -10.547911882400513, "incorrect_loss_uncond": -3.815122604370117}, "model_output": [{"sum_logits": -14.790549278259277, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.543240547180176, "logits_per_token": -14.790549278259277, "logits_per_char": -1.643394364251031, "num_chars": 9}, {"sum_logits": -6.992031097412109, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.832630157470703, "logits_per_token": -6.992031097412109, "logits_per_char": -0.998861585344587, "num_chars": 7}, {"sum_logits": -7.83597469329834, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.08862590789795, "logits_per_token": -7.83597469329834, "logits_per_char": -0.783597469329834, "num_chars": 10}, {"sum_logits": -5.22191047668457, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.636459350585938, "logits_per_token": -5.22191047668457, "logits_per_char": -0.7459872109549386, "num_chars": 7}, {"sum_logits": -2.8158252239227295, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.363737106323242, "logits_per_token": -2.8158252239227295, "logits_per_char": -0.4022607462746756, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 43, "native_id": "e6586bba9fe96d38792e6e6d4f2703dc", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.031379699707031, "incorrect_loss_raw": 10.615025758743286, "correct_loss_per_char": 1.1718966166178386, "incorrect_loss_per_char": 1.6099230130513509, "correct_loss_per_token": 7.031379699707031, "incorrect_loss_per_token": 8.75891637802124, "correct_loss_uncond": -4.858273506164551, "incorrect_loss_uncond": -4.2345147132873535}, "model_output": [{"sum_logits": -14.848875045776367, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.345714569091797, "logits_per_token": -7.424437522888184, "logits_per_char": -1.649875005086263, "num_chars": 9}, {"sum_logits": -5.632951736450195, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.376179695129395, "logits_per_token": -5.632951736450195, "logits_per_char": -1.1265903472900392, "num_chars": 5}, {"sum_logits": -12.208949089050293, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.08862590789795, "logits_per_token": -12.208949089050293, "logits_per_char": -1.2208949089050294, "num_chars": 10}, {"sum_logits": -9.769327163696289, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.587641716003418, "logits_per_token": -9.769327163696289, "logits_per_char": -2.4423317909240723, "num_chars": 4}, {"sum_logits": -7.031379699707031, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.889653205871582, "logits_per_token": -7.031379699707031, "logits_per_char": -1.1718966166178386, "num_chars": 6}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 44, "native_id": "6e433471d0e2590b8c73ceef275022b1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 11.332511901855469, "incorrect_loss_raw": 13.303492784500122, "correct_loss_per_char": 1.0302283547141335, "incorrect_loss_per_char": 1.6013374522134853, "correct_loss_per_token": 5.666255950927734, "incorrect_loss_per_token": 9.42620023091634, "correct_loss_uncond": -10.315189361572266, "incorrect_loss_uncond": -2.4097518920898438}, "model_output": [{"sum_logits": -10.038469314575195, "num_tokens": 2, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -15.697437286376953, "logits_per_token": -5.019234657287598, "logits_per_char": -0.9125881195068359, "num_chars": 11}, {"sum_logits": -11.780938148498535, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -12.923977851867676, "logits_per_token": -11.780938148498535, "logits_per_char": -2.945234537124634, "num_chars": 4}, {"sum_logits": -15.734903335571289, "num_tokens": 3, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -19.306278228759766, "logits_per_token": -5.244967778523763, "logits_per_char": -1.1239216668265206, "num_chars": 14}, {"sum_logits": -11.332511901855469, "num_tokens": 2, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -21.647701263427734, "logits_per_token": -5.666255950927734, "logits_per_char": -1.0302283547141335, "num_chars": 11}, {"sum_logits": -15.659660339355469, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -14.925285339355469, "logits_per_token": -15.659660339355469, "logits_per_char": -1.4236054853959517, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 45, "native_id": "1bc986f8aea88d6927d8a45367855a94", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.919724464416504, "incorrect_loss_raw": 15.090426445007324, "correct_loss_per_char": 0.4613149642944336, "incorrect_loss_per_char": 1.2286908103437026, "correct_loss_per_token": 3.459862232208252, "incorrect_loss_per_token": 8.173739671707153, "correct_loss_uncond": -11.596455574035645, "incorrect_loss_uncond": -2.47847056388855}, "model_output": [{"sum_logits": -17.360143661499023, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.855520248413086, "logits_per_token": -5.786714553833008, "logits_per_char": -1.0211849212646484, "num_chars": 17}, {"sum_logits": -15.086376190185547, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.922809600830078, "logits_per_token": -7.543188095092773, "logits_per_char": -0.7940197994834498, "num_chars": 19}, {"sum_logits": -10.814926147460938, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.753066062927246, "logits_per_token": -10.814926147460938, "logits_per_char": -1.5449894496372767, "num_chars": 7}, {"sum_logits": -17.10025978088379, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.744192123413086, "logits_per_token": -8.550129890441895, "logits_per_char": -1.5545690709894353, "num_chars": 11}, {"sum_logits": -6.919724464416504, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.51618003845215, "logits_per_token": -3.459862232208252, "logits_per_char": -0.4613149642944336, "num_chars": 15}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 46, "native_id": "8d1563697d751a364d688d6701ebdb39", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.59587574005127, "incorrect_loss_raw": 8.46044933795929, "correct_loss_per_char": 0.859587574005127, "incorrect_loss_per_char": 0.7419817255927132, "correct_loss_per_token": 4.297937870025635, "incorrect_loss_per_token": 3.906686782836914, "correct_loss_uncond": -7.841912269592285, "incorrect_loss_uncond": -10.307213187217712}, "model_output": [{"sum_logits": -8.59587574005127, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.437788009643555, "logits_per_token": -4.297937870025635, "logits_per_char": -0.859587574005127, "num_chars": 10}, {"sum_logits": -6.064547538757324, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.48784065246582, "logits_per_token": -3.032273769378662, "logits_per_char": -0.5513225035233931, "num_chars": 11}, {"sum_logits": -10.006170272827148, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.139310836791992, "logits_per_token": -5.003085136413574, "logits_per_char": -0.9096518429842863, "num_chars": 11}, {"sum_logits": -7.764909267425537, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.304187774658203, "logits_per_token": -2.5883030891418457, "logits_per_char": -0.5973007128788874, "num_chars": 13}, {"sum_logits": -10.006170272827148, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.139310836791992, "logits_per_token": -5.003085136413574, "logits_per_char": -0.9096518429842863, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 47, "native_id": "91f512273a2da7ae796919069b20d6cf", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.9611027240753174, "incorrect_loss_raw": 14.529460191726685, "correct_loss_per_char": 0.3300918936729431, "incorrect_loss_per_char": 1.419559681624697, "correct_loss_per_token": 1.9805513620376587, "incorrect_loss_per_token": 8.18634517987569, "correct_loss_uncond": -16.37509799003601, "incorrect_loss_uncond": -3.5301342010498047}, "model_output": [{"sum_logits": -14.427474975585938, "num_tokens": 2, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -18.781972885131836, "logits_per_token": -7.213737487792969, "logits_per_char": -1.2022895812988281, "num_chars": 12}, {"sum_logits": -16.452281951904297, "num_tokens": 1, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -14.202067375183105, "logits_per_token": -16.452281951904297, "logits_per_char": -2.056535243988037, "num_chars": 8}, {"sum_logits": -10.38038158416748, "num_tokens": 3, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -17.15215492248535, "logits_per_token": -3.4601271947224936, "logits_per_char": -0.54633587285092, "num_chars": 19}, {"sum_logits": -16.857702255249023, "num_tokens": 3, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -22.102182388305664, "logits_per_token": -5.619234085083008, "logits_per_char": -1.8730780283610027, "num_chars": 9}, {"sum_logits": -3.9611027240753174, "num_tokens": 2, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -20.336200714111328, "logits_per_token": -1.9805513620376587, "logits_per_char": -0.3300918936729431, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 48, "native_id": "49cda7eedbf63b3f38e59ba72f1ee1f9", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.756802082061768, "incorrect_loss_raw": 7.331773906946182, "correct_loss_per_char": 1.126133680343628, "incorrect_loss_per_char": 0.7853949156900246, "correct_loss_per_token": 6.756802082061768, "incorrect_loss_per_token": 5.059643894433975, "correct_loss_uncond": -6.021728038787842, "incorrect_loss_uncond": -8.90904888510704}, "model_output": [{"sum_logits": -12.58473014831543, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.539966583251953, "logits_per_token": -6.292365074157715, "logits_per_char": -1.3983033498128254, "num_chars": 9}, {"sum_logits": -9.19572925567627, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.5768461227417, "logits_per_token": -9.19572925567627, "logits_per_char": -1.1494661569595337, "num_chars": 8}, {"sum_logits": -1.9543262720108032, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": true, "sum_logits_uncond": -13.023554801940918, "logits_per_token": -1.9543262720108032, "logits_per_char": -0.2442907840013504, "num_chars": 8}, {"sum_logits": -5.592309951782227, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.82292366027832, "logits_per_token": -2.7961549758911133, "logits_per_char": -0.34951937198638916, "num_chars": 16}, {"sum_logits": -6.756802082061768, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.77853012084961, "logits_per_token": -6.756802082061768, "logits_per_char": -1.126133680343628, "num_chars": 6}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 49, "native_id": "a588407ecaecf0f30c2241c30b470fe2", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.45385456085205, "incorrect_loss_raw": 12.142898321151733, "correct_loss_per_char": 0.9544878800710043, "incorrect_loss_per_char": 1.1407314240932465, "correct_loss_per_token": 3.817951520284017, "incorrect_loss_per_token": 6.872400999069214, "correct_loss_uncond": -8.711417198181152, "incorrect_loss_uncond": -6.721726417541504}, "model_output": [{"sum_logits": -10.278115272521973, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.767902374267578, "logits_per_token": -5.139057636260986, "logits_per_char": -0.8565096060434977, "num_chars": 12}, {"sum_logits": -8.939123153686523, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.35763168334961, "logits_per_token": -4.469561576843262, "logits_per_char": -0.9932359059651693, "num_chars": 9}, {"sum_logits": -11.45385456085205, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.165271759033203, "logits_per_token": -3.817951520284017, "logits_per_char": -0.9544878800710043, "num_chars": 12}, {"sum_logits": -15.297826766967773, "num_tokens": 4, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -22.130897521972656, "logits_per_token": -3.8244566917419434, "logits_per_char": -0.9561141729354858, "num_chars": 16}, {"sum_logits": -14.056528091430664, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.202067375183105, "logits_per_token": -14.056528091430664, "logits_per_char": -1.757066011428833, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 50, "native_id": "011096bcfff30fd38046cf9db3a411c5", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.555263996124268, "incorrect_loss_raw": 11.817669153213501, "correct_loss_per_char": 0.5959330905567516, "incorrect_loss_per_char": 1.0495793362716577, "correct_loss_per_token": 3.277631998062134, "incorrect_loss_per_token": 6.3486513296763105, "correct_loss_uncond": -15.145839214324951, "incorrect_loss_uncond": -6.533514499664307}, "model_output": [{"sum_logits": -8.31960678100586, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -24.18323516845703, "logits_per_token": -4.15980339050293, "logits_per_char": -0.7563278891823508, "num_chars": 11}, {"sum_logits": -13.47653579711914, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.86181640625, "logits_per_token": -4.492178599039714, "logits_per_char": -0.8422834873199463, "num_chars": 16}, {"sum_logits": -6.555263996124268, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.70110321044922, "logits_per_token": -3.277631998062134, "logits_per_char": -0.5959330905567516, "num_chars": 11}, {"sum_logits": -8.010712623596191, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.562989234924316, "logits_per_token": -8.010712623596191, "logits_per_char": -1.1443875176565987, "num_chars": 7}, {"sum_logits": -17.463821411132812, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.796693801879883, "logits_per_token": -8.731910705566406, "logits_per_char": -1.4553184509277344, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 51, "native_id": "435a728f45d32faa4b3c4553c966fd6b", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.468445301055908, "incorrect_loss_raw": 9.196280598640442, "correct_loss_per_char": 0.3645630200703939, "incorrect_loss_per_char": 1.0446943882911923, "correct_loss_per_token": 1.8228151003519695, "incorrect_loss_per_token": 5.901381313800812, "correct_loss_uncond": -13.282506465911865, "incorrect_loss_uncond": -5.930248856544495}, "model_output": [{"sum_logits": -6.8671088218688965, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.83715057373047, "logits_per_token": -3.4335544109344482, "logits_per_char": -0.6867108821868897, "num_chars": 10}, {"sum_logits": -5.468445301055908, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -18.750951766967773, "logits_per_token": -1.8228151003519695, "logits_per_char": -0.3645630200703939, "num_chars": 15}, {"sum_logits": -10.425928115844727, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -11.685454368591309, "logits_per_token": -10.425928115844727, "logits_per_char": -1.737654685974121, "num_chars": 6}, {"sum_logits": -10.366657257080078, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.949359893798828, "logits_per_token": -5.183328628540039, "logits_per_char": -0.7404755183628627, "num_chars": 14}, {"sum_logits": -9.125428199768066, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.03415298461914, "logits_per_token": -4.562714099884033, "logits_per_char": -1.0139364666408963, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 52, "native_id": "e953dee48c70159ad879143a319ec607", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.4912748336792, "incorrect_loss_raw": 10.26456069946289, "correct_loss_per_char": 0.943474981519911, "incorrect_loss_per_char": 1.18644911306245, "correct_loss_per_token": 8.4912748336792, "incorrect_loss_per_token": 6.948420405387878, "correct_loss_uncond": -5.808994293212891, "incorrect_loss_uncond": -3.9945335388183594}, "model_output": [{"sum_logits": -8.706535339355469, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.39815616607666, "logits_per_token": -8.706535339355469, "logits_per_char": -1.7413070678710938, "num_chars": 5}, {"sum_logits": -13.168609619140625, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.001760482788086, "logits_per_token": -6.5843048095703125, "logits_per_char": -0.9406149727957589, "num_chars": 14}, {"sum_logits": -5.822585105895996, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.327664375305176, "logits_per_token": -5.822585105895996, "logits_per_char": -0.7278231382369995, "num_chars": 8}, {"sum_logits": -8.4912748336792, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.30026912689209, "logits_per_token": -8.4912748336792, "logits_per_char": -0.943474981519911, "num_chars": 9}, {"sum_logits": -13.360512733459473, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.308795928955078, "logits_per_token": -6.680256366729736, "logits_per_char": -1.3360512733459473, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 53, "native_id": "9c784727afd7176b54764055df7a7927", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 11.584501266479492, "incorrect_loss_raw": 17.13094139099121, "correct_loss_per_char": 1.2871668073866103, "incorrect_loss_per_char": 1.2298837093522759, "correct_loss_per_token": 5.792250633239746, "incorrect_loss_per_token": 7.381071150302887, "correct_loss_uncond": -8.305112838745117, "incorrect_loss_uncond": -3.688466787338257}, "model_output": [{"sum_logits": -12.43472671508789, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.959651947021484, "logits_per_token": -6.217363357543945, "logits_per_char": -1.2434726715087892, "num_chars": 10}, {"sum_logits": -12.02155876159668, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.01435375213623, "logits_per_token": -12.02155876159668, "logits_per_char": -1.502694845199585, "num_chars": 8}, {"sum_logits": -11.584501266479492, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.88961410522461, "logits_per_token": -5.792250633239746, "logits_per_char": -1.2871668073866103, "num_chars": 9}, {"sum_logits": -28.66234016418457, "num_tokens": 8, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -32.417572021484375, "logits_per_token": -3.5827925205230713, "logits_per_char": -0.9883565573856748, "num_chars": 29}, {"sum_logits": -15.405139923095703, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.88605499267578, "logits_per_token": -7.702569961547852, "logits_per_char": -1.1850107633150542, "num_chars": 13}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 54, "native_id": "b47d912136e3304cb5e5890b6b879551", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.177501678466797, "incorrect_loss_raw": 13.066526174545288, "correct_loss_per_char": 0.5521155137282151, "incorrect_loss_per_char": 0.9960954860446936, "correct_loss_per_token": 2.392500559488932, "incorrect_loss_per_token": 7.072452068328857, "correct_loss_uncond": -11.620742797851562, "incorrect_loss_uncond": -4.229351997375488}, "model_output": [{"sum_logits": -15.729644775390625, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -17.857337951660156, "logits_per_token": -7.8648223876953125, "logits_per_char": -1.310803731282552, "num_chars": 12}, {"sum_logits": -9.09060001373291, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.239903450012207, "logits_per_token": -9.09060001373291, "logits_per_char": -0.8264181830666282, "num_chars": 11}, {"sum_logits": -13.114595413208008, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.972402572631836, "logits_per_token": -6.557297706604004, "logits_per_char": -1.0928829511006672, "num_chars": 12}, {"sum_logits": -7.177501678466797, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -18.79824447631836, "logits_per_token": -2.392500559488932, "logits_per_char": -0.5521155137282151, "num_chars": 13}, {"sum_logits": -14.33126449584961, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -20.113868713378906, "logits_per_token": -4.777088165283203, "logits_per_char": -0.7542770787289268, "num_chars": 19}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 55, "native_id": "49b4c9e1bd7946a819e173ce8fa4c7c9", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.8897391557693481, "incorrect_loss_raw": 11.694982528686523, "correct_loss_per_char": 0.08897391557693482, "incorrect_loss_per_char": 1.2000790848618461, "correct_loss_per_token": 0.8897391557693481, "incorrect_loss_per_token": 8.224810123443604, "correct_loss_uncond": -13.522674441337585, "incorrect_loss_uncond": -3.572991132736206}, "model_output": [{"sum_logits": -12.956533432006836, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.03429412841797, "logits_per_token": -6.478266716003418, "logits_per_char": -0.925466673714774, "num_chars": 14}, {"sum_logits": -14.804845809936523, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.353347778320312, "logits_per_token": -7.402422904968262, "logits_per_char": -1.0574889864240373, "num_chars": 14}, {"sum_logits": -8.457547187805176, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.993132591247559, "logits_per_token": -8.457547187805176, "logits_per_char": -1.057193398475647, "num_chars": 8}, {"sum_logits": -10.561003684997559, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.691120147705078, "logits_per_token": -10.561003684997559, "logits_per_char": -1.7601672808329265, "num_chars": 6}, {"sum_logits": -0.8897391557693481, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": true, "sum_logits_uncond": -14.412413597106934, "logits_per_token": -0.8897391557693481, "logits_per_char": -0.08897391557693482, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 56, "native_id": "950af0b765c298960ce3dada66df8db1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.388649940490723, "incorrect_loss_raw": 12.144481420516968, "correct_loss_per_char": 0.5323874950408936, "incorrect_loss_per_char": 1.385127005251971, "correct_loss_per_token": 6.388649940490723, "incorrect_loss_per_token": 5.556834856669108, "correct_loss_uncond": -8.768885612487793, "incorrect_loss_uncond": -4.130229234695435}, "model_output": [{"sum_logits": -10.847315788269043, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.837525367736816, "logits_per_token": -5.4236578941345215, "logits_per_char": -0.9861196171153676, "num_chars": 11}, {"sum_logits": -6.388649940490723, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.157535552978516, "logits_per_token": -6.388649940490723, "logits_per_char": -0.5323874950408936, "num_chars": 12}, {"sum_logits": -11.659523963928223, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.49689483642578, "logits_per_token": -5.829761981964111, "logits_per_char": -1.2955026626586914, "num_chars": 9}, {"sum_logits": -13.701345443725586, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.95015811920166, "logits_per_token": -6.850672721862793, "logits_per_char": -1.7126681804656982, "num_chars": 8}, {"sum_logits": -12.36974048614502, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.814264297485352, "logits_per_token": -4.123246828715007, "logits_per_char": -1.5462175607681274, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 57, "native_id": "63cf1adb5fe302b9867ead8bc8103d0b", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.819608688354492, "incorrect_loss_raw": 10.107918977737427, "correct_loss_per_char": 0.6546405792236328, "incorrect_loss_per_char": 1.0555145303408306, "correct_loss_per_token": 3.273202896118164, "incorrect_loss_per_token": 6.808380484580994, "correct_loss_uncond": -11.254568099975586, "incorrect_loss_uncond": -5.439924240112305}, "model_output": [{"sum_logits": -14.205819129943848, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.00970458984375, "logits_per_token": -7.102909564971924, "logits_per_char": -0.7102909564971924, "num_chars": 20}, {"sum_logits": -8.589709281921387, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -10.4298095703125, "logits_per_token": -8.589709281921387, "logits_per_char": -1.7179418563842774, "num_chars": 5}, {"sum_logits": -5.4456586837768555, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.635601997375488, "logits_per_token": -5.4456586837768555, "logits_per_char": -0.7779512405395508, "num_chars": 7}, {"sum_logits": -9.819608688354492, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -21.074176788330078, "logits_per_token": -3.273202896118164, "logits_per_char": -0.6546405792236328, "num_chars": 15}, {"sum_logits": -12.190488815307617, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.116256713867188, "logits_per_token": -6.095244407653809, "logits_per_char": -1.0158740679423015, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 58, "native_id": "ede4d302fc2ffe07703158f83c1493f2", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.210210800170898, "incorrect_loss_raw": 11.566179037094116, "correct_loss_per_char": 1.2455789777967665, "incorrect_loss_per_char": 1.6154368784692552, "correct_loss_per_token": 11.210210800170898, "incorrect_loss_per_token": 11.566179037094116, "correct_loss_uncond": -3.2490968704223633, "incorrect_loss_uncond": -2.6186628341674805}, "model_output": [{"sum_logits": -13.252484321594238, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.400054931640625, "logits_per_token": -13.252484321594238, "logits_per_char": -2.208747386932373, "num_chars": 6}, {"sum_logits": -11.210210800170898, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.459307670593262, "logits_per_token": -11.210210800170898, "logits_per_char": -1.2455789777967665, "num_chars": 9}, {"sum_logits": -7.104297637939453, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.189236640930176, "logits_per_token": -7.104297637939453, "logits_per_char": -1.1840496063232422, "num_chars": 6}, {"sum_logits": -13.700963973999023, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.096293449401855, "logits_per_token": -13.700963973999023, "logits_per_char": -1.712620496749878, "num_chars": 8}, {"sum_logits": -12.20697021484375, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.05378246307373, "logits_per_token": -12.20697021484375, "logits_per_char": -1.3563300238715277, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 59, "native_id": "74ad13a03634e79c85382f1b90969b74", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 14.350509643554688, "incorrect_loss_raw": 15.89298129081726, "correct_loss_per_char": 1.4350509643554688, "incorrect_loss_per_char": 1.1666871317431458, "correct_loss_per_token": 7.175254821777344, "incorrect_loss_per_token": 6.836506883303324, "correct_loss_uncond": -9.504241943359375, "incorrect_loss_uncond": -4.756975412368774}, "model_output": [{"sum_logits": -14.350509643554688, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -23.854751586914062, "logits_per_token": -7.175254821777344, "logits_per_char": -1.4350509643554688, "num_chars": 10}, {"sum_logits": -11.220409393310547, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.494699478149414, "logits_per_token": -5.610204696655273, "logits_per_char": -1.1220409393310546, "num_chars": 10}, {"sum_logits": -26.639610290527344, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -29.039321899414062, "logits_per_token": -8.879870096842447, "logits_per_char": -1.4799783494737413, "num_chars": 18}, {"sum_logits": -13.554895401000977, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.01854705810547, "logits_per_token": -6.777447700500488, "logits_per_char": -1.1295746167500813, "num_chars": 12}, {"sum_logits": -12.157010078430176, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.047258377075195, "logits_per_token": -6.078505039215088, "logits_per_char": -0.9351546214177058, "num_chars": 13}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 60, "native_id": "49e466b1782aa4837dae53ff891fcdee", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.880915641784668, "incorrect_loss_raw": 12.928721189498901, "correct_loss_per_char": 1.3201017379760742, "incorrect_loss_per_char": 1.0116559894649417, "correct_loss_per_token": 5.940457820892334, "incorrect_loss_per_token": 6.359538594881694, "correct_loss_uncond": -3.397709846496582, "incorrect_loss_uncond": -5.331818580627441}, "model_output": [{"sum_logits": -12.574409484863281, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.348003387451172, "logits_per_token": -4.191469828287761, "logits_per_char": -0.9672622680664062, "num_chars": 13}, {"sum_logits": -11.880915641784668, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.27862548828125, "logits_per_token": -5.940457820892334, "logits_per_char": -1.3201017379760742, "num_chars": 9}, {"sum_logits": -14.455961227416992, "num_tokens": 4, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.732297897338867, "logits_per_token": -3.613990306854248, "logits_per_char": -1.0325686591012138, "num_chars": 14}, {"sum_logits": -10.5808744430542, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.953621864318848, "logits_per_token": -10.5808744430542, "logits_per_char": -0.9618976766412909, "num_chars": 11}, {"sum_logits": -14.103639602661133, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -21.008235931396484, "logits_per_token": -7.051819801330566, "logits_per_char": -1.0848953540508564, "num_chars": 13}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 61, "native_id": "a8a8ae7792901c7179ff5538c701af1f", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.121320724487305, "incorrect_loss_raw": 9.573075294494629, "correct_loss_per_char": 1.020220120747884, "incorrect_loss_per_char": 1.34139227441379, "correct_loss_per_token": 6.121320724487305, "incorrect_loss_per_token": 8.410770654678345, "correct_loss_uncond": -6.657209396362305, "incorrect_loss_uncond": -5.188302040100098}, "model_output": [{"sum_logits": -12.595281600952148, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.017801284790039, "logits_per_token": -12.595281600952148, "logits_per_char": -2.5190563201904297, "num_chars": 5}, {"sum_logits": -7.420889854431152, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.562989234924316, "logits_per_token": -7.420889854431152, "logits_per_char": -1.060127122061593, "num_chars": 7}, {"sum_logits": -8.977692604064941, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.663012504577637, "logits_per_token": -8.977692604064941, "logits_per_char": -1.1222115755081177, "num_chars": 8}, {"sum_logits": -9.298437118530273, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.801706314086914, "logits_per_token": -4.649218559265137, "logits_per_char": -0.6641740798950195, "num_chars": 14}, {"sum_logits": -6.121320724487305, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.77853012084961, "logits_per_token": -6.121320724487305, "logits_per_char": -1.020220120747884, "num_chars": 6}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 62, "native_id": "2ffa3808ce26181926990b454e429c85", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.1582727432250977, "incorrect_loss_raw": 10.446910381317139, "correct_loss_per_char": 0.3158272743225098, "incorrect_loss_per_char": 0.9382117089298037, "correct_loss_per_token": 1.5791363716125488, "incorrect_loss_per_token": 5.221166233221689, "correct_loss_uncond": -12.221293449401855, "incorrect_loss_uncond": -6.190150260925293}, "model_output": [{"sum_logits": -7.891663551330566, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.453655242919922, "logits_per_token": -7.891663551330566, "logits_per_char": -0.8768515057033963, "num_chars": 9}, {"sum_logits": -6.647019386291504, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.778640747070312, "logits_per_token": -2.2156731287638345, "logits_per_char": -0.415438711643219, "num_chars": 16}, {"sum_logits": -3.1582727432250977, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.379566192626953, "logits_per_token": -1.5791363716125488, "logits_per_char": -0.3158272743225098, "num_chars": 10}, {"sum_logits": -11.388604164123535, "num_tokens": 4, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.548044204711914, "logits_per_token": -2.847151041030884, "logits_per_char": -1.1388604164123535, "num_chars": 10}, {"sum_logits": -15.86035442352295, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.767902374267578, "logits_per_token": -7.930177211761475, "logits_per_char": -1.3216962019602458, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 63, "native_id": "4319eaa36d256a92b72445c0392f9c94", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 16.057714462280273, "incorrect_loss_raw": 11.277587652206421, "correct_loss_per_char": 2.676285743713379, "incorrect_loss_per_char": 1.254321422510677, "correct_loss_per_token": 8.028857231140137, "incorrect_loss_per_token": 6.160061875979105, "correct_loss_uncond": -3.7715721130371094, "incorrect_loss_uncond": -6.3140339851379395}, "model_output": [{"sum_logits": -8.109127044677734, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.596142768859863, "logits_per_token": -8.109127044677734, "logits_per_char": -1.6218254089355468, "num_chars": 5}, {"sum_logits": -11.816947937011719, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.83474349975586, "logits_per_token": -3.9389826456705728, "logits_per_char": -1.3129942152235243, "num_chars": 9}, {"sum_logits": -13.558635711669922, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.15987777709961, "logits_per_token": -6.779317855834961, "logits_per_char": -1.3558635711669922, "num_chars": 10}, {"sum_logits": -16.057714462280273, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.829286575317383, "logits_per_token": -8.028857231140137, "logits_per_char": -2.676285743713379, "num_chars": 6}, {"sum_logits": -11.625639915466309, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.77572250366211, "logits_per_token": -5.812819957733154, "logits_per_char": -0.7266024947166443, "num_chars": 16}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 64, "native_id": "ec79ef747bb89281923edb89ba26786d", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.68120002746582, "incorrect_loss_raw": 10.673856258392334, "correct_loss_per_char": 0.9710181843150746, "incorrect_loss_per_char": 1.1255277679080056, "correct_loss_per_token": 5.34060001373291, "incorrect_loss_per_token": 6.047301570574443, "correct_loss_uncond": -9.121992111206055, "incorrect_loss_uncond": -5.7792887687683105}, "model_output": [{"sum_logits": -10.68120002746582, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.803192138671875, "logits_per_token": -5.34060001373291, "logits_per_char": -0.9710181843150746, "num_chars": 11}, {"sum_logits": -8.261961936950684, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.565593719482422, "logits_per_token": -4.130980968475342, "logits_per_char": -0.9179957707722982, "num_chars": 9}, {"sum_logits": -13.719768524169922, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.47163200378418, "logits_per_token": -4.573256174723308, "logits_per_char": -1.3719768524169922, "num_chars": 10}, {"sum_logits": -10.256243705749512, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.060895919799805, "logits_per_token": -10.256243705749512, "logits_per_char": -1.4651776722499303, "num_chars": 7}, {"sum_logits": -10.457450866699219, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.714458465576172, "logits_per_token": -5.228725433349609, "logits_per_char": -0.7469607761928013, "num_chars": 14}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 65, "native_id": "2d33cde5e3987adc8fa2bca0af4dd3dd", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 12.706323623657227, "incorrect_loss_raw": 12.48016905784607, "correct_loss_per_char": 0.705906867980957, "incorrect_loss_per_char": 1.0481854086314446, "correct_loss_per_token": 4.235441207885742, "incorrect_loss_per_token": 6.240084528923035, "correct_loss_uncond": -10.644365310668945, "incorrect_loss_uncond": -7.288588285446167}, "model_output": [{"sum_logits": -12.706323623657227, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -23.350688934326172, "logits_per_token": -4.235441207885742, "logits_per_char": -0.705906867980957, "num_chars": 18}, {"sum_logits": -15.718963623046875, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.208065032958984, "logits_per_token": -7.8594818115234375, "logits_per_char": -1.4289966930042615, "num_chars": 11}, {"sum_logits": -9.361709594726562, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.047704696655273, "logits_per_token": -4.680854797363281, "logits_per_char": -0.8510645086115057, "num_chars": 11}, {"sum_logits": -11.625138282775879, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.717823028564453, "logits_per_token": -5.8125691413879395, "logits_per_char": -0.9687615235646566, "num_chars": 12}, {"sum_logits": -13.214864730834961, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -22.101436614990234, "logits_per_token": -6.6074323654174805, "logits_per_char": -0.9439189093453544, "num_chars": 14}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 66, "native_id": "cc46d936bf69d69a3863b0cb85d75c17", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.291689872741699, "incorrect_loss_raw": 10.974267959594727, "correct_loss_per_char": 0.7291689872741699, "incorrect_loss_per_char": 1.9964307454916148, "correct_loss_per_token": 7.291689872741699, "incorrect_loss_per_token": 10.974267959594727, "correct_loss_uncond": -5.256032943725586, "incorrect_loss_uncond": -2.645801544189453}, "model_output": [{"sum_logits": -10.958152770996094, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.923977851867676, "logits_per_token": -10.958152770996094, "logits_per_char": -2.7395381927490234, "num_chars": 4}, {"sum_logits": -10.829917907714844, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.76987361907959, "logits_per_token": -10.829917907714844, "logits_per_char": -2.707479476928711, "num_chars": 4}, {"sum_logits": -12.771142959594727, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.35517406463623, "logits_per_token": -12.771142959594727, "logits_per_char": -0.9823956122765174, "num_chars": 13}, {"sum_logits": -7.291689872741699, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.547722816467285, "logits_per_token": -7.291689872741699, "logits_per_char": -0.7291689872741699, "num_chars": 10}, {"sum_logits": -9.337858200073242, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.431252479553223, "logits_per_token": -9.337858200073242, "logits_per_char": -1.556309700012207, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 67, "native_id": "46bc1a50eeead10509a43a048e01194e", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.333691120147705, "incorrect_loss_raw": 7.38910049200058, "correct_loss_per_char": 0.9167113900184631, "incorrect_loss_per_char": 0.6195821704098542, "correct_loss_per_token": 2.444563706715902, "incorrect_loss_per_token": 4.175001919269562, "correct_loss_uncond": -8.033825397491455, "incorrect_loss_uncond": -10.758690893650055}, "model_output": [{"sum_logits": -7.333691120147705, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.36751651763916, "logits_per_token": -2.444563706715902, "logits_per_char": -0.9167113900184631, "num_chars": 8}, {"sum_logits": -10.43008041381836, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -23.580848693847656, "logits_per_token": -5.21504020690918, "logits_per_char": -0.8023138779860276, "num_chars": 13}, {"sum_logits": -3.843613386154175, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.90507698059082, "logits_per_token": -3.843613386154175, "logits_per_char": -0.6406022310256958, "num_chars": 6}, {"sum_logits": -7.537743091583252, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.516460418701172, "logits_per_token": -3.768871545791626, "logits_per_char": -0.5798263916602502, "num_chars": 13}, {"sum_logits": -7.744965076446533, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.58877944946289, "logits_per_token": -3.8724825382232666, "logits_per_char": -0.45558618096744313, "num_chars": 17}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 68, "native_id": "4336a8c55b7cb17275d1c60206cd2f18", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.707119464874268, "incorrect_loss_raw": 8.532995223999023, "correct_loss_per_char": 0.9511865774790446, "incorrect_loss_per_char": 1.1530131195530746, "correct_loss_per_token": 5.707119464874268, "incorrect_loss_per_token": 7.215579390525818, "correct_loss_uncond": -7.071410655975342, "incorrect_loss_uncond": -5.467540979385376}, "model_output": [{"sum_logits": -10.539326667785645, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.63452911376953, "logits_per_token": -5.269663333892822, "logits_per_char": -0.9581206061623313, "num_chars": 11}, {"sum_logits": -9.531872749328613, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.782123565673828, "logits_per_token": -9.531872749328613, "logits_per_char": -1.9063745498657227, "num_chars": 5}, {"sum_logits": -5.707119464874268, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.77853012084961, "logits_per_token": -5.707119464874268, "logits_per_char": -0.9511865774790446, "num_chars": 6}, {"sum_logits": -8.22546100616455, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.022502899169922, "logits_per_token": -8.22546100616455, "logits_per_char": -0.9139401117960612, "num_chars": 9}, {"sum_logits": -5.835320472717285, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.562989234924316, "logits_per_token": -5.835320472717285, "logits_per_char": -0.8336172103881836, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 69, "native_id": "a287575d3ba4b9f958536fc14a1f5b5a", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.644558906555176, "incorrect_loss_raw": 11.380064725875854, "correct_loss_per_char": 0.5206512723650251, "incorrect_loss_per_char": 1.142901002818888, "correct_loss_per_token": 3.644558906555176, "incorrect_loss_per_token": 5.33163321018219, "correct_loss_uncond": -10.597147941589355, "incorrect_loss_uncond": -4.760331869125366}, "model_output": [{"sum_logits": -13.605682373046875, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.306940078735352, "logits_per_token": -6.8028411865234375, "logits_per_char": -1.2368802157315342, "num_chars": 11}, {"sum_logits": -14.454504013061523, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.565717697143555, "logits_per_token": -7.227252006530762, "logits_per_char": -1.4454504013061524, "num_chars": 10}, {"sum_logits": -8.858492851257324, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.770219802856445, "logits_per_token": -4.429246425628662, "logits_per_char": -1.1073116064071655, "num_chars": 8}, {"sum_logits": -3.644558906555176, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.241706848144531, "logits_per_token": -3.644558906555176, "logits_per_char": -0.5206512723650251, "num_chars": 7}, {"sum_logits": -8.601579666137695, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.918708801269531, "logits_per_token": -2.8671932220458984, "logits_per_char": -0.7819617878306996, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 70, "native_id": "f481dc35b0a97a20dc5cdfe1a59746e2", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.2540671825408936, "incorrect_loss_raw": 11.738192081451416, "correct_loss_per_char": 0.20901119709014893, "incorrect_loss_per_char": 1.2425613028662545, "correct_loss_per_token": 1.2540671825408936, "incorrect_loss_per_token": 8.291751384735107, "correct_loss_uncond": -12.913954019546509, "incorrect_loss_uncond": -4.20671820640564}, "model_output": [{"sum_logits": -15.036344528198242, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.320587158203125, "logits_per_token": -7.518172264099121, "logits_per_char": -1.5036344528198242, "num_chars": 10}, {"sum_logits": -6.415928840637207, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.927323341369629, "logits_per_token": -6.415928840637207, "logits_per_char": -0.9165612629481724, "num_chars": 7}, {"sum_logits": -12.535181045532227, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.94315528869629, "logits_per_token": -6.267590522766113, "logits_per_char": -1.2535181045532227, "num_chars": 10}, {"sum_logits": -12.965313911437988, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.58857536315918, "logits_per_token": -12.965313911437988, "logits_per_char": -1.2965313911437988, "num_chars": 10}, {"sum_logits": -1.2540671825408936, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": true, "sum_logits_uncond": -14.168021202087402, "logits_per_token": -1.2540671825408936, "logits_per_char": -0.20901119709014893, "num_chars": 6}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 71, "native_id": "c1c7a9efa379b8a7024a71cf364a144c", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.240427255630493, "incorrect_loss_raw": 13.831030130386353, "correct_loss_per_char": 0.4629181793757847, "incorrect_loss_per_char": 1.4110123816162647, "correct_loss_per_token": 3.240427255630493, "incorrect_loss_per_token": 7.276180426279704, "correct_loss_uncond": -9.322561979293823, "incorrect_loss_uncond": -3.823892593383789}, "model_output": [{"sum_logits": -5.942285537719727, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -14.693686485290527, "logits_per_token": -5.942285537719727, "logits_per_char": -1.1884571075439454, "num_chars": 5}, {"sum_logits": -23.306331634521484, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -17.460208892822266, "logits_per_token": -11.653165817260742, "logits_per_char": -2.118757421320135, "num_chars": 11}, {"sum_logits": -9.17088794708252, "num_tokens": 3, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -19.82883071899414, "logits_per_token": -3.0569626490275064, "logits_per_char": -0.458544397354126, "num_chars": 20}, {"sum_logits": -16.90461540222168, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -18.636964797973633, "logits_per_token": -8.45230770111084, "logits_per_char": -1.8782906002468533, "num_chars": 9}, {"sum_logits": -3.240427255630493, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -12.562989234924316, "logits_per_token": -3.240427255630493, "logits_per_char": -0.4629181793757847, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 72, "native_id": "821b32d39f57396979069b948030afe9", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.38124942779541, "incorrect_loss_raw": 12.078395247459412, "correct_loss_per_char": 0.3587499618530273, "incorrect_loss_per_char": 1.1440239584112502, "correct_loss_per_token": 1.7937498092651367, "incorrect_loss_per_token": 7.04783970117569, "correct_loss_uncond": -15.173954963684082, "incorrect_loss_uncond": -5.252280592918396}, "model_output": [{"sum_logits": -16.648380279541016, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.018857955932617, "logits_per_token": -8.324190139770508, "logits_per_char": -1.2806446368877704, "num_chars": 13}, {"sum_logits": -6.968477725982666, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.691415786743164, "logits_per_token": -3.484238862991333, "logits_per_char": -0.6334979750893333, "num_chars": 11}, {"sum_logits": -12.47068977355957, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.509748458862305, "logits_per_token": -4.156896591186523, "logits_per_char": -1.1336990703235974, "num_chars": 11}, {"sum_logits": -5.38124942779541, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.555204391479492, "logits_per_token": -1.7937498092651367, "logits_per_char": -0.3587499618530273, "num_chars": 15}, {"sum_logits": -12.226033210754395, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.102681159973145, "logits_per_token": -12.226033210754395, "logits_per_char": -1.5282541513442993, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 73, "native_id": "c68b4082a6872cf8198502651d0f3352", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.408810615539551, "incorrect_loss_raw": 10.597337126731873, "correct_loss_per_char": 0.5826191468672319, "incorrect_loss_per_char": 1.0736990256915018, "correct_loss_per_token": 3.2044053077697754, "incorrect_loss_per_token": 5.6895031332969666, "correct_loss_uncond": -11.012042045593262, "incorrect_loss_uncond": -5.9814523458480835}, "model_output": [{"sum_logits": -6.408810615539551, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.420852661132812, "logits_per_token": -3.2044053077697754, "logits_per_char": -0.5826191468672319, "num_chars": 11}, {"sum_logits": -11.116127967834473, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.976317405700684, "logits_per_token": -11.116127967834473, "logits_per_char": -1.5880182811192103, "num_chars": 7}, {"sum_logits": -5.206590175628662, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.1314640045166, "logits_per_token": -2.603295087814331, "logits_per_char": -0.5206590175628663, "num_chars": 10}, {"sum_logits": -10.087727546691895, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.022384643554688, "logits_per_token": -5.043863773345947, "logits_per_char": -1.1208586162990994, "num_chars": 9}, {"sum_logits": -15.978902816772461, "num_tokens": 4, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -18.18499183654785, "logits_per_token": -3.9947257041931152, "logits_per_char": -1.0652601877848307, "num_chars": 15}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 74, "native_id": "dd11fea36d89aa09f9a6069545ba4c9c", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.74246883392334, "incorrect_loss_raw": 12.675236582756042, "correct_loss_per_char": 0.7285390694936117, "incorrect_loss_per_char": 1.348421084880829, "correct_loss_per_token": 2.9141562779744468, "incorrect_loss_per_token": 6.6297965288162235, "correct_loss_uncond": -11.42736530303955, "incorrect_loss_uncond": -4.485661625862122}, "model_output": [{"sum_logits": -11.15522575378418, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -16.80799102783203, "logits_per_token": -5.57761287689209, "logits_per_char": -1.115522575378418, "num_chars": 10}, {"sum_logits": -23.255184173583984, "num_tokens": 5, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -24.868690490722656, "logits_per_token": -4.6510368347167965, "logits_per_char": -1.2239570617675781, "num_chars": 19}, {"sum_logits": -8.74246883392334, "num_tokens": 3, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -20.16983413696289, "logits_per_token": -2.9141562779744468, "logits_per_char": -0.7285390694936117, "num_chars": 12}, {"sum_logits": -10.17345905303955, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -14.029365539550781, "logits_per_token": -10.17345905303955, "logits_per_char": -2.03469181060791, "num_chars": 5}, {"sum_logits": -6.117077350616455, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -12.937545776367188, "logits_per_token": -6.117077350616455, "logits_per_char": -1.0195128917694092, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 75, "native_id": "7792b2c6518ecf9775efba6d41253312", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.944594144821167, "incorrect_loss_raw": 11.757470607757568, "correct_loss_per_char": 0.3585994677110152, "incorrect_loss_per_char": 0.9388121936441477, "correct_loss_per_token": 3.944594144821167, "incorrect_loss_per_token": 8.091844260692596, "correct_loss_uncond": -8.210633516311646, "incorrect_loss_uncond": -3.795659303665161}, "model_output": [{"sum_logits": -9.799906730651855, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -16.467552185058594, "logits_per_token": -4.899953365325928, "logits_per_char": -0.7538389792809119, "num_chars": 13}, {"sum_logits": -3.944594144821167, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -12.155227661132812, "logits_per_token": -3.944594144821167, "logits_per_char": -0.3585994677110152, "num_chars": 11}, {"sum_logits": -13.016736030578613, "num_tokens": 4, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -18.401098251342773, "logits_per_token": -3.2541840076446533, "logits_per_char": -0.6850913700304533, "num_chars": 19}, {"sum_logits": -12.662630081176758, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -12.73450756072998, "logits_per_token": -12.662630081176758, "logits_per_char": -1.2662630081176758, "num_chars": 10}, {"sum_logits": -11.550609588623047, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -14.60936164855957, "logits_per_token": -11.550609588623047, "logits_per_char": -1.0500554171475498, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 76, "native_id": "1feb4c2a0e8ed638259f5d27b16eae9a", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.03787899017334, "incorrect_loss_raw": 8.560890555381775, "correct_loss_per_char": 0.41982324918111164, "incorrect_loss_per_char": 1.3463763922839969, "correct_loss_per_token": 5.03787899017334, "incorrect_loss_per_token": 6.866366982460022, "correct_loss_uncond": -10.119656562805176, "incorrect_loss_uncond": -5.990451693534851}, "model_output": [{"sum_logits": -9.044808387756348, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.533038139343262, "logits_per_token": -9.044808387756348, "logits_per_char": -1.8089616775512696, "num_chars": 5}, {"sum_logits": -6.355435371398926, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.126147270202637, "logits_per_token": -6.355435371398926, "logits_per_char": -1.5888588428497314, "num_chars": 4}, {"sum_logits": -5.03787899017334, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.157535552978516, "logits_per_token": -5.03787899017334, "logits_per_char": -0.41982324918111164, "num_chars": 12}, {"sum_logits": -5.287129878997803, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.858729362487793, "logits_per_token": -5.287129878997803, "logits_per_char": -0.7553042684282575, "num_chars": 7}, {"sum_logits": -13.556188583374023, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.687454223632812, "logits_per_token": -6.778094291687012, "logits_per_char": -1.2323807803067295, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 77, "native_id": "2de08c7a518b7c226e19bdc8fc10ef1d", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.611016273498535, "incorrect_loss_raw": 11.120887160301208, "correct_loss_per_char": 0.6919105703180487, "incorrect_loss_per_char": 1.5035194019476572, "correct_loss_per_token": 7.611016273498535, "incorrect_loss_per_token": 8.22409212589264, "correct_loss_uncond": -6.357217788696289, "incorrect_loss_uncond": -5.828890919685364}, "model_output": [{"sum_logits": -23.174360275268555, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -25.33893585205078, "logits_per_token": -11.587180137634277, "logits_per_char": -2.5749289194742837, "num_chars": 9}, {"sum_logits": -9.689189910888672, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.041705131530762, "logits_per_token": -9.689189910888672, "logits_per_char": -1.9378379821777343, "num_chars": 5}, {"sum_logits": -6.395730018615723, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.26093578338623, "logits_per_token": -6.395730018615723, "logits_per_char": -1.0659550031026204, "num_chars": 6}, {"sum_logits": -5.224268436431885, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.157535552978516, "logits_per_token": -5.224268436431885, "logits_per_char": -0.4353557030359904, "num_chars": 12}, {"sum_logits": -7.611016273498535, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.968234062194824, "logits_per_token": -7.611016273498535, "logits_per_char": -0.6919105703180487, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 78, "native_id": "ea8664e77205224154f8519f922220e1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.0536507368087769, "incorrect_loss_raw": 11.015504598617554, "correct_loss_per_char": 0.15052153382982528, "incorrect_loss_per_char": 1.4769226729869842, "correct_loss_per_token": 1.0536507368087769, "incorrect_loss_per_token": 9.728447675704956, "correct_loss_uncond": -8.403184056282043, "incorrect_loss_uncond": -1.9005193710327148}, "model_output": [{"sum_logits": -10.296455383300781, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -16.045604705810547, "logits_per_token": -5.148227691650391, "logits_per_char": -1.0296455383300782, "num_chars": 10}, {"sum_logits": -1.0536507368087769, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": true, "sum_logits_uncond": -9.45683479309082, "logits_per_token": -1.0536507368087769, "logits_per_char": -0.15052153382982528, "num_chars": 7}, {"sum_logits": -8.764663696289062, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -10.02719497680664, "logits_per_token": -8.764663696289062, "logits_per_char": -1.7529327392578125, "num_chars": 5}, {"sum_logits": -13.039430618286133, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.874704360961914, "logits_per_token": -13.039430618286133, "logits_per_char": -1.6299288272857666, "num_chars": 8}, {"sum_logits": -11.961468696594238, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.716591835021973, "logits_per_token": -11.961468696594238, "logits_per_char": -1.4951835870742798, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 79, "native_id": "a64d45cecde84fdcf5f0a79805a0c6fe", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 15.747303009033203, "incorrect_loss_raw": 9.460927486419678, "correct_loss_per_char": 1.7497003343370225, "incorrect_loss_per_char": 1.2431673467761337, "correct_loss_per_token": 7.873651504516602, "incorrect_loss_per_token": 7.095042983690897, "correct_loss_uncond": -4.789234161376953, "incorrect_loss_uncond": -6.428168535232544}, "model_output": [{"sum_logits": -5.696041107177734, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.796329498291016, "logits_per_token": -1.8986803690592449, "logits_per_char": -0.5178219188343395, "num_chars": 11}, {"sum_logits": -10.889708518981934, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.619281768798828, "logits_per_token": -10.889708518981934, "logits_per_char": -2.7224271297454834, "num_chars": 4}, {"sum_logits": -15.747303009033203, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.536537170410156, "logits_per_token": -7.873651504516602, "logits_per_char": -1.7497003343370225, "num_chars": 9}, {"sum_logits": -11.332354545593262, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.856544494628906, "logits_per_token": -5.666177272796631, "logits_per_char": -0.629575252532959, "num_chars": 18}, {"sum_logits": -9.925605773925781, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.284228324890137, "logits_per_token": -9.925605773925781, "logits_per_char": -1.1028450859917536, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 80, "native_id": "60e92cd2f35c345872d1a898e1718d55", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.8193373680114746, "incorrect_loss_raw": 12.723307132720947, "correct_loss_per_char": 0.16386747360229492, "incorrect_loss_per_char": 1.6437566629563918, "correct_loss_per_token": 0.8193373680114746, "incorrect_loss_per_token": 7.8233067989349365, "correct_loss_uncond": -11.731332302093506, "incorrect_loss_uncond": -2.381472110748291}, "model_output": [{"sum_logits": -16.48240089416504, "num_tokens": 2, "num_tokens_all": 168, "is_greedy": false, "sum_logits_uncond": -15.27862548828125, "logits_per_token": -8.24120044708252, "logits_per_char": -1.8313778771294489, "num_chars": 9}, {"sum_logits": -0.8193373680114746, "num_tokens": 1, "num_tokens_all": 167, "is_greedy": true, "sum_logits_uncond": -12.55066967010498, "logits_per_token": -0.8193373680114746, "logits_per_char": -0.16386747360229492, "num_chars": 5}, {"sum_logits": -13.08765697479248, "num_tokens": 2, "num_tokens_all": 168, "is_greedy": false, "sum_logits_uncond": -17.456369400024414, "logits_per_token": -6.54382848739624, "logits_per_char": -1.1897869977084072, "num_chars": 11}, {"sum_logits": -9.629944801330566, "num_tokens": 2, "num_tokens_all": 168, "is_greedy": false, "sum_logits_uncond": -15.442835807800293, "logits_per_token": -4.814972400665283, "logits_per_char": -1.604990800221761, "num_chars": 6}, {"sum_logits": -11.693225860595703, "num_tokens": 1, "num_tokens_all": 167, "is_greedy": false, "sum_logits_uncond": -12.241286277770996, "logits_per_token": -11.693225860595703, "logits_per_char": -1.9488709767659504, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 81, "native_id": "08f3c187908646997b9080c7e9ea7da4", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.509580135345459, "incorrect_loss_raw": 8.134429216384888, "correct_loss_per_char": 0.25053222974141437, "incorrect_loss_per_char": 1.345105392403073, "correct_loss_per_token": 2.2547900676727295, "incorrect_loss_per_token": 5.441686928272247, "correct_loss_uncond": -13.970091342926025, "incorrect_loss_uncond": -6.415668487548828}, "model_output": [{"sum_logits": -15.642126083374023, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.034934997558594, "logits_per_token": -7.821063041687012, "logits_per_char": -2.6070210138956704, "num_chars": 6}, {"sum_logits": -6.393885135650635, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.27395248413086, "logits_per_token": -6.393885135650635, "logits_per_char": -1.2787770271301269, "num_chars": 5}, {"sum_logits": -5.8998122215271, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.998479843139648, "logits_per_token": -2.94990611076355, "logits_per_char": -0.9833020369211832, "num_chars": 6}, {"sum_logits": -4.509580135345459, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.479671478271484, "logits_per_token": -2.2547900676727295, "logits_per_char": -0.25053222974141437, "num_chars": 18}, {"sum_logits": -4.601893424987793, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.893023490905762, "logits_per_token": -4.601893424987793, "logits_per_char": -0.5113214916653104, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 82, "native_id": "9aff72f0c480c2b4edde45bd2e7e4870", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.288107395172119, "incorrect_loss_raw": 10.03520679473877, "correct_loss_per_char": 0.5240089495976766, "incorrect_loss_per_char": 0.8373702000348996, "correct_loss_per_token": 3.1440536975860596, "incorrect_loss_per_token": 4.423571268717448, "correct_loss_uncond": -16.535918712615967, "incorrect_loss_uncond": -10.992459297180176}, "model_output": [{"sum_logits": -8.206302642822266, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.475753784179688, "logits_per_token": -4.103151321411133, "logits_per_char": -0.6312540494478666, "num_chars": 13}, {"sum_logits": -8.142284393310547, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.424877166748047, "logits_per_token": -4.071142196655273, "logits_per_char": -0.8142284393310547, "num_chars": 10}, {"sum_logits": -9.535469055175781, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.68991470336914, "logits_per_token": -4.767734527587891, "logits_per_char": -0.9535469055175781, "num_chars": 10}, {"sum_logits": -14.256771087646484, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -25.520118713378906, "logits_per_token": -4.752257029215495, "logits_per_char": -0.950451405843099, "num_chars": 15}, {"sum_logits": -6.288107395172119, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -22.824026107788086, "logits_per_token": -3.1440536975860596, "logits_per_char": -0.5240089495976766, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 83, "native_id": "fd243c96edec5b1b8520d5bfeddc6622", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.897624969482422, "incorrect_loss_raw": 7.093680739402771, "correct_loss_per_char": 0.4452386335893111, "incorrect_loss_per_char": 1.2333633133343287, "correct_loss_per_token": 1.6325416564941406, "incorrect_loss_per_token": 7.093680739402771, "correct_loss_uncond": -12.336715698242188, "incorrect_loss_uncond": -6.278623938560486}, "model_output": [{"sum_logits": -4.897624969482422, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.23434066772461, "logits_per_token": -1.6325416564941406, "logits_per_char": -0.4452386335893111, "num_chars": 11}, {"sum_logits": -5.817902088165283, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.887771606445312, "logits_per_token": -5.817902088165283, "logits_per_char": -0.8311288697378976, "num_chars": 7}, {"sum_logits": -8.212599754333496, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.898821830749512, "logits_per_token": -8.212599754333496, "logits_per_char": -2.053149938583374, "num_chars": 4}, {"sum_logits": -9.588460922241211, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.520729064941406, "logits_per_token": -9.588460922241211, "logits_per_char": -1.3697801317487444, "num_chars": 7}, {"sum_logits": -4.755760192871094, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.181896209716797, "logits_per_token": -4.755760192871094, "logits_per_char": -0.6793943132672992, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 84, "native_id": "f5ec4fdfd0e37e733bfc1606b986f1e2", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.676380157470703, "incorrect_loss_raw": 13.094687461853027, "correct_loss_per_char": 1.1862644619411893, "incorrect_loss_per_char": 1.326224946975708, "correct_loss_per_token": 5.338190078735352, "incorrect_loss_per_token": 4.827879746754965, "correct_loss_uncond": -10.635244369506836, "incorrect_loss_uncond": -6.817784786224365}, "model_output": [{"sum_logits": -11.111614227294922, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.003028869628906, "logits_per_token": -5.555807113647461, "logits_per_char": -1.8519357045491536, "num_chars": 6}, {"sum_logits": -12.66409683227539, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.36650276184082, "logits_per_token": -4.221365610758464, "logits_per_char": -1.055341402689616, "num_chars": 12}, {"sum_logits": -12.409196853637695, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.82491111755371, "logits_per_token": -4.136398951212565, "logits_per_char": -1.2409196853637696, "num_chars": 10}, {"sum_logits": -16.1938419342041, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -23.455446243286133, "logits_per_token": -5.397947311401367, "logits_per_char": -1.156702995300293, "num_chars": 14}, {"sum_logits": -10.676380157470703, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.31162452697754, "logits_per_token": -5.338190078735352, "logits_per_char": -1.1862644619411893, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 85, "native_id": "e3c6d147f8a727d314046e70e9579ba0", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.22843074798584, "incorrect_loss_raw": 8.164260745048523, "correct_loss_per_char": 0.5190358956654867, "incorrect_loss_per_char": 0.6587910383939743, "correct_loss_per_token": 3.11421537399292, "incorrect_loss_per_token": 3.8365670442581177, "correct_loss_uncond": -11.874974250793457, "incorrect_loss_uncond": -8.486971974372864}, "model_output": [{"sum_logits": -6.22843074798584, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.103404998779297, "logits_per_token": -3.11421537399292, "logits_per_char": -0.5190358956654867, "num_chars": 12}, {"sum_logits": -4.011905193328857, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.022502899169922, "logits_per_token": -4.011905193328857, "logits_per_char": -0.44576724370320636, "num_chars": 9}, {"sum_logits": -10.715902328491211, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.2312068939209, "logits_per_token": -5.3579511642456055, "logits_per_char": -0.7143934885660808, "num_chars": 15}, {"sum_logits": -9.45136547088623, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.910375595092773, "logits_per_token": -3.1504551569620767, "logits_per_char": -0.945136547088623, "num_chars": 10}, {"sum_logits": -8.477869987487793, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.440845489501953, "logits_per_token": -2.825956662495931, "logits_per_char": -0.5298668742179871, "num_chars": 16}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 86, "native_id": "8ce13c6e08bf38d4cd4af756b661e47c", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.676079750061035, "incorrect_loss_raw": 6.689115405082703, "correct_loss_per_char": 0.5195644166734483, "incorrect_loss_per_char": 0.7849683655159814, "correct_loss_per_token": 4.676079750061035, "incorrect_loss_per_token": 5.079612374305725, "correct_loss_uncond": -10.310400009155273, "incorrect_loss_uncond": -8.884947657585144}, "model_output": [{"sum_logits": -5.291012763977051, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.220001220703125, "logits_per_token": -5.291012763977051, "logits_per_char": -0.6613765954971313, "num_chars": 8}, {"sum_logits": -12.87602424621582, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.49764633178711, "logits_per_token": -6.43801212310791, "logits_per_char": -1.287602424621582, "num_chars": 10}, {"sum_logits": -2.025308132171631, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.5768461227417, "logits_per_token": -2.025308132171631, "logits_per_char": -0.25316351652145386, "num_chars": 8}, {"sum_logits": -4.676079750061035, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.986479759216309, "logits_per_token": -4.676079750061035, "logits_per_char": -0.5195644166734483, "num_chars": 9}, {"sum_logits": -6.564116477966309, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.001758575439453, "logits_per_token": -6.564116477966309, "logits_per_char": -0.9377309254237584, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 87, "native_id": "0f4159e80f8dbf682819215bbf0f5b5a_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.363748550415039, "incorrect_loss_raw": 8.659350275993347, "correct_loss_per_char": 1.0454685688018799, "incorrect_loss_per_char": 0.8759203411111929, "correct_loss_per_token": 8.363748550415039, "incorrect_loss_per_token": 7.701614856719971, "correct_loss_uncond": -4.587651252746582, "incorrect_loss_uncond": -5.1618980169296265}, "model_output": [{"sum_logits": -5.548738956451416, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.428765296936035, "logits_per_token": -5.548738956451416, "logits_per_char": -0.5548738956451416, "num_chars": 10}, {"sum_logits": -8.363748550415039, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.951399803161621, "logits_per_token": -8.363748550415039, "logits_per_char": -1.0454685688018799, "num_chars": 8}, {"sum_logits": -7.661883354187012, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.716516494750977, "logits_per_token": -3.830941677093506, "logits_per_char": -0.7661883354187011, "num_chars": 10}, {"sum_logits": -11.619142532348633, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.98448371887207, "logits_per_token": -11.619142532348633, "logits_per_char": -1.291015836927626, "num_chars": 9}, {"sum_logits": -9.807636260986328, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.155227661132812, "logits_per_token": -9.807636260986328, "logits_per_char": -0.8916032964533026, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 88, "native_id": "1a8b3c2a46efabcbd506f9cf70886ed0", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.599355697631836, "incorrect_loss_raw": 20.847785711288452, "correct_loss_per_char": 0.5332975387573242, "incorrect_loss_per_char": 1.380653580815801, "correct_loss_per_token": 3.1997852325439453, "incorrect_loss_per_token": 7.725359360376994, "correct_loss_uncond": -12.114858627319336, "incorrect_loss_uncond": -2.06646466255188}, "model_output": [{"sum_logits": -11.822867393493652, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -23.635160446166992, "logits_per_token": -3.9409557978312173, "logits_per_char": -0.9094513379610502, "num_chars": 13}, {"sum_logits": -20.94137954711914, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.074176788330078, "logits_per_token": -6.980459849039714, "logits_per_char": -1.3960919698079428, "num_chars": 15}, {"sum_logits": -32.00055694580078, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -26.745718002319336, "logits_per_token": -10.666852315266928, "logits_per_char": -2.285754067557199, "num_chars": 14}, {"sum_logits": -18.626338958740234, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.201946258544922, "logits_per_token": -9.313169479370117, "logits_per_char": -0.9313169479370117, "num_chars": 20}, {"sum_logits": -9.599355697631836, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.714214324951172, "logits_per_token": -3.1997852325439453, "logits_per_char": -0.5332975387573242, "num_chars": 18}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 89, "native_id": "db0cfd52ca6b2bbfcf26d1a898fd929b", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.76858377456665, "incorrect_loss_raw": 8.468966841697693, "correct_loss_per_char": 0.3973819812138875, "incorrect_loss_per_char": 1.0787659254339006, "correct_loss_per_token": 2.384291887283325, "incorrect_loss_per_token": 6.560056805610657, "correct_loss_uncond": -14.154107570648193, "incorrect_loss_uncond": -5.415078520774841}, "model_output": [{"sum_logits": -6.7906928062438965, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -12.027201652526855, "logits_per_token": -6.7906928062438965, "logits_per_char": -1.3581385612487793, "num_chars": 5}, {"sum_logits": -11.813894271850586, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -13.484567642211914, "logits_per_token": -11.813894271850586, "logits_per_char": -1.3126549190945096, "num_chars": 9}, {"sum_logits": -4.76858377456665, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -18.922691345214844, "logits_per_token": -2.384291887283325, "logits_per_char": -0.3973819812138875, "num_chars": 12}, {"sum_logits": -6.35135555267334, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -15.630819320678711, "logits_per_token": -3.17567777633667, "logits_per_char": -0.529279629389445, "num_chars": 12}, {"sum_logits": -8.91992473602295, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -14.393592834472656, "logits_per_token": -4.459962368011475, "logits_per_char": -1.1149905920028687, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 90, "native_id": "400fb2e196e71abb70e5b3f9aab4b9ee", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.939424514770508, "incorrect_loss_raw": 11.185070753097534, "correct_loss_per_char": 1.4924280643463135, "incorrect_loss_per_char": 1.2992578857547634, "correct_loss_per_token": 11.939424514770508, "incorrect_loss_per_token": 6.852256536483765, "correct_loss_uncond": -4.35169792175293, "incorrect_loss_uncond": -4.422242164611816}, "model_output": [{"sum_logits": -10.07776927947998, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -12.733248710632324, "logits_per_token": -10.07776927947998, "logits_per_char": -1.4396813256399972, "num_chars": 7}, {"sum_logits": -13.390028953552246, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -16.959651947021484, "logits_per_token": -6.695014476776123, "logits_per_char": -1.3390028953552247, "num_chars": 10}, {"sum_logits": -11.939424514770508, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -16.291122436523438, "logits_per_token": -11.939424514770508, "logits_per_char": -1.4924280643463135, "num_chars": 8}, {"sum_logits": -12.558744430541992, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -17.83720588684082, "logits_per_token": -6.279372215270996, "logits_per_char": -0.9660572638878455, "num_chars": 13}, {"sum_logits": -8.713740348815918, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -14.899145126342773, "logits_per_token": -4.356870174407959, "logits_per_char": -1.4522900581359863, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 91, "native_id": "3fb36127a61903029a363911a1d2b1e9_1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.255267143249512, "incorrect_loss_raw": 7.130030035972595, "correct_loss_per_char": 0.7255267143249512, "incorrect_loss_per_char": 0.9821852178950059, "correct_loss_per_token": 3.627633571624756, "incorrect_loss_per_token": 6.0898072719573975, "correct_loss_uncond": -10.199959754943848, "incorrect_loss_uncond": -6.8110352754592896}, "model_output": [{"sum_logits": -6.3647589683532715, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.071391105651855, "logits_per_token": -6.3647589683532715, "logits_per_char": -1.060793161392212, "num_chars": 6}, {"sum_logits": -8.321782112121582, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.474275588989258, "logits_per_token": -4.160891056060791, "logits_per_char": -0.43798853221692535, "num_chars": 19}, {"sum_logits": -7.255267143249512, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.45522689819336, "logits_per_token": -3.627633571624756, "logits_per_char": -0.7255267143249512, "num_chars": 10}, {"sum_logits": -4.49008846282959, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.709348678588867, "logits_per_token": -4.49008846282959, "logits_per_char": -0.5612610578536987, "num_chars": 8}, {"sum_logits": -9.343490600585938, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.509245872497559, "logits_per_token": -9.343490600585938, "logits_per_char": -1.8686981201171875, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 92, "native_id": "8494b0b95533dcedbd76ae2916c481d4", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.440888404846191, "incorrect_loss_raw": 28.196635961532593, "correct_loss_per_char": 0.8700740337371826, "incorrect_loss_per_char": 1.7883076247515635, "correct_loss_per_token": 5.220444202423096, "incorrect_loss_per_token": 9.621900844573975, "correct_loss_uncond": -7.719162940979004, "incorrect_loss_uncond": 1.651008129119873}, "model_output": [{"sum_logits": -10.771857261657715, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.454096794128418, "logits_per_token": -10.771857261657715, "logits_per_char": -1.5388367516653878, "num_chars": 7}, {"sum_logits": -37.317874908447266, "num_tokens": 4, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -31.51237678527832, "logits_per_token": -9.329468727111816, "logits_per_char": -2.073215272691515, "num_chars": 18}, {"sum_logits": -18.156383514404297, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.859439849853516, "logits_per_token": -9.078191757202148, "logits_per_char": -0.9555991323370683, "num_chars": 19}, {"sum_logits": -10.440888404846191, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.160051345825195, "logits_per_token": -5.220444202423096, "logits_per_char": -0.8700740337371826, "num_chars": 12}, {"sum_logits": -46.540428161621094, "num_tokens": 5, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -43.356597900390625, "logits_per_token": -9.308085632324218, "logits_per_char": -2.585579342312283, "num_chars": 18}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 93, "native_id": "1531f1523f5fd24bbdb42c311dbf90e8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.595741271972656, "incorrect_loss_raw": 8.694559574127197, "correct_loss_per_char": 0.7328601413302951, "incorrect_loss_per_char": 1.0591989176256673, "correct_loss_per_token": 3.297870635986328, "incorrect_loss_per_token": 5.382757067680359, "correct_loss_uncond": -9.514923095703125, "incorrect_loss_uncond": -9.93880295753479}, "model_output": [{"sum_logits": -4.475566864013672, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.781972885131836, "logits_per_token": -2.237783432006836, "logits_per_char": -0.37296390533447266, "num_chars": 12}, {"sum_logits": -14.931825637817383, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.41530990600586, "logits_per_token": -7.465912818908691, "logits_per_char": -1.1486019721397986, "num_chars": 13}, {"sum_logits": -8.283818244934082, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.484856605529785, "logits_per_token": -8.283818244934082, "logits_per_char": -2.0709545612335205, "num_chars": 4}, {"sum_logits": -7.087027549743652, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.85131072998047, "logits_per_token": -3.543513774871826, "logits_per_char": -0.6442752317948774, "num_chars": 11}, {"sum_logits": -6.595741271972656, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.11066436767578, "logits_per_token": -3.297870635986328, "logits_per_char": -0.7328601413302951, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 94, "native_id": "716ce4404a84b42dd64e561390c4b53b", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 4.0696845054626465, "incorrect_loss_raw": 6.971703290939331, "correct_loss_per_char": 0.5087105631828308, "incorrect_loss_per_char": 0.6545107556240899, "correct_loss_per_token": 2.0348422527313232, "incorrect_loss_per_token": 3.0793475210666656, "correct_loss_uncond": -12.635908603668213, "incorrect_loss_uncond": -10.598338842391968}, "model_output": [{"sum_logits": -8.240708351135254, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.70156478881836, "logits_per_token": -4.120354175567627, "logits_per_char": -0.7491553046486594, "num_chars": 11}, {"sum_logits": -6.994336128234863, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.592458724975586, "logits_per_token": -3.4971680641174316, "logits_per_char": -0.6358487389304421, "num_chars": 11}, {"sum_logits": -6.147702693939209, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.253847122192383, "logits_per_token": -3.0738513469696045, "logits_per_char": -0.7684628367424011, "num_chars": 8}, {"sum_logits": -6.504065990447998, "num_tokens": 4, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -17.732297897338867, "logits_per_token": -1.6260164976119995, "logits_per_char": -0.464576142174857, "num_chars": 14}, {"sum_logits": -4.0696845054626465, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.70559310913086, "logits_per_token": -2.0348422527313232, "logits_per_char": -0.5087105631828308, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 95, "native_id": "5169f7ae0781b15161551de3a189ebef", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.064414024353027, "incorrect_loss_raw": 13.50267243385315, "correct_loss_per_char": 0.861743858882359, "incorrect_loss_per_char": 1.0973692499279406, "correct_loss_per_token": 12.064414024353027, "incorrect_loss_per_token": 9.615700682004292, "correct_loss_uncond": -0.4400978088378906, "incorrect_loss_uncond": -2.3302958011627197}, "model_output": [{"sum_logits": -17.358654022216797, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.211681365966797, "logits_per_token": -5.786218007405599, "logits_per_char": -0.9136133695903578, "num_chars": 19}, {"sum_logits": -15.824642181396484, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.588586807250977, "logits_per_token": -15.824642181396484, "logits_per_char": -1.5824642181396484, "num_chars": 10}, {"sum_logits": -7.950901985168457, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.528963088989258, "logits_per_token": -3.9754509925842285, "logits_per_char": -0.7228092713789507, "num_chars": 11}, {"sum_logits": -12.87649154663086, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.002641677856445, "logits_per_token": -12.87649154663086, "logits_per_char": -1.1705901406028054, "num_chars": 11}, {"sum_logits": -12.064414024353027, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.504511833190918, "logits_per_token": -12.064414024353027, "logits_per_char": -0.861743858882359, "num_chars": 14}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 96, "native_id": "ef22ef7aeec70aaa688720f805c1cf38", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.164531707763672, "incorrect_loss_raw": 12.10765790939331, "correct_loss_per_char": 0.6546094076974052, "incorrect_loss_per_char": 1.4513843854268393, "correct_loss_per_token": 4.582265853881836, "incorrect_loss_per_token": 9.93593454360962, "correct_loss_uncond": -8.922130584716797, "incorrect_loss_uncond": -3.412130832672119}, "model_output": [{"sum_logits": -10.131463050842285, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.73450756072998, "logits_per_token": -10.131463050842285, "logits_per_char": -1.0131463050842284, "num_chars": 10}, {"sum_logits": -9.164531707763672, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.08666229248047, "logits_per_token": -4.582265853881836, "logits_per_char": -0.6546094076974052, "num_chars": 14}, {"sum_logits": -7.785919189453125, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.98448371887207, "logits_per_token": -7.785919189453125, "logits_per_char": -0.8651021321614584, "num_chars": 9}, {"sum_logits": -13.1394624710083, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.071391105651855, "logits_per_token": -13.1394624710083, "logits_per_char": -2.189910411834717, "num_chars": 6}, {"sum_logits": -17.37378692626953, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -23.288772583007812, "logits_per_token": -8.686893463134766, "logits_per_char": -1.7373786926269532, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 97, "native_id": "514310637fb43a252bfadc8cbf79b277", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.060657024383545, "incorrect_loss_raw": 8.82748806476593, "correct_loss_per_char": 0.18733245676214044, "incorrect_loss_per_char": 0.9752878703768291, "correct_loss_per_token": 2.060657024383545, "incorrect_loss_per_token": 4.800338983535767, "correct_loss_uncond": -11.455302715301514, "incorrect_loss_uncond": -8.375638365745544}, "model_output": [{"sum_logits": -7.924127578735352, "num_tokens": 1, "num_tokens_all": 158, "is_greedy": false, "sum_logits_uncond": -15.883575439453125, "logits_per_token": -7.924127578735352, "logits_per_char": -0.8804586198594835, "num_chars": 9}, {"sum_logits": -1.588975429534912, "num_tokens": 1, "num_tokens_all": 158, "is_greedy": true, "sum_logits_uncond": -12.611625671386719, "logits_per_token": -1.588975429534912, "logits_per_char": -0.22699648993355886, "num_chars": 7}, {"sum_logits": -19.261030197143555, "num_tokens": 3, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -24.60169219970703, "logits_per_token": -6.420343399047852, "logits_per_char": -2.1401144663492837, "num_chars": 9}, {"sum_logits": -2.060657024383545, "num_tokens": 1, "num_tokens_all": 158, "is_greedy": false, "sum_logits_uncond": -13.515959739685059, "logits_per_token": -2.060657024383545, "logits_per_char": -0.18733245676214044, "num_chars": 11}, {"sum_logits": -6.535819053649902, "num_tokens": 2, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -15.715612411499023, "logits_per_token": -3.267909526824951, "logits_per_char": -0.6535819053649903, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 98, "native_id": "9370b2b0897b796dec4a40f107854c8d", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.857830047607422, "incorrect_loss_raw": 12.454570293426514, "correct_loss_per_char": 0.6044484652005709, "incorrect_loss_per_char": 1.249205509821574, "correct_loss_per_token": 3.928915023803711, "incorrect_loss_per_token": 7.150797963142395, "correct_loss_uncond": -8.550468444824219, "incorrect_loss_uncond": -5.751230716705322}, "model_output": [{"sum_logits": -12.000102043151855, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.72738265991211, "logits_per_token": -6.000051021575928, "logits_per_char": -1.000008503595988, "num_chars": 12}, {"sum_logits": -7.857830047607422, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.40829849243164, "logits_per_token": -3.928915023803711, "logits_per_char": -0.6044484652005709, "num_chars": 13}, {"sum_logits": -11.213249206542969, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.431522369384766, "logits_per_token": -5.606624603271484, "logits_per_char": -1.0193862915039062, "num_chars": 11}, {"sum_logits": -19.216827392578125, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -23.678098678588867, "logits_per_token": -9.608413696289062, "logits_per_char": -1.130401611328125, "num_chars": 17}, {"sum_logits": -7.3881025314331055, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.986200332641602, "logits_per_token": -7.3881025314331055, "logits_per_char": -1.8470256328582764, "num_chars": 4}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 99, "native_id": "49902e768c45aa41a0f9f95be81114e5", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.3959325551986694, "incorrect_loss_raw": 15.766504526138306, "correct_loss_per_char": 0.2791865110397339, "incorrect_loss_per_char": 1.2734537560240669, "correct_loss_per_token": 1.3959325551986694, "incorrect_loss_per_token": 4.872741413116455, "correct_loss_uncond": -10.858795762062073, "incorrect_loss_uncond": -6.398361444473267}, "model_output": [{"sum_logits": -22.258655548095703, "num_tokens": 4, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -27.72464370727539, "logits_per_token": -5.564663887023926, "logits_per_char": -1.171508186741879, "num_chars": 19}, {"sum_logits": -22.142423629760742, "num_tokens": 5, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -28.827789306640625, "logits_per_token": -4.428484725952148, "logits_per_char": -1.8452019691467285, "num_chars": 12}, {"sum_logits": -4.9142560958862305, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.019702911376953, "logits_per_token": -4.9142560958862305, "logits_per_char": -0.7020365851266044, "num_chars": 7}, {"sum_logits": -13.750682830810547, "num_tokens": 3, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -17.08732795715332, "logits_per_token": -4.583560943603516, "logits_per_char": -1.3750682830810548, "num_chars": 10}, {"sum_logits": -1.3959325551986694, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": true, "sum_logits_uncond": -12.254728317260742, "logits_per_token": -1.3959325551986694, "logits_per_char": -0.2791865110397339, "num_chars": 5}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 100, "native_id": "e1f90cd664a6b150291e6d8444d85c54", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.98997688293457, "incorrect_loss_raw": 9.418415784835815, "correct_loss_per_char": 0.4158314069112142, "incorrect_loss_per_char": 1.1665409684181214, "correct_loss_per_token": 2.494988441467285, "incorrect_loss_per_token": 6.956748723983765, "correct_loss_uncond": -12.477386474609375, "incorrect_loss_uncond": -6.114270925521851}, "model_output": [{"sum_logits": -10.738458633422852, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.716591835021973, "logits_per_token": -10.738458633422852, "logits_per_char": -1.3423073291778564, "num_chars": 8}, {"sum_logits": -4.98997688293457, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.467363357543945, "logits_per_token": -2.494988441467285, "logits_per_char": -0.4158314069112142, "num_chars": 12}, {"sum_logits": -10.323577880859375, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.631671905517578, "logits_per_token": -5.1617889404296875, "logits_per_char": -0.938507080078125, "num_chars": 11}, {"sum_logits": -9.369758605957031, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.579090118408203, "logits_per_token": -4.684879302978516, "logits_per_char": -0.9369758605957031, "num_chars": 10}, {"sum_logits": -7.241868019104004, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.20339298248291, "logits_per_token": -7.241868019104004, "logits_per_char": -1.4483736038208008, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 101, "native_id": "320ec9b68fdefe13d59cc8b628083790", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 4.4025068283081055, "incorrect_loss_raw": 17.013197898864746, "correct_loss_per_char": 0.6289295469011579, "incorrect_loss_per_char": 1.6453504615359837, "correct_loss_per_token": 4.4025068283081055, "incorrect_loss_per_token": 6.889739894866944, "correct_loss_uncond": -10.132575035095215, "incorrect_loss_uncond": -3.237412691116333}, "model_output": [{"sum_logits": -13.932348251342773, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.27862548828125, "logits_per_token": -6.966174125671387, "logits_per_char": -1.5480386945936415, "num_chars": 9}, {"sum_logits": -16.2470760345459, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.980520248413086, "logits_per_token": -8.12353801727295, "logits_per_char": -1.6247076034545898, "num_chars": 10}, {"sum_logits": -16.31524658203125, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.463717460632324, "logits_per_token": -8.157623291015625, "logits_per_char": -2.33074951171875, "num_chars": 7}, {"sum_logits": -4.4025068283081055, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.53508186340332, "logits_per_token": -4.4025068283081055, "logits_per_char": -0.6289295469011579, "num_chars": 7}, {"sum_logits": -21.558120727539062, "num_tokens": 5, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -32.279579162597656, "logits_per_token": -4.311624145507812, "logits_per_char": -1.077906036376953, "num_chars": 20}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 102, "native_id": "964185aed0e381853332bca1a4d91f46", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.283713340759277, "incorrect_loss_raw": 13.947680711746216, "correct_loss_per_char": 0.5283713340759277, "incorrect_loss_per_char": 1.242624058609917, "correct_loss_per_token": 2.6418566703796387, "incorrect_loss_per_token": 7.481467247009277, "correct_loss_uncond": -14.960494041442871, "incorrect_loss_uncond": -3.3662753105163574}, "model_output": [{"sum_logits": -13.965314865112305, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -18.5414981842041, "logits_per_token": -6.982657432556152, "logits_per_char": -1.745664358139038, "num_chars": 8}, {"sum_logits": -15.595833778381348, "num_tokens": 3, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -19.060514450073242, "logits_per_token": -5.198611259460449, "logits_per_char": -0.8664352099100748, "num_chars": 18}, {"sum_logits": -9.259626388549805, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -13.641528129577637, "logits_per_token": -9.259626388549805, "logits_per_char": -0.6614018848964146, "num_chars": 14}, {"sum_logits": -5.283713340759277, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -20.24420738220215, "logits_per_token": -2.6418566703796387, "logits_per_char": -0.5283713340759277, "num_chars": 10}, {"sum_logits": -16.969947814941406, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -18.012283325195312, "logits_per_token": -8.484973907470703, "logits_per_char": -1.6969947814941406, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 103, "native_id": "db8e010754c532d78635e5b7cf81a147", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.1215033531188965, "incorrect_loss_raw": 9.150454878807068, "correct_loss_per_char": 0.5121503353118897, "incorrect_loss_per_char": 0.83584360359035, "correct_loss_per_token": 1.7071677843729656, "incorrect_loss_per_token": 4.575227439403534, "correct_loss_uncond": -10.379879474639893, "incorrect_loss_uncond": -8.749370694160461}, "model_output": [{"sum_logits": -6.957971096038818, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -17.332115173339844, "logits_per_token": -3.478985548019409, "logits_per_char": -0.5798309246699015, "num_chars": 12}, {"sum_logits": -9.1630277633667, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -20.816768646240234, "logits_per_token": -4.58151388168335, "logits_per_char": -0.6545019830976214, "num_chars": 14}, {"sum_logits": -7.249695777893066, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -13.766878128051758, "logits_per_token": -3.624847888946533, "logits_per_char": -0.9062119722366333, "num_chars": 8}, {"sum_logits": -13.231124877929688, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -19.68354034423828, "logits_per_token": -6.615562438964844, "logits_per_char": -1.2028295343572444, "num_chars": 11}, {"sum_logits": -5.1215033531188965, "num_tokens": 3, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -15.501382827758789, "logits_per_token": -1.7071677843729656, "logits_per_char": -0.5121503353118897, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 104, "native_id": "998381f854f51da2a6ccde45909e5168", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.530702590942383, "incorrect_loss_raw": 9.970172882080078, "correct_loss_per_char": 0.8100540454571064, "incorrect_loss_per_char": 1.0721655340122997, "correct_loss_per_token": 5.265351295471191, "incorrect_loss_per_token": 7.418041229248047, "correct_loss_uncond": -8.096027374267578, "incorrect_loss_uncond": -6.169957160949707}, "model_output": [{"sum_logits": -15.312789916992188, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -21.84908676147461, "logits_per_token": -5.1042633056640625, "logits_per_char": -0.8059363114206415, "num_chars": 19}, {"sum_logits": -7.746883392333984, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.925796508789062, "logits_per_token": -7.746883392333984, "logits_per_char": -0.645573616027832, "num_chars": 12}, {"sum_logits": -9.223400115966797, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.564719200134277, "logits_per_token": -9.223400115966797, "logits_per_char": -1.3176285879952567, "num_chars": 7}, {"sum_logits": -10.530702590942383, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.62672996520996, "logits_per_token": -5.265351295471191, "logits_per_char": -0.8100540454571064, "num_chars": 13}, {"sum_logits": -7.597618103027344, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.220917701721191, "logits_per_token": -7.597618103027344, "logits_per_char": -1.5195236206054688, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 105, "native_id": "bc38ad28e99cff7a65771233f734a007", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.816014289855957, "incorrect_loss_raw": 9.263147115707397, "correct_loss_per_char": 1.1632028579711915, "incorrect_loss_per_char": 0.9934687217076619, "correct_loss_per_token": 5.816014289855957, "incorrect_loss_per_token": 5.351189732551575, "correct_loss_uncond": -9.399543762207031, "incorrect_loss_uncond": -7.645395278930664}, "model_output": [{"sum_logits": -5.756929397583008, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.20167064666748, "logits_per_token": -5.756929397583008, "logits_per_char": -0.719616174697876, "num_chars": 8}, {"sum_logits": -8.029322624206543, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.572669982910156, "logits_per_token": -4.0146613121032715, "logits_per_char": -0.6691102186838785, "num_chars": 12}, {"sum_logits": -10.331398010253906, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.188396453857422, "logits_per_token": -5.165699005126953, "logits_per_char": -1.147933112250434, "num_chars": 9}, {"sum_logits": -5.816014289855957, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.215558052062988, "logits_per_token": -5.816014289855957, "logits_per_char": -1.1632028579711915, "num_chars": 5}, {"sum_logits": -12.934938430786133, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.671432495117188, "logits_per_token": -6.467469215393066, "logits_per_char": -1.4372153811984592, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 106, "native_id": "e3949997bf9d02048cfa5d8dd0f287aa", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.058172225952148, "incorrect_loss_raw": 13.30651342868805, "correct_loss_per_char": 0.5372114817301432, "incorrect_loss_per_char": 1.8992844558897475, "correct_loss_per_token": 4.029086112976074, "incorrect_loss_per_token": 7.548173189163208, "correct_loss_uncond": -10.930830001831055, "incorrect_loss_uncond": -4.422289252281189}, "model_output": [{"sum_logits": -15.206805229187012, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.205810546875, "logits_per_token": -7.603402614593506, "logits_per_char": -2.5344675381978354, "num_chars": 6}, {"sum_logits": -18.14763069152832, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.625734329223633, "logits_per_token": -9.07381534576416, "logits_per_char": -1.814763069152832, "num_chars": 10}, {"sum_logits": -7.159331798553467, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.254728317260742, "logits_per_token": -7.159331798553467, "logits_per_char": -1.4318663597106933, "num_chars": 5}, {"sum_logits": -12.712285995483398, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.828937530517578, "logits_per_token": -6.356142997741699, "logits_per_char": -1.8160408564976283, "num_chars": 7}, {"sum_logits": -8.058172225952148, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.989002227783203, "logits_per_token": -4.029086112976074, "logits_per_char": -0.5372114817301432, "num_chars": 15}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 107, "native_id": "a7d51b753c2113d8b2dbd0ebb5375855", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.4104766845703125, "incorrect_loss_raw": 14.48637342453003, "correct_loss_per_char": 0.45087305704752606, "incorrect_loss_per_char": 1.1344796419143677, "correct_loss_per_token": 2.7052383422851562, "incorrect_loss_per_token": 4.969425082206726, "correct_loss_uncond": -11.885868072509766, "incorrect_loss_uncond": -4.4597344398498535}, "model_output": [{"sum_logits": -5.4104766845703125, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.296344757080078, "logits_per_token": -2.7052383422851562, "logits_per_char": -0.45087305704752606, "num_chars": 12}, {"sum_logits": -11.54166030883789, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.00812530517578, "logits_per_token": -5.770830154418945, "logits_per_char": -0.7694440205891927, "num_chars": 15}, {"sum_logits": -12.276073455810547, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.558395385742188, "logits_per_token": -4.092024485270183, "logits_per_char": -1.0230061213175456, "num_chars": 12}, {"sum_logits": -17.79486846923828, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -23.26673126220703, "logits_per_token": -5.931622823079427, "logits_per_char": -1.1121792793273926, "num_chars": 16}, {"sum_logits": -16.3328914642334, "num_tokens": 4, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.95117950439453, "logits_per_token": -4.08322286605835, "logits_per_char": -1.6332891464233399, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 108, "native_id": "3e4b326aff96e9adbb52ba18cfa877b2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 4.673293113708496, "incorrect_loss_raw": 9.24084222316742, "correct_loss_per_char": 0.5192547904120551, "incorrect_loss_per_char": 1.0007493448979927, "correct_loss_per_token": 4.673293113708496, "incorrect_loss_per_token": 5.712951838970184, "correct_loss_uncond": -8.11298942565918, "incorrect_loss_uncond": -7.5150498151779175}, "model_output": [{"sum_logits": -4.673293113708496, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -12.786282539367676, "logits_per_token": -4.673293113708496, "logits_per_char": -0.5192547904120551, "num_chars": 9}, {"sum_logits": -8.740245819091797, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -14.235330581665039, "logits_per_token": -8.740245819091797, "logits_per_char": -1.4567076365152996, "num_chars": 6}, {"sum_logits": -5.4548115730285645, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -18.96527862548828, "logits_per_token": -2.7274057865142822, "logits_per_char": -0.5454811573028564, "num_chars": 10}, {"sum_logits": -9.113004684448242, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -15.665518760681152, "logits_per_token": -4.556502342224121, "logits_per_char": -0.7594170570373535, "num_chars": 12}, {"sum_logits": -13.655306816101074, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -18.157440185546875, "logits_per_token": -6.827653408050537, "logits_per_char": -1.2413915287364612, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 109, "native_id": "5ac83e9e6fa9851ad3cccb0d57c1d88f", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.546309471130371, "incorrect_loss_raw": 9.133650541305542, "correct_loss_per_char": 0.6932886838912964, "incorrect_loss_per_char": 0.860083721787356, "correct_loss_per_token": 5.546309471130371, "incorrect_loss_per_token": 6.464856743812561, "correct_loss_uncond": -7.705926895141602, "incorrect_loss_uncond": -6.221534252166748}, "model_output": [{"sum_logits": -12.128324508666992, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.48845100402832, "logits_per_token": -6.064162254333496, "logits_per_char": -0.8085549672444662, "num_chars": 15}, {"sum_logits": -9.222025871276855, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.331403732299805, "logits_per_token": -4.611012935638428, "logits_per_char": -0.709386605482835, "num_chars": 13}, {"sum_logits": -7.773743629455566, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.02486801147461, "logits_per_token": -7.773743629455566, "logits_per_char": -0.8637492921617296, "num_chars": 9}, {"sum_logits": -7.410508155822754, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.576016426086426, "logits_per_token": -7.410508155822754, "logits_per_char": -1.0586440222603934, "num_chars": 7}, {"sum_logits": -5.546309471130371, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.252236366271973, "logits_per_token": -5.546309471130371, "logits_per_char": -0.6932886838912964, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 110, "native_id": "2c0030cc14a27be2401dcfdaa501f0fc", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.2855756282806396, "incorrect_loss_raw": 13.665276765823364, "correct_loss_per_char": 0.10713130235671997, "incorrect_loss_per_char": 1.1717291854895078, "correct_loss_per_token": 0.6427878141403198, "incorrect_loss_per_token": 7.166944265365601, "correct_loss_uncond": -14.99960732460022, "incorrect_loss_uncond": -3.016094446182251}, "model_output": [{"sum_logits": -9.827533721923828, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.88292121887207, "logits_per_token": -4.913766860961914, "logits_per_char": -0.6551689147949219, "num_chars": 15}, {"sum_logits": -12.12663745880127, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.390989303588867, "logits_per_token": -12.12663745880127, "logits_per_char": -1.5158296823501587, "num_chars": 8}, {"sum_logits": -1.2855756282806396, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": true, "sum_logits_uncond": -16.28518295288086, "logits_per_token": -0.6427878141403198, "logits_per_char": -0.10713130235671997, "num_chars": 12}, {"sum_logits": -18.904380798339844, "num_tokens": 4, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -18.47638511657715, "logits_per_token": -4.726095199584961, "logits_per_char": -1.4541831383338342, "num_chars": 13}, {"sum_logits": -13.802555084228516, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.975189208984375, "logits_per_token": -6.901277542114258, "logits_per_char": -1.0617350064791167, "num_chars": 13}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 111, "native_id": "feb83263e6be392351db0794004efc3f", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.997834205627441, "incorrect_loss_raw": 10.176701545715332, "correct_loss_per_char": 0.4735702213488127, "incorrect_loss_per_char": 1.2416165828704835, "correct_loss_per_token": 4.498917102813721, "incorrect_loss_per_token": 6.920006990432739, "correct_loss_uncond": -11.23160457611084, "incorrect_loss_uncond": -6.004380464553833}, "model_output": [{"sum_logits": -11.390785217285156, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.488544464111328, "logits_per_token": -5.695392608642578, "logits_per_char": -0.5695392608642578, "num_chars": 20}, {"sum_logits": -14.662771224975586, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.22649574279785, "logits_per_token": -7.331385612487793, "logits_per_char": -1.4662771224975586, "num_chars": 10}, {"sum_logits": -8.997834205627441, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.22943878173828, "logits_per_token": -4.498917102813721, "logits_per_char": -0.4735702213488127, "num_chars": 19}, {"sum_logits": -8.016523361206055, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.621137619018555, "logits_per_token": -8.016523361206055, "logits_per_char": -1.603304672241211, "num_chars": 5}, {"sum_logits": -6.636726379394531, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.388150215148926, "logits_per_token": -6.636726379394531, "logits_per_char": -1.3273452758789062, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 112, "native_id": "80697d599280d994d8a584c95824ef1f", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.790785789489746, "incorrect_loss_raw": 8.451147079467773, "correct_loss_per_char": 0.6434206432766385, "incorrect_loss_per_char": 0.9123243663046096, "correct_loss_per_token": 2.895392894744873, "incorrect_loss_per_token": 4.587368726730347, "correct_loss_uncond": -9.758831024169922, "incorrect_loss_uncond": -7.673663377761841}, "model_output": [{"sum_logits": -8.898449897766113, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.30141258239746, "logits_per_token": -4.449224948883057, "logits_per_char": -0.8898449897766113, "num_chars": 10}, {"sum_logits": -15.938789367675781, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.27862548828125, "logits_per_token": -7.969394683837891, "logits_per_char": -1.77097659640842, "num_chars": 9}, {"sum_logits": -6.0729875564575195, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.658267974853516, "logits_per_token": -3.0364937782287598, "logits_per_char": -0.50608229637146, "num_chars": 12}, {"sum_logits": -2.8943614959716797, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.26093578338623, "logits_per_token": -2.8943614959716797, "logits_per_char": -0.4823935826619466, "num_chars": 6}, {"sum_logits": -5.790785789489746, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.549616813659668, "logits_per_token": -2.895392894744873, "logits_per_char": -0.6434206432766385, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 113, "native_id": "3c1800e7dd96d37fdd3c51b9fe502342", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.102760314941406, "incorrect_loss_raw": 8.298079252243042, "correct_loss_per_char": 0.7628450393676758, "incorrect_loss_per_char": 1.1239067571503776, "correct_loss_per_token": 6.102760314941406, "incorrect_loss_per_token": 7.013512015342712, "correct_loss_uncond": -7.595952033996582, "incorrect_loss_uncond": -6.8593909740448}, "model_output": [{"sum_logits": -8.820608139038086, "num_tokens": 1, "num_tokens_all": 161, "is_greedy": false, "sum_logits_uncond": -14.729721069335938, "logits_per_token": -8.820608139038086, "logits_per_char": -1.2600868770054408, "num_chars": 7}, {"sum_logits": -9.557333946228027, "num_tokens": 1, "num_tokens_all": 161, "is_greedy": false, "sum_logits_uncond": -14.717216491699219, "logits_per_token": -9.557333946228027, "logits_per_char": -1.1946667432785034, "num_chars": 8}, {"sum_logits": -4.537837028503418, "num_tokens": 1, "num_tokens_all": 161, "is_greedy": false, "sum_logits_uncond": -14.463476181030273, "logits_per_token": -4.537837028503418, "logits_per_char": -0.7563061714172363, "num_chars": 6}, {"sum_logits": -6.102760314941406, "num_tokens": 1, "num_tokens_all": 161, "is_greedy": false, "sum_logits_uncond": -13.698712348937988, "logits_per_token": -6.102760314941406, "logits_per_char": -0.7628450393676758, "num_chars": 8}, {"sum_logits": -10.276537895202637, "num_tokens": 2, "num_tokens_all": 162, "is_greedy": false, "sum_logits_uncond": -16.719467163085938, "logits_per_token": -5.138268947601318, "logits_per_char": -1.2845672369003296, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 114, "native_id": "4da33e6f4b789776acb1bc10195baa83", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.079885482788086, "incorrect_loss_raw": 8.25184452533722, "correct_loss_per_char": 0.846647580464681, "incorrect_loss_per_char": 1.318135901434081, "correct_loss_per_token": 5.079885482788086, "incorrect_loss_per_token": 7.208887457847595, "correct_loss_uncond": -7.698644638061523, "incorrect_loss_uncond": -6.455212473869324}, "model_output": [{"sum_logits": -6.849323272705078, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -11.587641716003418, "logits_per_token": -6.849323272705078, "logits_per_char": -1.7123308181762695, "num_chars": 4}, {"sum_logits": -5.079885482788086, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.77853012084961, "logits_per_token": -5.079885482788086, "logits_per_char": -0.846647580464681, "num_chars": 6}, {"sum_logits": -7.981897830963135, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.378170013427734, "logits_per_token": -7.981897830963135, "logits_per_char": -0.9977372288703918, "num_chars": 8}, {"sum_logits": -9.832500457763672, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.82812213897705, "logits_per_token": -9.832500457763672, "logits_per_char": -1.9665000915527344, "num_chars": 5}, {"sum_logits": -8.343656539916992, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -17.03429412841797, "logits_per_token": -4.171828269958496, "logits_per_char": -0.5959754671369281, "num_chars": 14}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 115, "native_id": "ae038e9af9d5a511ada7456b5e73b15e", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.92926549911499, "incorrect_loss_raw": 12.918716549873352, "correct_loss_per_char": 0.5929265499114991, "incorrect_loss_per_char": 1.628580927848816, "correct_loss_per_token": 5.92926549911499, "incorrect_loss_per_token": 7.494051516056061, "correct_loss_uncond": -10.285658359527588, "incorrect_loss_uncond": -4.744087815284729}, "model_output": [{"sum_logits": -21.987510681152344, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -24.440719604492188, "logits_per_token": -10.993755340576172, "logits_per_char": -2.4430567423502603, "num_chars": 9}, {"sum_logits": -5.92926549911499, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.214923858642578, "logits_per_token": -5.92926549911499, "logits_per_char": -0.5929265499114991, "num_chars": 10}, {"sum_logits": -13.571540832519531, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.781972885131836, "logits_per_token": -6.785770416259766, "logits_per_char": -1.1309617360432942, "num_chars": 12}, {"sum_logits": -8.277545928955078, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.31786060333252, "logits_per_token": -8.277545928955078, "logits_per_char": -2.0693864822387695, "num_chars": 4}, {"sum_logits": -7.838268756866455, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.11066436767578, "logits_per_token": -3.9191343784332275, "logits_per_char": -0.8709187507629395, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 116, "native_id": "a400b9fd1e319f901471c4b42d401c52", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.131471157073975, "incorrect_loss_raw": 12.646173477172852, "correct_loss_per_char": 0.4131471157073975, "incorrect_loss_per_char": 1.0584981358713574, "correct_loss_per_token": 2.0657355785369873, "incorrect_loss_per_token": 5.700590809186299, "correct_loss_uncond": -14.998459339141846, "incorrect_loss_uncond": -8.222909927368164}, "model_output": [{"sum_logits": -13.880711555480957, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.994060516357422, "logits_per_token": -6.9403557777404785, "logits_per_char": -1.1567259629567463, "num_chars": 12}, {"sum_logits": -8.16601276397705, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -22.511512756347656, "logits_per_token": -4.083006381988525, "logits_per_char": -0.9073347515530057, "num_chars": 9}, {"sum_logits": -13.598067283630371, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -22.91067886352539, "logits_per_token": -6.7990336418151855, "logits_per_char": -1.2361879348754883, "num_chars": 11}, {"sum_logits": -14.939902305603027, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -20.060081481933594, "logits_per_token": -4.979967435201009, "logits_per_char": -0.9337438941001892, "num_chars": 16}, {"sum_logits": -4.131471157073975, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.12993049621582, "logits_per_token": -2.0657355785369873, "logits_per_char": -0.4131471157073975, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 117, "native_id": "9dffd2021771e0ecddb19031acf3701b", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.352134704589844, "incorrect_loss_raw": 12.597912788391113, "correct_loss_per_char": 1.3352134704589844, "incorrect_loss_per_char": 1.2939423967749526, "correct_loss_per_token": 6.676067352294922, "incorrect_loss_per_token": 7.655424118041992, "correct_loss_uncond": -3.261007308959961, "incorrect_loss_uncond": -3.461676597595215}, "model_output": [{"sum_logits": -12.383339881896973, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.565593719482422, "logits_per_token": -6.191669940948486, "logits_per_char": -1.3759266535441081, "num_chars": 9}, {"sum_logits": -10.851741790771484, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.404241561889648, "logits_per_token": -10.851741790771484, "logits_per_char": -1.5502488272530692, "num_chars": 7}, {"sum_logits": -13.352134704589844, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.613142013549805, "logits_per_token": -6.676067352294922, "logits_per_char": -1.3352134704589844, "num_chars": 10}, {"sum_logits": -15.905076026916504, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.306940078735352, "logits_per_token": -7.952538013458252, "logits_per_char": -1.4459160024469548, "num_chars": 11}, {"sum_logits": -11.251493453979492, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.96158218383789, "logits_per_token": -5.625746726989746, "logits_per_char": -0.8036781038556781, "num_chars": 14}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 118, "native_id": "3730c646fdf54472ab873aac9ff7852e", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.204137802124023, "incorrect_loss_raw": 14.895934104919434, "correct_loss_per_char": 0.37172412872314453, "incorrect_loss_per_char": 1.2474578062693278, "correct_loss_per_token": 1.7347126007080078, "incorrect_loss_per_token": 9.133506298065186, "correct_loss_uncond": -14.53533935546875, "incorrect_loss_uncond": -2.697336435317993}, "model_output": [{"sum_logits": -14.00625991821289, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.971073150634766, "logits_per_token": -7.003129959106445, "logits_per_char": -1.167188326517741, "num_chars": 12}, {"sum_logits": -21.444252014160156, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.511398315429688, "logits_per_token": -10.722126007080078, "logits_per_char": -1.0722126007080077, "num_chars": 20}, {"sum_logits": -10.648910522460938, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.68854331970215, "logits_per_token": -5.324455261230469, "logits_per_char": -1.0648910522460937, "num_chars": 10}, {"sum_logits": -13.48431396484375, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.202067375183105, "logits_per_token": -13.48431396484375, "logits_per_char": -1.6855392456054688, "num_chars": 8}, {"sum_logits": -5.204137802124023, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.739477157592773, "logits_per_token": -1.7347126007080078, "logits_per_char": -0.37172412872314453, "num_chars": 14}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 119, "native_id": "175e7dcdded13d5adafaebf2264c3abd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.648144245147705, "incorrect_loss_raw": 10.21642005443573, "correct_loss_per_char": 0.509876283009847, "incorrect_loss_per_char": 0.8763225458917163, "correct_loss_per_token": 3.8240721225738525, "incorrect_loss_per_token": 6.4348947405815125, "correct_loss_uncond": -10.92225980758667, "incorrect_loss_uncond": -6.859025597572327}, "model_output": [{"sum_logits": -3.522778034210205, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.777209281921387, "logits_per_token": -1.7613890171051025, "logits_per_char": -0.3522778034210205, "num_chars": 10}, {"sum_logits": -10.61347770690918, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.412413597106934, "logits_per_token": -10.61347770690918, "logits_per_char": -1.061347770690918, "num_chars": 10}, {"sum_logits": -7.648144245147705, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.570404052734375, "logits_per_token": -3.8240721225738525, "logits_per_char": -0.509876283009847, "num_chars": 15}, {"sum_logits": -15.323280334472656, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.034700393676758, "logits_per_token": -7.661640167236328, "logits_per_char": -1.2769400278727214, "num_chars": 12}, {"sum_logits": -11.406144142150879, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.07745933532715, "logits_per_token": -5.7030720710754395, "logits_per_char": -0.8147245815822056, "num_chars": 14}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 120, "native_id": "11d7db1d8e1cff2f40d4184f15cf7ae7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.620003700256348, "incorrect_loss_raw": 16.373183250427246, "correct_loss_per_char": 0.44133358001708983, "incorrect_loss_per_char": 1.2499221589254297, "correct_loss_per_token": 3.310001850128174, "incorrect_loss_per_token": 9.252487738927204, "correct_loss_uncond": -13.261361122131348, "incorrect_loss_uncond": -2.7864859104156494}, "model_output": [{"sum_logits": -17.542938232421875, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -17.801706314086914, "logits_per_token": -8.771469116210938, "logits_per_char": -1.2530670166015625, "num_chars": 14}, {"sum_logits": -6.620003700256348, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -19.881364822387695, "logits_per_token": -3.310001850128174, "logits_per_char": -0.44133358001708983, "num_chars": 15}, {"sum_logits": -23.28002166748047, "num_tokens": 3, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -25.22509765625, "logits_per_token": -7.760007222493489, "logits_per_char": -1.0121748551078464, "num_chars": 23}, {"sum_logits": -16.28717613220215, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -14.202067375183105, "logits_per_token": -16.28717613220215, "logits_per_char": -2.0358970165252686, "num_chars": 8}, {"sum_logits": -8.382596969604492, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -19.409805297851562, "logits_per_token": -4.191298484802246, "logits_per_char": -0.698549747467041, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 121, "native_id": "08db69edf0ec5848c1a53dca8fc1601a", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.438385009765625, "incorrect_loss_raw": 13.702980756759644, "correct_loss_per_char": 1.2709316677517362, "incorrect_loss_per_char": 1.3897113539955832, "correct_loss_per_token": 5.7191925048828125, "incorrect_loss_per_token": 9.526868065198261, "correct_loss_uncond": -5.543302536010742, "incorrect_loss_uncond": -2.7208311557769775}, "model_output": [{"sum_logits": -13.034051895141602, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.250038146972656, "logits_per_token": -13.034051895141602, "logits_per_char": -1.1849138086492366, "num_chars": 11}, {"sum_logits": -17.2652645111084, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -19.184574127197266, "logits_per_token": -5.755088170369466, "logits_per_char": -1.5695695010098545, "num_chars": 11}, {"sum_logits": -11.438385009765625, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.981687545776367, "logits_per_token": -5.7191925048828125, "logits_per_char": -1.2709316677517362, "num_chars": 9}, {"sum_logits": -14.12405776977539, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.771713256835938, "logits_per_token": -14.12405776977539, "logits_per_char": -1.7655072212219238, "num_chars": 8}, {"sum_logits": -10.388548851013184, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.488922119140625, "logits_per_token": -5.194274425506592, "logits_per_char": -1.0388548851013184, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 122, "native_id": "855ab6ba47f6311104c4d29e24ef0234", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.778400421142578, "incorrect_loss_raw": 23.01047992706299, "correct_loss_per_char": 0.6111500263214111, "incorrect_loss_per_char": 1.3459634680707917, "correct_loss_per_token": 4.889200210571289, "incorrect_loss_per_token": 7.627487727573939, "correct_loss_uncond": -8.948169708251953, "incorrect_loss_uncond": -4.151102542877197}, "model_output": [{"sum_logits": -24.04910659790039, "num_tokens": 3, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -29.285202026367188, "logits_per_token": -8.016368865966797, "logits_per_char": -1.4146533292882584, "num_chars": 17}, {"sum_logits": -13.705211639404297, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -20.58279800415039, "logits_per_token": -6.852605819702148, "logits_per_char": -0.9789436885288784, "num_chars": 14}, {"sum_logits": -32.207908630371094, "num_tokens": 7, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -38.23808670043945, "logits_per_token": -4.601129804338727, "logits_per_char": -1.1502824510846819, "num_chars": 28}, {"sum_logits": -9.778400421142578, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -18.72657012939453, "logits_per_token": -4.889200210571289, "logits_per_char": -0.6111500263214111, "num_chars": 16}, {"sum_logits": -22.079692840576172, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -20.54024314880371, "logits_per_token": -11.039846420288086, "logits_per_char": -1.8399744033813477, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 123, "native_id": "7ec11eeca4221795c117943ca2639e86", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.444965362548828, "incorrect_loss_raw": 14.762489080429077, "correct_loss_per_char": 1.040451396595348, "incorrect_loss_per_char": 1.1615893159157191, "correct_loss_per_token": 5.722482681274414, "incorrect_loss_per_token": 5.643454909324646, "correct_loss_uncond": -8.50798225402832, "incorrect_loss_uncond": -5.173665285110474}, "model_output": [{"sum_logits": -17.37030029296875, "num_tokens": 4, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -22.130897521972656, "logits_per_token": -4.3425750732421875, "logits_per_char": -1.0856437683105469, "num_chars": 16}, {"sum_logits": -11.264337539672852, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.728801727294922, "logits_per_token": -5.632168769836426, "logits_per_char": -1.126433753967285, "num_chars": 10}, {"sum_logits": -14.76381778717041, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.381080627441406, "logits_per_token": -7.381908893585205, "logits_per_char": -1.2303181489308674, "num_chars": 12}, {"sum_logits": -11.444965362548828, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.95294761657715, "logits_per_token": -5.722482681274414, "logits_per_char": -1.040451396595348, "num_chars": 11}, {"sum_logits": -15.651500701904297, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -22.50383758544922, "logits_per_token": -5.217166900634766, "logits_per_char": -1.2039615924541767, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 124, "native_id": "e9389b08fdd17f14b148d498d6ff4dfe", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.9537858963012695, "incorrect_loss_raw": 13.205505847930908, "correct_loss_per_char": 0.41281549135843915, "incorrect_loss_per_char": 1.3655460509988997, "correct_loss_per_token": 2.4768929481506348, "incorrect_loss_per_token": 6.753486633300781, "correct_loss_uncond": -10.711732864379883, "incorrect_loss_uncond": -4.176104545593262}, "model_output": [{"sum_logits": -4.9537858963012695, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -15.665518760681152, "logits_per_token": -2.4768929481506348, "logits_per_char": -0.41281549135843915, "num_chars": 12}, {"sum_logits": -22.92058563232422, "num_tokens": 3, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -26.15388298034668, "logits_per_token": -7.640195210774739, "logits_per_char": -1.2733658684624567, "num_chars": 18}, {"sum_logits": -15.791529655456543, "num_tokens": 3, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -17.5189208984375, "logits_per_token": -5.263843218485515, "logits_per_char": -1.9739412069320679, "num_chars": 8}, {"sum_logits": -6.829925537109375, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -12.786282539367676, "logits_per_token": -6.829925537109375, "logits_per_char": -0.758880615234375, "num_chars": 9}, {"sum_logits": -7.279982566833496, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -13.067355155944824, "logits_per_token": -7.279982566833496, "logits_per_char": -1.4559965133666992, "num_chars": 5}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 125, "native_id": "afa2899cc21e204fa64e63e7839e8c1e", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.445942401885986, "incorrect_loss_raw": 9.630585551261902, "correct_loss_per_char": 0.3419955693758451, "incorrect_loss_per_char": 0.7669946650664012, "correct_loss_per_token": 1.481980800628662, "incorrect_loss_per_token": 5.756008505821228, "correct_loss_uncond": -18.58375120162964, "incorrect_loss_uncond": -7.672476649284363}, "model_output": [{"sum_logits": -7.183620929718018, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.485740661621094, "logits_per_token": -2.394540309906006, "logits_per_char": -0.5986350774765015, "num_chars": 12}, {"sum_logits": -4.445942401885986, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -23.029693603515625, "logits_per_token": -1.481980800628662, "logits_per_char": -0.3419955693758451, "num_chars": 13}, {"sum_logits": -9.472267150878906, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.993610382080078, "logits_per_token": -4.736133575439453, "logits_per_char": -0.7893555959065756, "num_chars": 12}, {"sum_logits": -11.946187973022461, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.822582244873047, "logits_per_token": -5.9730939865112305, "logits_per_char": -0.8532991409301758, "num_chars": 14}, {"sum_logits": -9.920266151428223, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.91031551361084, "logits_per_token": -9.920266151428223, "logits_per_char": -0.8266888459523519, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 126, "native_id": "f898eb5b789d2dc6804edba269f051f0", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.242931365966797, "incorrect_loss_raw": 9.053590297698975, "correct_loss_per_char": 0.606133052280971, "incorrect_loss_per_char": 1.0065976579984028, "correct_loss_per_token": 4.242931365966797, "incorrect_loss_per_token": 6.177339553833008, "correct_loss_uncond": -9.525967597961426, "incorrect_loss_uncond": -5.737314462661743}, "model_output": [{"sum_logits": -7.934901237487793, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.094202041625977, "logits_per_token": -2.6449670791625977, "logits_per_char": -0.5667786598205566, "num_chars": 14}, {"sum_logits": -7.020814895629883, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.495595932006836, "logits_per_token": -7.020814895629883, "logits_per_char": -1.1701358159383137, "num_chars": 6}, {"sum_logits": -8.828507423400879, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.768731117248535, "logits_per_token": -8.828507423400879, "logits_per_char": -0.7357089519500732, "num_chars": 12}, {"sum_logits": -4.242931365966797, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.768898963928223, "logits_per_token": -4.242931365966797, "logits_per_char": -0.606133052280971, "num_chars": 7}, {"sum_logits": -12.430137634277344, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.805089950561523, "logits_per_token": -6.215068817138672, "logits_per_char": -1.553767204284668, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 127, "native_id": "7ed7379fc51fd35a47be022f6c56ce51", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.6566271781921387, "incorrect_loss_raw": 11.04993486404419, "correct_loss_per_char": 0.6094378630320231, "incorrect_loss_per_char": 1.060499456847385, "correct_loss_per_token": 3.6566271781921387, "incorrect_loss_per_token": 5.848311424255371, "correct_loss_uncond": -9.929782390594482, "incorrect_loss_uncond": -6.068320989608765}, "model_output": [{"sum_logits": -8.479612350463867, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -16.658267974853516, "logits_per_token": -4.239806175231934, "logits_per_char": -0.7066343625386556, "num_chars": 12}, {"sum_logits": -3.6566271781921387, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -13.586409568786621, "logits_per_token": -3.6566271781921387, "logits_per_char": -0.6094378630320231, "num_chars": 6}, {"sum_logits": -16.226104736328125, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -19.933612823486328, "logits_per_token": -8.113052368164062, "logits_per_char": -1.4751004305752842, "num_chars": 11}, {"sum_logits": -2.586751937866211, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -13.889824867248535, "logits_per_token": -2.586751937866211, "logits_per_char": -0.3695359911237444, "num_chars": 7}, {"sum_logits": -16.907270431518555, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -17.991317749023438, "logits_per_token": -8.453635215759277, "logits_per_char": -1.6907270431518555, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 128, "native_id": "15798a23ee6952fedd6d202064069126", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 11.1442232131958, "incorrect_loss_raw": 15.313088655471802, "correct_loss_per_char": 0.8572479394766, "incorrect_loss_per_char": 1.3022883347102574, "correct_loss_per_token": 3.714741071065267, "incorrect_loss_per_token": 8.572335660457611, "correct_loss_uncond": -7.776091575622559, "incorrect_loss_uncond": -2.1982014179229736}, "model_output": [{"sum_logits": -18.602190017700195, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.934391021728516, "logits_per_token": -9.301095008850098, "logits_per_char": -0.9301095008850098, "num_chars": 20}, {"sum_logits": -19.538284301757812, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.68854331970215, "logits_per_token": -9.769142150878906, "logits_per_char": -1.9538284301757813, "num_chars": 10}, {"sum_logits": -10.523699760437012, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.77764129638672, "logits_per_token": -2.630924940109253, "logits_per_char": -0.7516928400312152, "num_chars": 14}, {"sum_logits": -12.588180541992188, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.644584655761719, "logits_per_token": -12.588180541992188, "logits_per_char": -1.5735225677490234, "num_chars": 8}, {"sum_logits": -11.1442232131958, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.92031478881836, "logits_per_token": -3.714741071065267, "logits_per_char": -0.8572479394766, "num_chars": 13}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 129, "native_id": "273d0134e8ce53d4ebcf41ca7fde02af", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.285900115966797, "incorrect_loss_raw": 10.315061926841736, "correct_loss_per_char": 0.8681461627666767, "incorrect_loss_per_char": 1.1238025944886099, "correct_loss_per_token": 3.7619667053222656, "incorrect_loss_per_token": 7.095936357975006, "correct_loss_uncond": -7.273736953735352, "incorrect_loss_uncond": -5.453817248344421}, "model_output": [{"sum_logits": -9.436080932617188, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.641080856323242, "logits_per_token": -9.436080932617188, "logits_per_char": -1.572680155436198, "num_chars": 6}, {"sum_logits": -6.071162223815918, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.796231269836426, "logits_per_token": -6.071162223815918, "logits_per_char": -0.4670124787550706, "num_chars": 13}, {"sum_logits": -18.811649322509766, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.163928985595703, "logits_per_token": -9.405824661254883, "logits_per_char": -2.0901832580566406, "num_chars": 9}, {"sum_logits": -11.285900115966797, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.55963706970215, "logits_per_token": -3.7619667053222656, "logits_per_char": -0.8681461627666767, "num_chars": 13}, {"sum_logits": -6.941355228424072, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.474275588989258, "logits_per_token": -3.470677614212036, "logits_per_char": -0.3653344857065301, "num_chars": 19}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 130, "native_id": "2f0931adc3d0d422d9ab6264395e89d8", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.7379965782165527, "incorrect_loss_raw": 10.113335847854614, "correct_loss_per_char": 0.10542808260236468, "incorrect_loss_per_char": 1.4675884726283315, "correct_loss_per_token": 0.7379965782165527, "incorrect_loss_per_token": 8.993932843208313, "correct_loss_uncond": -14.45020341873169, "incorrect_loss_uncond": -4.5221474170684814}, "model_output": [{"sum_logits": -8.95522403717041, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.595874786376953, "logits_per_token": -4.477612018585205, "logits_per_char": -0.6888633874746469, "num_chars": 13}, {"sum_logits": -9.12051773071289, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.287572860717773, "logits_per_token": -9.12051773071289, "logits_per_char": -1.3029311043875558, "num_chars": 7}, {"sum_logits": -11.438940048217773, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.73450756072998, "logits_per_token": -11.438940048217773, "logits_per_char": -1.1438940048217774, "num_chars": 10}, {"sum_logits": -0.7379965782165527, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": true, "sum_logits_uncond": -15.188199996948242, "logits_per_token": -0.7379965782165527, "logits_per_char": -0.10542808260236468, "num_chars": 7}, {"sum_logits": -10.938661575317383, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.923977851867676, "logits_per_token": -10.938661575317383, "logits_per_char": -2.7346653938293457, "num_chars": 4}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 131, "native_id": "d00d3ba777cb3889a45799d72fca0a50", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.5710349082946777, "incorrect_loss_raw": 10.964731097221375, "correct_loss_per_char": 0.324639537117698, "incorrect_loss_per_char": 1.0420033276081084, "correct_loss_per_token": 3.5710349082946777, "incorrect_loss_per_token": 5.355445722738901, "correct_loss_uncond": -8.778745174407959, "incorrect_loss_uncond": -7.386932969093323}, "model_output": [{"sum_logits": -14.210132598876953, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -21.80607032775879, "logits_per_token": -4.736710866292317, "logits_per_char": -0.9473421732584636, "num_chars": 15}, {"sum_logits": -14.210132598876953, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -21.80607032775879, "logits_per_token": -4.736710866292317, "logits_per_char": -0.9473421732584636, "num_chars": 15}, {"sum_logits": -3.5710349082946777, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.349780082702637, "logits_per_token": -3.5710349082946777, "logits_per_char": -0.324639537117698, "num_chars": 11}, {"sum_logits": -6.98059606552124, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.776412963867188, "logits_per_token": -3.49029803276062, "logits_per_char": -0.5817163387934366, "num_chars": 12}, {"sum_logits": -8.458063125610352, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -11.018102645874023, "logits_per_token": -8.458063125610352, "logits_per_char": -1.6916126251220702, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 132, "native_id": "b1f36d1c8ab7e5a28783cb38e8709c27", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.1544601917266846, "incorrect_loss_raw": 9.419279336929321, "correct_loss_per_char": 0.26930752396583557, "incorrect_loss_per_char": 1.0990676148371263, "correct_loss_per_token": 2.1544601917266846, "incorrect_loss_per_token": 5.294632077217102, "correct_loss_uncond": -13.073805570602417, "incorrect_loss_uncond": -6.615850925445557}, "model_output": [{"sum_logits": -2.1544601917266846, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": true, "sum_logits_uncond": -15.228265762329102, "logits_per_token": -2.1544601917266846, "logits_per_char": -0.26930752396583557, "num_chars": 8}, {"sum_logits": -13.751578330993652, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.872339248657227, "logits_per_token": -6.875789165496826, "logits_per_char": -1.7189472913742065, "num_chars": 8}, {"sum_logits": -4.679939270019531, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.085650444030762, "logits_per_token": -4.679939270019531, "logits_per_char": -0.5849924087524414, "num_chars": 8}, {"sum_logits": -9.192163467407227, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.56879425048828, "logits_per_token": -4.596081733703613, "logits_per_char": -0.8356512243097479, "num_chars": 11}, {"sum_logits": -10.053436279296875, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.613737106323242, "logits_per_token": -5.0267181396484375, "logits_per_char": -1.2566795349121094, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 133, "native_id": "a5e76dd088aab4f89e2fe93f6de6e46d", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.054683208465576, "incorrect_loss_raw": 12.317265033721924, "correct_loss_per_char": 0.3888217852665828, "incorrect_loss_per_char": 1.6684550130178057, "correct_loss_per_token": 5.054683208465576, "incorrect_loss_per_token": 10.97433066368103, "correct_loss_uncond": -8.300490856170654, "incorrect_loss_uncond": -1.50813627243042}, "model_output": [{"sum_logits": -10.743474960327148, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.704061508178711, "logits_per_token": -5.371737480163574, "logits_per_char": -0.8952895800272623, "num_chars": 12}, {"sum_logits": -13.663225173950195, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.275951385498047, "logits_per_token": -13.663225173950195, "logits_per_char": -1.5181361304389105, "num_chars": 9}, {"sum_logits": -12.461359024047852, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.618254661560059, "logits_per_token": -12.461359024047852, "logits_per_char": -1.7801941462925501, "num_chars": 7}, {"sum_logits": -5.054683208465576, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.35517406463623, "logits_per_token": -5.054683208465576, "logits_per_char": -0.3888217852665828, "num_chars": 13}, {"sum_logits": -12.4010009765625, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.703337669372559, "logits_per_token": -12.4010009765625, "logits_per_char": -2.4802001953125, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 134, "native_id": "ac6f0e24dd6203cda43e1089dcf081d6", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.646514415740967, "incorrect_loss_raw": 7.712745726108551, "correct_loss_per_char": 0.5538762013117472, "incorrect_loss_per_char": 0.6829279500897191, "correct_loss_per_token": 3.3232572078704834, "incorrect_loss_per_token": 3.808658440907796, "correct_loss_uncond": -10.80723237991333, "incorrect_loss_uncond": -10.930733859539032}, "model_output": [{"sum_logits": -12.533201217651367, "num_tokens": 3, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -25.352907180786133, "logits_per_token": -4.177733739217122, "logits_per_char": -0.7372471304500804, "num_chars": 17}, {"sum_logits": -3.796018362045288, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -12.857548713684082, "logits_per_token": -3.796018362045288, "logits_per_char": -0.5422883374350411, "num_chars": 7}, {"sum_logits": -6.059596538543701, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -17.656585693359375, "logits_per_token": -3.0297982692718506, "logits_per_char": -0.6059596538543701, "num_chars": 10}, {"sum_logits": -6.646514415740967, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -17.453746795654297, "logits_per_token": -3.3232572078704834, "logits_per_char": -0.5538762013117472, "num_chars": 12}, {"sum_logits": -8.462166786193848, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -18.706876754760742, "logits_per_token": -4.231083393096924, "logits_per_char": -0.8462166786193848, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 135, "native_id": "1ab746bcd100ccf513055fe93c61010b", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.704961776733398, "incorrect_loss_raw": 11.061553955078125, "correct_loss_per_char": 1.189440197414822, "incorrect_loss_per_char": 1.1128278649042525, "correct_loss_per_token": 3.5683205922444663, "incorrect_loss_per_token": 4.798650781313578, "correct_loss_uncond": -5.1798858642578125, "incorrect_loss_uncond": -7.605980396270752}, "model_output": [{"sum_logits": -8.915719985961914, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.626497268676758, "logits_per_token": -2.9719066619873047, "logits_per_char": -0.6368371418544224, "num_chars": 14}, {"sum_logits": -10.704961776733398, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.884847640991211, "logits_per_token": -3.5683205922444663, "logits_per_char": -1.189440197414822, "num_chars": 9}, {"sum_logits": -16.093856811523438, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.241497039794922, "logits_per_token": -8.046928405761719, "logits_per_char": -1.3411547342936199, "num_chars": 12}, {"sum_logits": -8.655308723449707, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.68340492248535, "logits_per_token": -2.885102907816569, "logits_per_char": -0.9617009692721896, "num_chars": 9}, {"sum_logits": -10.581330299377441, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.118738174438477, "logits_per_token": -5.290665149688721, "logits_per_char": -1.5116186141967773, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 136, "native_id": "af836abc58e0daf36df1d8d6830b70c5", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.598868370056152, "incorrect_loss_raw": 11.623781681060791, "correct_loss_per_char": 0.5058157864738914, "incorrect_loss_per_char": 0.9580014525798328, "correct_loss_per_token": 2.8662894566853843, "incorrect_loss_per_token": 7.848100900650024, "correct_loss_uncond": -10.256651878356934, "incorrect_loss_uncond": -5.036493301391602}, "model_output": [{"sum_logits": -9.896334648132324, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.304811477661133, "logits_per_token": -9.896334648132324, "logits_per_char": -1.413762092590332, "num_chars": 7}, {"sum_logits": -8.598868370056152, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.855520248413086, "logits_per_token": -2.8662894566853843, "logits_per_char": -0.5058157864738914, "num_chars": 17}, {"sum_logits": -6.393345832824707, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.897298812866211, "logits_per_token": -6.393345832824707, "logits_per_char": -0.6393345832824707, "num_chars": 10}, {"sum_logits": -16.728593826293945, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.922809600830078, "logits_per_token": -8.364296913146973, "logits_per_char": -0.8804523066470498, "num_chars": 19}, {"sum_logits": -13.476852416992188, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.51618003845215, "logits_per_token": -6.738426208496094, "logits_per_char": -0.8984568277994792, "num_chars": 15}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 137, "native_id": "2ed66cfd206723a006b37599b516ad6e", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.421733379364014, "incorrect_loss_raw": 12.550757884979248, "correct_loss_per_char": 0.33798596733494807, "incorrect_loss_per_char": 1.5814556158505955, "correct_loss_per_token": 2.140577793121338, "incorrect_loss_per_token": 10.794256925582886, "correct_loss_uncond": -15.427353382110596, "incorrect_loss_uncond": -2.617021083831787}, "model_output": [{"sum_logits": -9.363262176513672, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -14.925796508789062, "logits_per_token": -9.363262176513672, "logits_per_char": -0.780271848042806, "num_chars": 12}, {"sum_logits": -14.052007675170898, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -18.62672996520996, "logits_per_token": -7.026003837585449, "logits_per_char": -1.0809236673208384, "num_chars": 13}, {"sum_logits": -12.020030975341797, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -14.558703422546387, "logits_per_token": -12.020030975341797, "logits_per_char": -2.0033384958902993, "num_chars": 6}, {"sum_logits": -14.767730712890625, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -12.55988597869873, "logits_per_token": -14.767730712890625, "logits_per_char": -2.4612884521484375, "num_chars": 6}, {"sum_logits": -6.421733379364014, "num_tokens": 3, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -21.84908676147461, "logits_per_token": -2.140577793121338, "logits_per_char": -0.33798596733494807, "num_chars": 19}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 138, "native_id": "e89a2762d578cb7bc2cc0a5b2a16d933", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.551929473876953, "incorrect_loss_raw": 8.611047983169556, "correct_loss_per_char": 1.0501754067160867, "incorrect_loss_per_char": 1.4357324140412466, "correct_loss_per_token": 5.775964736938477, "incorrect_loss_per_token": 8.611047983169556, "correct_loss_uncond": -8.96839714050293, "incorrect_loss_uncond": -3.1865365505218506}, "model_output": [{"sum_logits": -10.987753868103027, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.526091575622559, "logits_per_token": -10.987753868103027, "logits_per_char": -1.8312923113505046, "num_chars": 6}, {"sum_logits": -9.165916442871094, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -10.158601760864258, "logits_per_token": -9.165916442871094, "logits_per_char": -1.3094166346958704, "num_chars": 7}, {"sum_logits": -6.614013195037842, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.150813102722168, "logits_per_token": -6.614013195037842, "logits_per_char": -1.3228026390075684, "num_chars": 5}, {"sum_logits": -7.67650842666626, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.35483169555664, "logits_per_token": -7.67650842666626, "logits_per_char": -1.2794180711110432, "num_chars": 6}, {"sum_logits": -11.551929473876953, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.520326614379883, "logits_per_token": -5.775964736938477, "logits_per_char": -1.0501754067160867, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 139, "native_id": "43cec0fff43a976fade9112d02b66021", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.446887016296387, "incorrect_loss_raw": 10.527249097824097, "correct_loss_per_char": 0.5372405846913656, "incorrect_loss_per_char": 0.9145663098974541, "correct_loss_per_token": 6.446887016296387, "incorrect_loss_per_token": 5.556834101676941, "correct_loss_uncond": -8.710648536682129, "incorrect_loss_uncond": -6.894556045532227}, "model_output": [{"sum_logits": -6.446887016296387, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.157535552978516, "logits_per_token": -6.446887016296387, "logits_per_char": -0.5372405846913656, "num_chars": 12}, {"sum_logits": -9.941475868225098, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.795141220092773, "logits_per_token": -4.970737934112549, "logits_per_char": -0.7647289129403921, "num_chars": 13}, {"sum_logits": -15.382781982421875, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.13370132446289, "logits_per_token": -7.6913909912109375, "logits_per_char": -1.1832909217247596, "num_chars": 13}, {"sum_logits": -7.158697128295898, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.62524700164795, "logits_per_token": -7.158697128295898, "logits_per_char": -1.0226710183279855, "num_chars": 7}, {"sum_logits": -9.626041412353516, "num_tokens": 4, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.13313102722168, "logits_per_token": -2.406510353088379, "logits_per_char": -0.6875743865966797, "num_chars": 14}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 140, "native_id": "30e66db11e0257a14a17108b90cd69fb", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.6600184440612793, "incorrect_loss_raw": 13.869901180267334, "correct_loss_per_char": 0.15091076764193448, "incorrect_loss_per_char": 2.3912102580070496, "correct_loss_per_token": 1.6600184440612793, "incorrect_loss_per_token": 13.869901180267334, "correct_loss_uncond": -11.785211086273193, "incorrect_loss_uncond": 1.8014020919799805}, "model_output": [{"sum_logits": -14.031961441040039, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -12.35275936126709, "logits_per_token": -14.031961441040039, "logits_per_char": -2.33866024017334, "num_chars": 6}, {"sum_logits": -16.52667808532715, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -12.983606338500977, "logits_per_token": -16.52667808532715, "logits_per_char": -2.0658347606658936, "num_chars": 8}, {"sum_logits": -12.838743209838867, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -11.717869758605957, "logits_per_token": -12.838743209838867, "logits_per_char": -2.1397905349731445, "num_chars": 6}, {"sum_logits": -1.6600184440612793, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": true, "sum_logits_uncond": -13.445229530334473, "logits_per_token": -1.6600184440612793, "logits_per_char": -0.15091076764193448, "num_chars": 11}, {"sum_logits": -12.082221984863281, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -11.21976089477539, "logits_per_token": -12.082221984863281, "logits_per_char": -3.0205554962158203, "num_chars": 4}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 141, "native_id": "f21ef67b31bd36a3174b6b4c7b4bbc7b", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.7200927734375, "incorrect_loss_raw": 12.143131971359253, "correct_loss_per_char": 0.8836447975852273, "incorrect_loss_per_char": 1.3504757391779048, "correct_loss_per_token": 4.86004638671875, "incorrect_loss_per_token": 8.681958556175232, "correct_loss_uncond": -10.100908279418945, "incorrect_loss_uncond": -4.710350275039673}, "model_output": [{"sum_logits": -14.777755737304688, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.466819763183594, "logits_per_token": -7.388877868652344, "logits_per_char": -0.7777766177528783, "num_chars": 19}, {"sum_logits": -8.436649322509766, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.105792999267578, "logits_per_token": -8.436649322509766, "logits_per_char": -0.8436649322509766, "num_chars": 10}, {"sum_logits": -12.446491241455078, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.669677734375, "logits_per_token": -12.446491241455078, "logits_per_char": -2.4892982482910155, "num_chars": 5}, {"sum_logits": -12.91163158416748, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.17163848876953, "logits_per_token": -6.45581579208374, "logits_per_char": -1.291163158416748, "num_chars": 10}, {"sum_logits": -9.7200927734375, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.821001052856445, "logits_per_token": -4.86004638671875, "logits_per_char": -0.8836447975852273, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 142, "native_id": "e476e2c8c278eaecfe1a8b884b6aeb8e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.583681583404541, "incorrect_loss_raw": 9.06650561094284, "correct_loss_per_char": 0.5119545119149345, "incorrect_loss_per_char": 1.2253688797554836, "correct_loss_per_token": 3.583681583404541, "incorrect_loss_per_token": 6.525164306163788, "correct_loss_uncond": -9.088305950164795, "incorrect_loss_uncond": -6.542069733142853}, "model_output": [{"sum_logits": -3.0043914318084717, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.760079383850098, "logits_per_token": -3.0043914318084717, "logits_per_char": -0.42919877597263884, "num_chars": 7}, {"sum_logits": -3.583681583404541, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.671987533569336, "logits_per_token": -3.583681583404541, "logits_per_char": -0.5119545119149345, "num_chars": 7}, {"sum_logits": -10.906547546386719, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.542936325073242, "logits_per_token": -5.453273773193359, "logits_per_char": -0.8389651958759015, "num_chars": 13}, {"sum_logits": -12.930900573730469, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.82812213897705, "logits_per_token": -12.930900573730469, "logits_per_char": -2.5861801147460937, "num_chars": 5}, {"sum_logits": -9.424182891845703, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.303163528442383, "logits_per_token": -4.712091445922852, "logits_per_char": -1.0471314324273004, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 143, "native_id": "191e3c676f05a11d6b2565d8c27d2001", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.346395492553711, "incorrect_loss_raw": 12.091760039329529, "correct_loss_per_char": 0.6955329577128092, "incorrect_loss_per_char": 2.120631062984467, "correct_loss_per_token": 4.1731977462768555, "incorrect_loss_per_token": 7.903390049934387, "correct_loss_uncond": -12.9962158203125, "incorrect_loss_uncond": -3.4118889570236206}, "model_output": [{"sum_logits": -8.346395492553711, "num_tokens": 2, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -21.34261131286621, "logits_per_token": -4.1731977462768555, "logits_per_char": -0.6955329577128092, "num_chars": 12}, {"sum_logits": -10.160791397094727, "num_tokens": 1, "num_tokens_all": 158, "is_greedy": false, "sum_logits_uncond": -13.126147270202637, "logits_per_token": -10.160791397094727, "logits_per_char": -2.5401978492736816, "num_chars": 4}, {"sum_logits": -18.084156036376953, "num_tokens": 2, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -17.517730712890625, "logits_per_token": -9.042078018188477, "logits_per_char": -3.6168312072753905, "num_chars": 5}, {"sum_logits": -15.42280387878418, "num_tokens": 2, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -18.592187881469727, "logits_per_token": -7.71140193939209, "logits_per_char": -1.542280387878418, "num_chars": 10}, {"sum_logits": -4.699288845062256, "num_tokens": 1, "num_tokens_all": 158, "is_greedy": false, "sum_logits_uncond": -12.77853012084961, "logits_per_token": -4.699288845062256, "logits_per_char": -0.783214807510376, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 144, "native_id": "99098375c7b651d524eebac72e358238", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 15.444979667663574, "incorrect_loss_raw": 13.613809585571289, "correct_loss_per_char": 1.103212833404541, "incorrect_loss_per_char": 1.120341440454706, "correct_loss_per_token": 7.722489833831787, "incorrect_loss_per_token": 7.907875299453735, "correct_loss_uncond": -5.196518898010254, "incorrect_loss_uncond": -3.8138647079467773}, "model_output": [{"sum_logits": -8.807764053344727, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -13.363737106323242, "logits_per_token": -8.807764053344727, "logits_per_char": -1.2582520076206751, "num_chars": 7}, {"sum_logits": -13.497002601623535, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -19.266294479370117, "logits_per_token": -6.748501300811768, "logits_per_char": -0.6748501300811768, "num_chars": 20}, {"sum_logits": -12.925803184509277, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -17.923294067382812, "logits_per_token": -6.462901592254639, "logits_per_char": -1.1750730167735706, "num_chars": 11}, {"sum_logits": -15.444979667663574, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -20.641498565673828, "logits_per_token": -7.722489833831787, "logits_per_char": -1.103212833404541, "num_chars": 14}, {"sum_logits": -19.224668502807617, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -19.157371520996094, "logits_per_token": -9.612334251403809, "logits_per_char": -1.3731906073434013, "num_chars": 14}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 145, "native_id": "290fac9f881a83d8bfb34355f8e71044", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.128012657165527, "incorrect_loss_raw": 7.627404093742371, "correct_loss_per_char": 0.5957654504215016, "incorrect_loss_per_char": 0.6107939206636869, "correct_loss_per_token": 3.376004219055176, "incorrect_loss_per_token": 4.770134747028351, "correct_loss_uncond": -10.186505317687988, "incorrect_loss_uncond": -9.391058564186096}, "model_output": [{"sum_logits": -7.6641974449157715, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.500892639160156, "logits_per_token": -3.8320987224578857, "logits_per_char": -0.5895536496089056, "num_chars": 13}, {"sum_logits": -7.828282356262207, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.26586151123047, "logits_per_token": -3.9141411781311035, "logits_per_char": -0.5218854904174804, "num_chars": 15}, {"sum_logits": -7.36567497253418, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.233386993408203, "logits_per_token": -3.68283748626709, "logits_per_char": -0.5665903825026292, "num_chars": 13}, {"sum_logits": -7.651461601257324, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.073709487915039, "logits_per_token": -7.651461601257324, "logits_per_char": -0.7651461601257324, "num_chars": 10}, {"sum_logits": -10.128012657165527, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -20.314517974853516, "logits_per_token": -3.376004219055176, "logits_per_char": -0.5957654504215016, "num_chars": 17}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 146, "native_id": "6c36226b23377a0dd0188bf56840e22a", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 1.8591759204864502, "incorrect_loss_raw": 10.656640648841858, "correct_loss_per_char": 0.20657510227627224, "incorrect_loss_per_char": 0.8013968793605069, "correct_loss_per_token": 1.8591759204864502, "incorrect_loss_per_token": 5.060633699099223, "correct_loss_uncond": -10.404441595077515, "incorrect_loss_uncond": -8.133930087089539}, "model_output": [{"sum_logits": -12.869207382202148, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.761146545410156, "logits_per_token": -4.289735794067383, "logits_per_char": -0.7149559656778971, "num_chars": 18}, {"sum_logits": -7.8050217628479, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.54871940612793, "logits_per_token": -2.6016739209493003, "logits_per_char": -0.6003862894498385, "num_chars": 13}, {"sum_logits": -1.8591759204864502, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": true, "sum_logits_uncond": -12.263617515563965, "logits_per_token": -1.8591759204864502, "logits_per_char": -0.20657510227627224, "num_chars": 9}, {"sum_logits": -12.901812553405762, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -24.975845336914062, "logits_per_token": -4.300604184468587, "logits_per_char": -0.7589301502003389, "num_chars": 17}, {"sum_logits": -9.050520896911621, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.876571655273438, "logits_per_token": -9.050520896911621, "logits_per_char": -1.1313151121139526, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 147, "native_id": "aa5aa36557a5fbb93391506182f1025c", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.4395790100097656, "incorrect_loss_raw": 9.666170835494995, "correct_loss_per_char": 0.3821754455566406, "incorrect_loss_per_char": 1.2235066535927, "correct_loss_per_token": 3.4395790100097656, "incorrect_loss_per_token": 9.666170835494995, "correct_loss_uncond": -8.585289001464844, "incorrect_loss_uncond": -3.8808534145355225}, "model_output": [{"sum_logits": -8.48792552947998, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.083166122436523, "logits_per_token": -8.48792552947998, "logits_per_char": -1.2125607899257116, "num_chars": 7}, {"sum_logits": -12.46239185333252, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.876571655273438, "logits_per_token": -12.46239185333252, "logits_per_char": -1.557798981666565, "num_chars": 8}, {"sum_logits": -3.4395790100097656, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.02486801147461, "logits_per_token": -3.4395790100097656, "logits_per_char": -0.3821754455566406, "num_chars": 9}, {"sum_logits": -6.525280952453613, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.629908561706543, "logits_per_token": -6.525280952453613, "logits_per_char": -0.7250312169392904, "num_chars": 9}, {"sum_logits": -11.189085006713867, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.598450660705566, "logits_per_token": -11.189085006713867, "logits_per_char": -1.3986356258392334, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 148, "native_id": "a38df3e750b1edd30f905e17af803c61", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.673672676086426, "incorrect_loss_raw": 18.454824447631836, "correct_loss_per_char": 0.3336836338043213, "incorrect_loss_per_char": 1.9881742530398898, "correct_loss_per_token": 3.336836338043213, "incorrect_loss_per_token": 11.522263288497925, "correct_loss_uncond": -12.230965614318848, "incorrect_loss_uncond": 0.48595166206359863}, "model_output": [{"sum_logits": -17.765918731689453, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.29446792602539, "logits_per_token": -8.882959365844727, "logits_per_char": -2.9609864552815757, "num_chars": 6}, {"sum_logits": -18.358808517456055, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.904881477355957, "logits_per_token": -18.358808517456055, "logits_per_char": -1.2239205678304037, "num_chars": 15}, {"sum_logits": -6.673672676086426, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.904638290405273, "logits_per_token": -3.336836338043213, "logits_per_char": -0.3336836338043213, "num_chars": 20}, {"sum_logits": -11.586788177490234, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.158472061157227, "logits_per_token": -5.793394088745117, "logits_per_char": -2.317357635498047, "num_chars": 5}, {"sum_logits": -26.1077823638916, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -23.517669677734375, "logits_per_token": -13.0538911819458, "logits_per_char": -1.4504323535495334, "num_chars": 18}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 149, "native_id": "dba51270f789c75a2e38a5201b124d99", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.449247360229492, "incorrect_loss_raw": 8.233509063720703, "correct_loss_per_char": 0.42246236801147463, "incorrect_loss_per_char": 0.892996944059528, "correct_loss_per_token": 2.112311840057373, "incorrect_loss_per_token": 7.014127731323242, "correct_loss_uncond": -11.297317504882812, "incorrect_loss_uncond": -6.191513299942017}, "model_output": [{"sum_logits": -8.522493362426758, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.792759895324707, "logits_per_token": -8.522493362426758, "logits_per_char": -0.9469437069363065, "num_chars": 9}, {"sum_logits": -9.755050659179688, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.956235885620117, "logits_per_token": -4.877525329589844, "logits_per_char": -0.7503885122445914, "num_chars": 13}, {"sum_logits": -6.902565002441406, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.765961647033691, "logits_per_token": -6.902565002441406, "logits_per_char": -0.766951666937934, "num_chars": 9}, {"sum_logits": -7.753927230834961, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.185132026672363, "logits_per_token": -7.753927230834961, "logits_per_char": -1.1077038901192802, "num_chars": 7}, {"sum_logits": -8.449247360229492, "num_tokens": 4, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.746564865112305, "logits_per_token": -2.112311840057373, "logits_per_char": -0.42246236801147463, "num_chars": 20}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 150, "native_id": "1be8ec824eb0c7218b6bc160fd191428", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.5216683149337769, "incorrect_loss_raw": 11.39237654209137, "correct_loss_per_char": 0.11705140884105976, "incorrect_loss_per_char": 1.099241244565873, "correct_loss_per_token": 1.5216683149337769, "incorrect_loss_per_token": 9.18512737751007, "correct_loss_uncond": -11.833505749702454, "incorrect_loss_uncond": -3.659958243370056}, "model_output": [{"sum_logits": -1.5216683149337769, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": true, "sum_logits_uncond": -13.35517406463623, "logits_per_token": -1.5216683149337769, "logits_per_char": -0.11705140884105976, "num_chars": 13}, {"sum_logits": -15.401484489440918, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.553597450256348, "logits_per_token": -15.401484489440918, "logits_per_char": -1.1001060349600655, "num_chars": 14}, {"sum_logits": -14.795647621154785, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.349469184875488, "logits_per_token": -14.795647621154785, "logits_per_char": -2.4659412701924643, "num_chars": 6}, {"sum_logits": -11.771995544433594, "num_tokens": 4, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.513368606567383, "logits_per_token": -2.9429988861083984, "logits_per_char": -0.4708798217773438, "num_chars": 25}, {"sum_logits": -3.6003785133361816, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.792903900146484, "logits_per_token": -3.6003785133361816, "logits_per_char": -0.36003785133361815, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 151, "native_id": "0e80f2afe5c4f652e8720b52d7c06c87", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.587882041931152, "incorrect_loss_raw": 14.024641990661621, "correct_loss_per_char": 0.5079892765391957, "incorrect_loss_per_char": 1.2696892322055877, "correct_loss_per_token": 5.587882041931152, "incorrect_loss_per_token": 8.030511736869812, "correct_loss_uncond": -8.878608703613281, "incorrect_loss_uncond": -4.896334409713745}, "model_output": [{"sum_logits": -8.305802345275879, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.830584526062012, "logits_per_token": -8.305802345275879, "logits_per_char": -0.9228669272528754, "num_chars": 9}, {"sum_logits": -5.587882041931152, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.466490745544434, "logits_per_token": -5.587882041931152, "logits_per_char": -0.5079892765391957, "num_chars": 11}, {"sum_logits": -12.542835235595703, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.1563720703125, "logits_per_token": -12.542835235595703, "logits_per_char": -1.7918336050851005, "num_chars": 7}, {"sum_logits": -9.843707084655762, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.2939453125, "logits_per_token": -4.921853542327881, "logits_per_char": -1.093745231628418, "num_chars": 9}, {"sum_logits": -25.40622329711914, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -29.403003692626953, "logits_per_token": -6.351555824279785, "logits_per_char": -1.270311164855957, "num_chars": 20}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 152, "native_id": "b67971747e95ba425a5b81e0ba8d0b28", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.391979217529297, "incorrect_loss_raw": 12.10616159439087, "correct_loss_per_char": 0.46199870109558105, "incorrect_loss_per_char": 1.425202993741111, "correct_loss_per_token": 3.6959896087646484, "incorrect_loss_per_token": 7.870223879814148, "correct_loss_uncond": -11.155803680419922, "incorrect_loss_uncond": -3.840819835662842}, "model_output": [{"sum_logits": -21.17941665649414, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.789621353149414, "logits_per_token": -10.58970832824707, "logits_per_char": -2.353268517388238, "num_chars": 9}, {"sum_logits": -7.109935760498047, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.071504592895508, "logits_per_token": -7.109935760498047, "logits_per_char": -1.0157051086425781, "num_chars": 7}, {"sum_logits": -7.42720890045166, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.238256454467773, "logits_per_token": -7.42720890045166, "logits_per_char": -1.0610298429216658, "num_chars": 7}, {"sum_logits": -7.391979217529297, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.54778289794922, "logits_per_token": -3.6959896087646484, "logits_per_char": -0.46199870109558105, "num_chars": 16}, {"sum_logits": -12.708085060119629, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.68854331970215, "logits_per_token": -6.3540425300598145, "logits_per_char": -1.270808506011963, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 153, "native_id": "fcd39cfa321728fea069a6ae4285b06f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 8.20341682434082, "incorrect_loss_raw": 9.636192321777344, "correct_loss_per_char": 0.7457651658491655, "incorrect_loss_per_char": 0.7700767475170094, "correct_loss_per_token": 4.10170841217041, "incorrect_loss_per_token": 5.384492317835489, "correct_loss_uncond": -12.232965469360352, "incorrect_loss_uncond": -6.187800884246826}, "model_output": [{"sum_logits": -6.622541427612305, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.991364479064941, "logits_per_token": -2.2075138092041016, "logits_per_char": -0.509426263662485, "num_chars": 13}, {"sum_logits": -10.309718132019043, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.109241485595703, "logits_per_token": -3.436572710673014, "logits_per_char": -0.7364084380013602, "num_chars": 14}, {"sum_logits": -10.17525577545166, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.428765296936035, "logits_per_token": -10.17525577545166, "logits_per_char": -1.017525577545166, "num_chars": 10}, {"sum_logits": -8.20341682434082, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.436382293701172, "logits_per_token": -4.10170841217041, "logits_per_char": -0.7457651658491655, "num_chars": 11}, {"sum_logits": -11.437253952026367, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.7666015625, "logits_per_token": -5.718626976013184, "logits_per_char": -0.8169467108590263, "num_chars": 14}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 154, "native_id": "cb6766fb25daee911fc8e9816b98938c", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.652368545532227, "incorrect_loss_raw": 13.003203511238098, "correct_loss_per_char": 0.9683971405029297, "incorrect_loss_per_char": 1.2834700961907703, "correct_loss_per_token": 5.326184272766113, "incorrect_loss_per_token": 5.67519211769104, "correct_loss_uncond": -4.511961936950684, "incorrect_loss_uncond": -6.406956076622009}, "model_output": [{"sum_logits": -19.025012969970703, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -23.751724243164062, "logits_per_token": -6.341670989990234, "logits_per_char": -1.268334197998047, "num_chars": 15}, {"sum_logits": -6.699376583099365, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -12.681718826293945, "logits_per_token": -6.699376583099365, "logits_per_char": -1.6748441457748413, "num_chars": 4}, {"sum_logits": -5.381476402282715, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -16.210315704345703, "logits_per_token": -2.6907382011413574, "logits_per_char": -0.4484563668568929, "num_chars": 12}, {"sum_logits": -20.90694808959961, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -24.99687957763672, "logits_per_token": -6.968982696533203, "logits_per_char": -1.7422456741333008, "num_chars": 12}, {"sum_logits": -10.652368545532227, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -15.16433048248291, "logits_per_token": -5.326184272766113, "logits_per_char": -0.9683971405029297, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 155, "native_id": "54231f875bb7fe4d3e4afb6eae64387c", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.212265014648438, "incorrect_loss_raw": 15.150820970535278, "correct_loss_per_char": 1.0192968195134944, "incorrect_loss_per_char": 1.5387444435604032, "correct_loss_per_token": 5.606132507324219, "incorrect_loss_per_token": 8.02108120918274, "correct_loss_uncond": -7.801181793212891, "incorrect_loss_uncond": -2.195622444152832}, "model_output": [{"sum_logits": -12.773741722106934, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.05156421661377, "logits_per_token": -12.773741722106934, "logits_per_char": -2.5547483444213865, "num_chars": 5}, {"sum_logits": -24.05333137512207, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -21.803173065185547, "logits_per_token": -8.01777712504069, "logits_per_char": -1.718095098223005, "num_chars": 14}, {"sum_logits": -18.725107192993164, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.937788009643555, "logits_per_token": -6.241702397664388, "logits_per_char": -1.0402837329440646, "num_chars": 18}, {"sum_logits": -11.212265014648438, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -19.013446807861328, "logits_per_token": -5.606132507324219, "logits_per_char": -1.0192968195134944, "num_chars": 11}, {"sum_logits": -5.051103591918945, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.59324836730957, "logits_per_token": -5.051103591918945, "logits_per_char": -0.8418505986531576, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 156, "native_id": "7d7f7d7a8ae3b20ca9fc0da6efe467b4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.062358856201172, "incorrect_loss_raw": 12.307130753993988, "correct_loss_per_char": 0.3693053505637429, "incorrect_loss_per_char": 1.0127593141794204, "correct_loss_per_token": 4.062358856201172, "incorrect_loss_per_token": 6.102487683296204, "correct_loss_uncond": -10.862926483154297, "incorrect_loss_uncond": -5.933758318424225}, "model_output": [{"sum_logits": -3.8000385761260986, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.676141738891602, "logits_per_token": -3.8000385761260986, "logits_per_char": -0.7600077152252197, "num_chars": 5}, {"sum_logits": -4.062358856201172, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.925285339355469, "logits_per_token": -4.062358856201172, "logits_per_char": -0.3693053505637429, "num_chars": 11}, {"sum_logits": -9.835639953613281, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.572244644165039, "logits_per_token": -9.835639953613281, "logits_per_char": -1.2294549942016602, "num_chars": 8}, {"sum_logits": -28.088600158691406, "num_tokens": 4, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -33.808982849121094, "logits_per_token": -7.022150039672852, "logits_per_char": -1.1235440063476563, "num_chars": 25}, {"sum_logits": -7.504244327545166, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.906187057495117, "logits_per_token": -3.752122163772583, "logits_per_char": -0.9380305409431458, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 157, "native_id": "31b72d4e4ae7c672c20e27e42499ec79", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.247247695922852, "incorrect_loss_raw": 7.545145630836487, "correct_loss_per_char": 1.5412079493204753, "incorrect_loss_per_char": 1.1640918470564343, "correct_loss_per_token": 4.623623847961426, "incorrect_loss_per_token": 7.545145630836487, "correct_loss_uncond": -5.216548919677734, "incorrect_loss_uncond": -6.291447758674622}, "model_output": [{"sum_logits": -10.289445877075195, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.224496841430664, "logits_per_token": -10.289445877075195, "logits_per_char": -1.143271764119466, "num_chars": 9}, {"sum_logits": -5.167762756347656, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.832438468933105, "logits_per_token": -5.167762756347656, "logits_per_char": -1.0335525512695312, "num_chars": 5}, {"sum_logits": -6.583569049835205, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.231552124023438, "logits_per_token": -6.583569049835205, "logits_per_char": -1.316713809967041, "num_chars": 5}, {"sum_logits": -8.13980484008789, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.057886123657227, "logits_per_token": -8.13980484008789, "logits_per_char": -1.1628292628696986, "num_chars": 7}, {"sum_logits": -9.247247695922852, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.463796615600586, "logits_per_token": -4.623623847961426, "logits_per_char": -1.5412079493204753, "num_chars": 6}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 158, "native_id": "26ce83b8e9a263079aa8cdbd5258d667", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 8.084833145141602, "incorrect_loss_raw": 6.747399568557739, "correct_loss_per_char": 0.8983147939046224, "incorrect_loss_per_char": 1.0619852036710768, "correct_loss_per_token": 8.084833145141602, "incorrect_loss_per_token": 6.747399568557739, "correct_loss_uncond": -7.037353515625, "incorrect_loss_uncond": -3.659285545349121}, "model_output": [{"sum_logits": -8.084833145141602, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -15.122186660766602, "logits_per_token": -8.084833145141602, "logits_per_char": -0.8983147939046224, "num_chars": 9}, {"sum_logits": -5.938797473907471, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -12.412273406982422, "logits_per_token": -5.938797473907471, "logits_per_char": -0.8483996391296387, "num_chars": 7}, {"sum_logits": -8.903634071350098, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -9.88023853302002, "logits_per_token": -8.903634071350098, "logits_per_char": -0.9892926745944552, "num_chars": 9}, {"sum_logits": -5.847736358642578, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -8.562274932861328, "logits_per_token": -5.847736358642578, "logits_per_char": -0.8353909083775112, "num_chars": 7}, {"sum_logits": -6.2994303703308105, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -10.771953582763672, "logits_per_token": -6.2994303703308105, "logits_per_char": -1.5748575925827026, "num_chars": 4}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 159, "native_id": "30138608d4934a75cf0911a06b021374", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.723121643066406, "incorrect_loss_raw": 17.23432183265686, "correct_loss_per_char": 0.9604459490094867, "incorrect_loss_per_char": 1.4127587524824619, "correct_loss_per_token": 6.723121643066406, "incorrect_loss_per_token": 8.61716091632843, "correct_loss_uncond": -5.730975151062012, "incorrect_loss_uncond": -4.3521153926849365}, "model_output": [{"sum_logits": -16.554994583129883, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.859439849853516, "logits_per_token": -8.277497291564941, "logits_per_char": -0.871315504375257, "num_chars": 19}, {"sum_logits": -19.32560920715332, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -25.403573989868164, "logits_per_token": -9.66280460357666, "logits_per_char": -2.1472899119059243, "num_chars": 9}, {"sum_logits": -13.978814125061035, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.160051345825195, "logits_per_token": -6.989407062530518, "logits_per_char": -1.1649011770884197, "num_chars": 12}, {"sum_logits": -19.077869415283203, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -23.922683715820312, "logits_per_token": -9.538934707641602, "logits_per_char": -1.4675284165602465, "num_chars": 13}, {"sum_logits": -6.723121643066406, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.454096794128418, "logits_per_token": -6.723121643066406, "logits_per_char": -0.9604459490094867, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 160, "native_id": "01abce8c4964371d85a5be2019f75827", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.4192585945129395, "incorrect_loss_raw": 10.179227709770203, "correct_loss_per_char": 0.6774073243141174, "incorrect_loss_per_char": 1.155202521218194, "correct_loss_per_token": 5.4192585945129395, "incorrect_loss_per_token": 6.753827810287476, "correct_loss_uncond": -9.503832340240479, "incorrect_loss_uncond": -5.043775200843811}, "model_output": [{"sum_logits": -5.4192585945129395, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.923090934753418, "logits_per_token": -5.4192585945129395, "logits_per_char": -0.6774073243141174, "num_chars": 8}, {"sum_logits": -12.997297286987305, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.844066619873047, "logits_per_token": -6.498648643493652, "logits_per_char": -1.0831081072489421, "num_chars": 12}, {"sum_logits": -14.405901908874512, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.204669952392578, "logits_per_token": -7.202950954437256, "logits_per_char": -1.6006557676527235, "num_chars": 9}, {"sum_logits": -9.59255599975586, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -11.889653205871582, "logits_per_token": -9.59255599975586, "logits_per_char": -1.5987593332926433, "num_chars": 6}, {"sum_logits": -3.7211556434631348, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.953621864318848, "logits_per_token": -3.7211556434631348, "logits_per_char": -0.3382868766784668, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 161, "native_id": "3e2222c99e11fca2ad4af2d470eb8ea2_1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.476571083068848, "incorrect_loss_raw": 11.44743263721466, "correct_loss_per_char": 0.391183648790632, "incorrect_loss_per_char": 1.111075334758549, "correct_loss_per_token": 2.738285541534424, "incorrect_loss_per_token": 6.44739043712616, "correct_loss_uncond": -12.817351341247559, "incorrect_loss_uncond": -5.461438536643982}, "model_output": [{"sum_logits": -12.327159881591797, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -18.213274002075195, "logits_per_token": -6.163579940795898, "logits_per_char": -1.2327159881591796, "num_chars": 10}, {"sum_logits": -13.649857521057129, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -17.353347778320312, "logits_per_token": -6.8249287605285645, "logits_per_char": -0.974989822932652, "num_chars": 14}, {"sum_logits": -5.789392948150635, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.254728317260742, "logits_per_token": -5.789392948150635, "logits_per_char": -1.157878589630127, "num_chars": 5}, {"sum_logits": -5.476571083068848, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -18.293922424316406, "logits_per_token": -2.738285541534424, "logits_per_char": -0.391183648790632, "num_chars": 14}, {"sum_logits": -14.023320198059082, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -19.81413459777832, "logits_per_token": -7.011660099029541, "logits_per_char": -1.078716938312237, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 162, "native_id": "847dbf5b73c3e8d49bb9a36491d95e79", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.676970481872559, "incorrect_loss_raw": 7.768758535385132, "correct_loss_per_char": 0.476926462990897, "incorrect_loss_per_char": 0.904137862580163, "correct_loss_per_token": 3.3384852409362793, "incorrect_loss_per_token": 6.573779940605164, "correct_loss_uncond": -10.676377296447754, "incorrect_loss_uncond": -6.55721640586853}, "model_output": [{"sum_logits": -7.834076881408691, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.434986114501953, "logits_per_token": -7.834076881408691, "logits_per_char": -0.9792596101760864, "num_chars": 8}, {"sum_logits": -9.559828758239746, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -16.160451889038086, "logits_per_token": -4.779914379119873, "logits_per_char": -0.682844911302839, "num_chars": 14}, {"sum_logits": -6.676970481872559, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -17.353347778320312, "logits_per_token": -3.3384852409362793, "logits_per_char": -0.476926462990897, "num_chars": 14}, {"sum_logits": -8.1909818649292, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.238256454467773, "logits_per_token": -8.1909818649292, "logits_per_char": -1.170140266418457, "num_chars": 7}, {"sum_logits": -5.490146636962891, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.470205307006836, "logits_per_token": -5.490146636962891, "logits_per_char": -0.7843066624232701, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 163, "native_id": "fa031cff8e11e75c68d6a99ef0e5ca3a", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.716970443725586, "incorrect_loss_raw": 13.338048934936523, "correct_loss_per_char": 1.1433940887451173, "incorrect_loss_per_char": 1.3424891611886403, "correct_loss_per_token": 5.716970443725586, "incorrect_loss_per_token": 8.655060191949207, "correct_loss_uncond": -6.437155723571777, "incorrect_loss_uncond": -3.003196954727173}, "model_output": [{"sum_logits": -12.688932418823242, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.88010597229004, "logits_per_token": -4.229644139607747, "logits_per_char": -1.409881379869249, "num_chars": 9}, {"sum_logits": -5.716970443725586, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.154126167297363, "logits_per_token": -5.716970443725586, "logits_per_char": -1.1433940887451173, "num_chars": 5}, {"sum_logits": -14.626300811767578, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.563986778259277, "logits_per_token": -14.626300811767578, "logits_per_char": -1.2188584009806316, "num_chars": 12}, {"sum_logits": -12.340073585510254, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.188592910766602, "logits_per_token": -12.340073585510254, "logits_per_char": -1.7628676550728934, "num_chars": 7}, {"sum_logits": -13.69688892364502, "num_tokens": 4, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.732297897338867, "logits_per_token": -3.424222230911255, "logits_per_char": -0.9783492088317871, "num_chars": 14}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 164, "native_id": "c592258c88295756833e9796e881057b", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.698979139328003, "incorrect_loss_raw": 10.775759220123291, "correct_loss_per_char": 0.2249149282773336, "incorrect_loss_per_char": 1.0409905314445496, "correct_loss_per_token": 1.3494895696640015, "incorrect_loss_per_token": 6.5898802280426025, "correct_loss_uncond": -16.082993745803833, "incorrect_loss_uncond": -8.284549713134766}, "model_output": [{"sum_logits": -10.865464210510254, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.858163833618164, "logits_per_token": -5.432732105255127, "logits_per_char": -0.9054553508758545, "num_chars": 12}, {"sum_logits": -9.616004943847656, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.72492790222168, "logits_per_token": -9.616004943847656, "logits_per_char": -1.202000617980957, "num_chars": 8}, {"sum_logits": -2.698979139328003, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.781972885131836, "logits_per_token": -1.3494895696640015, "logits_per_char": -0.2249149282773336, "num_chars": 12}, {"sum_logits": -9.101572036743164, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.85131072998047, "logits_per_token": -4.550786018371582, "logits_per_char": -0.827415639703924, "num_chars": 11}, {"sum_logits": -13.51999568939209, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -21.806833267211914, "logits_per_token": -6.759997844696045, "logits_per_char": -1.2290905172174627, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 165, "native_id": "e1403a7c581bc263aea2ed8d179826d1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.7297887802124023, "incorrect_loss_raw": 8.041016936302185, "correct_loss_per_char": 0.22748239835103354, "incorrect_loss_per_char": 0.8514852985030129, "correct_loss_per_token": 1.3648943901062012, "incorrect_loss_per_token": 5.73616623878479, "correct_loss_uncond": -13.928479194641113, "incorrect_loss_uncond": -8.00916588306427}, "model_output": [{"sum_logits": -2.7297887802124023, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.658267974853516, "logits_per_token": -1.3648943901062012, "logits_per_char": -0.22748239835103354, "num_chars": 12}, {"sum_logits": -8.206574440002441, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.109554290771484, "logits_per_token": -4.103287220001221, "logits_per_char": -1.172367777143206, "num_chars": 7}, {"sum_logits": -10.232231140136719, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.01914405822754, "logits_per_token": -5.116115570068359, "logits_per_char": -0.6395144462585449, "num_chars": 16}, {"sum_logits": -8.75511360168457, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.495186805725098, "logits_per_token": -8.75511360168457, "logits_per_char": -0.9727904001871744, "num_chars": 9}, {"sum_logits": -4.97014856338501, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.5768461227417, "logits_per_token": -4.97014856338501, "logits_per_char": -0.6212685704231262, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 166, "native_id": "15c38f66e811d6ed68cde931bc31d93c", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.166786193847656, "incorrect_loss_raw": 13.227838039398193, "correct_loss_per_char": 0.3854241371154785, "incorrect_loss_per_char": 1.3634316878659385, "correct_loss_per_token": 3.083393096923828, "incorrect_loss_per_token": 7.5933051109313965, "correct_loss_uncond": -13.476139068603516, "incorrect_loss_uncond": -3.7520558834075928}, "model_output": [{"sum_logits": -17.56000328063965, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.208698272705078, "logits_per_token": -8.780001640319824, "logits_per_char": -1.097500205039978, "num_chars": 16}, {"sum_logits": -15.85762882232666, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.982667922973633, "logits_per_token": -7.92881441116333, "logits_per_char": -2.265375546046666, "num_chars": 7}, {"sum_logits": -7.835088729858398, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.37873363494873, "logits_per_token": -7.835088729858398, "logits_per_char": -1.1192983899797713, "num_chars": 7}, {"sum_logits": -11.658631324768066, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.349475860595703, "logits_per_token": -5.829315662384033, "logits_per_char": -0.9715526103973389, "num_chars": 12}, {"sum_logits": -6.166786193847656, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.642925262451172, "logits_per_token": -3.083393096923828, "logits_per_char": -0.3854241371154785, "num_chars": 16}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 167, "native_id": "1ac54dbf6b67f27daa3d456416047584", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.387336254119873, "incorrect_loss_raw": 12.140987634658813, "correct_loss_per_char": 0.4387336254119873, "incorrect_loss_per_char": 1.414827328636533, "correct_loss_per_token": 2.1936681270599365, "incorrect_loss_per_token": 6.63094703356425, "correct_loss_uncond": -15.520412921905518, "incorrect_loss_uncond": -4.054285049438477}, "model_output": [{"sum_logits": -11.511039733886719, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.77572250366211, "logits_per_token": -5.755519866943359, "logits_per_char": -0.7194399833679199, "num_chars": 16}, {"sum_logits": -14.99966812133789, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.799968719482422, "logits_per_token": -7.499834060668945, "logits_per_char": -2.142809731619699, "num_chars": 7}, {"sum_logits": -8.876029968261719, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.916854858398438, "logits_per_token": -8.876029968261719, "logits_per_char": -1.4793383280436199, "num_chars": 6}, {"sum_logits": -4.387336254119873, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.90774917602539, "logits_per_token": -2.1936681270599365, "logits_per_char": -0.4387336254119873, "num_chars": 10}, {"sum_logits": -13.177212715148926, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.288544654846191, "logits_per_token": -4.392404238382976, "logits_per_char": -1.3177212715148925, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 168, "native_id": "21763a65765b5405c9a54484c2e54a72", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 13.054261207580566, "incorrect_loss_raw": 13.744363784790039, "correct_loss_per_char": 1.0878551006317139, "incorrect_loss_per_char": 1.259562958689297, "correct_loss_per_token": 4.3514204025268555, "incorrect_loss_per_token": 10.532256762186687, "correct_loss_uncond": -4.816479682922363, "incorrect_loss_uncond": -2.04556941986084}, "model_output": [{"sum_logits": -13.054261207580566, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.87074089050293, "logits_per_token": -4.3514204025268555, "logits_per_char": -1.0878551006317139, "num_chars": 12}, {"sum_logits": -10.92704963684082, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.973995208740234, "logits_per_token": -10.92704963684082, "logits_per_char": -1.092704963684082, "num_chars": 10}, {"sum_logits": -11.414335250854492, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.455964088439941, "logits_per_token": -11.414335250854492, "logits_per_char": -1.1414335250854493, "num_chars": 10}, {"sum_logits": -19.272642135620117, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -22.004837036132812, "logits_per_token": -6.424214045206706, "logits_per_char": -1.1336848315070658, "num_chars": 17}, {"sum_logits": -13.363428115844727, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.724936485290527, "logits_per_token": -13.363428115844727, "logits_per_char": -1.6704285144805908, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 169, "native_id": "c492b8b9754a181c924c1df19998cbc7", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.165259838104248, "incorrect_loss_raw": 9.848499298095703, "correct_loss_per_char": 0.28775089437311346, "incorrect_loss_per_char": 1.1609521183100613, "correct_loss_per_token": 3.165259838104248, "incorrect_loss_per_token": 8.393482208251953, "correct_loss_uncond": -11.973021984100342, "incorrect_loss_uncond": -5.577524662017822}, "model_output": [{"sum_logits": -11.598393440246582, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.751547813415527, "logits_per_token": -11.598393440246582, "logits_per_char": -1.4497991800308228, "num_chars": 8}, {"sum_logits": -7.929192543029785, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.966313362121582, "logits_per_token": -7.929192543029785, "logits_per_char": -0.9911490678787231, "num_chars": 8}, {"sum_logits": -3.165259838104248, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.13828182220459, "logits_per_token": -3.165259838104248, "logits_per_char": -0.28775089437311346, "num_chars": 11}, {"sum_logits": -11.64013671875, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.49237823486328, "logits_per_token": -5.820068359375, "logits_per_char": -1.45501708984375, "num_chars": 8}, {"sum_logits": -8.226274490356445, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.49385643005371, "logits_per_token": -8.226274490356445, "logits_per_char": -0.7478431354869496, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 170, "native_id": "fff554fffa1a0adc64b8d1e21d55534b", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.9839243292808533, "incorrect_loss_raw": 11.362318515777588, "correct_loss_per_char": 0.10932492547565037, "incorrect_loss_per_char": 1.2768944017398054, "correct_loss_per_token": 0.9839243292808533, "incorrect_loss_per_token": 7.334643205006918, "correct_loss_uncond": -11.94320923089981, "incorrect_loss_uncond": -4.663400173187256}, "model_output": [{"sum_logits": -11.458352088928223, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.999134063720703, "logits_per_token": -3.8194506963094077, "logits_per_char": -1.1458352088928223, "num_chars": 10}, {"sum_logits": -4.38796329498291, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.496073722839355, "logits_per_token": -4.38796329498291, "logits_per_char": -0.5484954118728638, "num_chars": 8}, {"sum_logits": -0.9839243292808533, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": true, "sum_logits_uncond": -12.927133560180664, "logits_per_token": -0.9839243292808533, "logits_per_char": -0.10932492547565037, "num_chars": 9}, {"sum_logits": -16.943599700927734, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.67427635192871, "logits_per_token": -8.471799850463867, "logits_per_char": -1.3033538231482873, "num_chars": 13}, {"sum_logits": -12.659358978271484, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.933390617370605, "logits_per_token": -12.659358978271484, "logits_per_char": -2.1098931630452475, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 171, "native_id": "8ea5720718c0e122efa6277edb511569", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.597208023071289, "incorrect_loss_raw": 11.359908819198608, "correct_loss_per_char": 0.7330231136745877, "incorrect_loss_per_char": 0.8600607629583663, "correct_loss_per_token": 3.2986040115356445, "incorrect_loss_per_token": 6.385569055875142, "correct_loss_uncond": -9.074224472045898, "incorrect_loss_uncond": -6.726531028747559}, "model_output": [{"sum_logits": -14.303396224975586, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.154491424560547, "logits_per_token": -4.767798741658528, "logits_per_char": -0.8413762485279757, "num_chars": 17}, {"sum_logits": -11.942414283752441, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -22.72237777709961, "logits_per_token": -5.971207141876221, "logits_per_char": -0.853029591696603, "num_chars": 14}, {"sum_logits": -8.781108856201172, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.953350067138672, "logits_per_token": -4.390554428100586, "logits_per_char": -0.8781108856201172, "num_chars": 10}, {"sum_logits": -6.597208023071289, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.671432495117188, "logits_per_token": -3.2986040115356445, "logits_per_char": -0.7330231136745877, "num_chars": 9}, {"sum_logits": -10.412715911865234, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.51554012298584, "logits_per_token": -10.412715911865234, "logits_per_char": -0.8677263259887695, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 172, "native_id": "23e4257a49972efd8a97672f060be1c1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.842813014984131, "incorrect_loss_raw": 12.138716101646423, "correct_loss_per_char": 0.7129830013621937, "incorrect_loss_per_char": 1.1691842366487553, "correct_loss_per_token": 3.9214065074920654, "incorrect_loss_per_token": 5.056022584438324, "correct_loss_uncond": -12.384277820587158, "incorrect_loss_uncond": -6.9973384141922}, "model_output": [{"sum_logits": -7.842813014984131, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.22709083557129, "logits_per_token": -3.9214065074920654, "logits_per_char": -0.7129830013621937, "num_chars": 11}, {"sum_logits": -12.255144119262695, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.55963706970215, "logits_per_token": -4.0850480397542315, "logits_per_char": -0.9427033937894381, "num_chars": 13}, {"sum_logits": -12.06490707397461, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -25.284618377685547, "logits_per_token": -4.02163569132487, "logits_per_char": -0.8043271382649739, "num_chars": 15}, {"sum_logits": -7.17445707321167, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.136558532714844, "logits_per_token": -3.587228536605835, "logits_per_char": -0.7971618970235189, "num_chars": 9}, {"sum_logits": -17.06035614013672, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.563404083251953, "logits_per_token": -8.53017807006836, "logits_per_char": -2.13254451751709, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 173, "native_id": "a018d65a74b9e77d81014fd8f6d78f77", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.2203330993652344, "incorrect_loss_raw": 16.078585147857666, "correct_loss_per_char": 0.24771793072040266, "incorrect_loss_per_char": 1.436190550607031, "correct_loss_per_token": 1.6101665496826172, "incorrect_loss_per_token": 9.48816990852356, "correct_loss_uncond": -15.440717697143555, "incorrect_loss_uncond": -4.008232116699219}, "model_output": [{"sum_logits": -13.857572555541992, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.781972885131836, "logits_per_token": -6.928786277770996, "logits_per_char": -1.1547977129618328, "num_chars": 12}, {"sum_logits": -16.52911376953125, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -30.066486358642578, "logits_per_token": -8.264556884765625, "logits_per_char": -1.2714702899639423, "num_chars": 13}, {"sum_logits": -22.33663558959961, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.035186767578125, "logits_per_token": -11.168317794799805, "logits_per_char": -2.0306032354181465, "num_chars": 11}, {"sum_logits": -3.2203330993652344, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.66105079650879, "logits_per_token": -1.6101665496826172, "logits_per_char": -0.24771793072040266, "num_chars": 13}, {"sum_logits": -11.591018676757812, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.463623046875, "logits_per_token": -11.591018676757812, "logits_per_char": -1.2878909640842013, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 174, "native_id": "24ceaf5c10863e73919b5f1b0f2db38e", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.103984832763672, "incorrect_loss_raw": 8.205224752426147, "correct_loss_per_char": 1.1839974721272786, "incorrect_loss_per_char": 1.8310654699802398, "correct_loss_per_token": 7.103984832763672, "incorrect_loss_per_token": 8.205224752426147, "correct_loss_uncond": -6.904879570007324, "incorrect_loss_uncond": -5.0237038135528564}, "model_output": [{"sum_logits": -15.692511558532715, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.484856605529785, "logits_per_token": -15.692511558532715, "logits_per_char": -3.9231278896331787, "num_chars": 4}, {"sum_logits": -6.049921989440918, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.310256958007812, "logits_per_token": -6.049921989440918, "logits_per_char": -1.5124804973602295, "num_chars": 4}, {"sum_logits": -6.717937469482422, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.543754577636719, "logits_per_token": -6.717937469482422, "logits_per_char": -1.3435874938964845, "num_chars": 5}, {"sum_logits": -4.360527992248535, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.5768461227417, "logits_per_token": -4.360527992248535, "logits_per_char": -0.5450659990310669, "num_chars": 8}, {"sum_logits": -7.103984832763672, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.008864402770996, "logits_per_token": -7.103984832763672, "logits_per_char": -1.1839974721272786, "num_chars": 6}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 175, "native_id": "900492bd731f8f615ed7c08155737d44", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.796539306640625, "incorrect_loss_raw": 9.714927434921265, "correct_loss_per_char": 0.48304494222005206, "incorrect_loss_per_char": 0.9227133222988674, "correct_loss_per_token": 2.8982696533203125, "incorrect_loss_per_token": 6.129836479822795, "correct_loss_uncond": -9.672212600708008, "incorrect_loss_uncond": -5.81988263130188}, "model_output": [{"sum_logits": -14.323624610900879, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.28645133972168, "logits_per_token": -4.77454153696696, "logits_per_char": -1.0231160436357771, "num_chars": 14}, {"sum_logits": -9.129283905029297, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.931059837341309, "logits_per_token": -9.129283905029297, "logits_per_char": -1.141160488128662, "num_chars": 8}, {"sum_logits": -9.582561492919922, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.90737533569336, "logits_per_token": -4.791280746459961, "logits_per_char": -0.7985467910766602, "num_chars": 12}, {"sum_logits": -5.824239730834961, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.01435375213623, "logits_per_token": -5.824239730834961, "logits_per_char": -0.7280299663543701, "num_chars": 8}, {"sum_logits": -5.796539306640625, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.468751907348633, "logits_per_token": -2.8982696533203125, "logits_per_char": -0.48304494222005206, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 176, "native_id": "4e3f85dc92eaad4ae6bc6529d62e382c", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.6875152587890625, "incorrect_loss_raw": 11.572192192077637, "correct_loss_per_char": 0.5170468417080966, "incorrect_loss_per_char": 1.3743429462809662, "correct_loss_per_token": 2.8437576293945312, "incorrect_loss_per_token": 8.221877574920654, "correct_loss_uncond": -13.677873611450195, "incorrect_loss_uncond": -6.478695869445801}, "model_output": [{"sum_logits": -11.022492408752441, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.065592765808105, "logits_per_token": -11.022492408752441, "logits_per_char": -2.2044984817504885, "num_chars": 5}, {"sum_logits": -5.6875152587890625, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.365388870239258, "logits_per_token": -2.8437576293945312, "logits_per_char": -0.5170468417080966, "num_chars": 11}, {"sum_logits": -14.420768737792969, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -24.700103759765625, "logits_per_token": -7.210384368896484, "logits_per_char": -1.1092899029071515, "num_chars": 13}, {"sum_logits": -8.463759422302246, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.17293643951416, "logits_per_token": -8.463759422302246, "logits_per_char": -1.0579699277877808, "num_chars": 8}, {"sum_logits": -12.38174819946289, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.26491928100586, "logits_per_token": -6.190874099731445, "logits_per_char": -1.1256134726784446, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 177, "native_id": "fa1f17ca535c7e875f4f58510dc2f430", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 0.9207121133804321, "incorrect_loss_raw": 4.388044148683548, "correct_loss_per_char": 0.13153030191149032, "incorrect_loss_per_char": 0.7604598564761027, "correct_loss_per_token": 0.9207121133804321, "incorrect_loss_per_token": 4.388044148683548, "correct_loss_uncond": -13.406318068504333, "incorrect_loss_uncond": -8.382002085447311}, "model_output": [{"sum_logits": -0.9207121133804321, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": true, "sum_logits_uncond": -14.327030181884766, "logits_per_token": -0.9207121133804321, "logits_per_char": -0.13153030191149032, "num_chars": 7}, {"sum_logits": -7.0703325271606445, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.74045467376709, "logits_per_token": -7.0703325271606445, "logits_per_char": -1.1783887545267742, "num_chars": 6}, {"sum_logits": -5.409180641174316, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.685454368591309, "logits_per_token": -5.409180641174316, "logits_per_char": -0.9015301068623861, "num_chars": 6}, {"sum_logits": -4.151951313018799, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.327245712280273, "logits_per_token": -4.151951313018799, "logits_per_char": -0.8303902626037598, "num_chars": 5}, {"sum_logits": -0.9207121133804321, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": true, "sum_logits_uncond": -14.327030181884766, "logits_per_token": -0.9207121133804321, "logits_per_char": -0.13153030191149032, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 178, "native_id": "76b6f0765a3b2fba71021f902142edc0", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.672272682189941, "incorrect_loss_raw": 12.912679672241211, "correct_loss_per_char": 0.852474742465549, "incorrect_loss_per_char": 1.3342182285765298, "correct_loss_per_token": 2.557424227396647, "incorrect_loss_per_token": 9.86726450920105, "correct_loss_uncond": -5.992091178894043, "incorrect_loss_uncond": -2.610851764678955}, "model_output": [{"sum_logits": -13.833008766174316, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.339823722839355, "logits_per_token": -13.833008766174316, "logits_per_char": -1.5370009740193684, "num_chars": 9}, {"sum_logits": -13.454388618469238, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.6738920211792, "logits_per_token": -13.454388618469238, "logits_per_char": -1.4949320687188044, "num_chars": 9}, {"sum_logits": -7.672272682189941, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.664363861083984, "logits_per_token": -2.557424227396647, "logits_per_char": -0.852474742465549, "num_chars": 9}, {"sum_logits": -18.66965675354004, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.555702209472656, "logits_per_token": -9.33482837677002, "logits_per_char": -1.8669656753540038, "num_chars": 10}, {"sum_logits": -5.69366455078125, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.524707794189453, "logits_per_token": -2.846832275390625, "logits_per_char": -0.4379741962139423, "num_chars": 13}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 179, "native_id": "f1368ab1d4ee05d72d555474fcd737d7", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.735559463500977, "incorrect_loss_raw": 9.528146028518677, "correct_loss_per_char": 0.9759599512273615, "incorrect_loss_per_char": 0.7372305525329722, "correct_loss_per_token": 5.367779731750488, "incorrect_loss_per_token": 4.692252953847249, "correct_loss_uncond": -8.73991584777832, "incorrect_loss_uncond": -9.207615613937378}, "model_output": [{"sum_logits": -8.332639694213867, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.096038818359375, "logits_per_token": -4.166319847106934, "logits_per_char": -0.7575126994739879, "num_chars": 11}, {"sum_logits": -10.735559463500977, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.475475311279297, "logits_per_token": -5.367779731750488, "logits_per_char": -0.9759599512273615, "num_chars": 11}, {"sum_logits": -4.1700439453125, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.57154655456543, "logits_per_token": -4.1700439453125, "logits_per_char": -0.5957205636160714, "num_chars": 7}, {"sum_logits": -11.376087188720703, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.895423889160156, "logits_per_token": -5.688043594360352, "logits_per_char": -0.7584058125813802, "num_chars": 15}, {"sum_logits": -14.233813285827637, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -22.380037307739258, "logits_per_token": -4.744604428609212, "logits_per_char": -0.8372831344604492, "num_chars": 17}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 180, "native_id": "3dee8fc7f0a3fbf4de111b6686fca157", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.9530742168426514, "incorrect_loss_raw": 11.680132061243057, "correct_loss_per_char": 0.09530742168426513, "incorrect_loss_per_char": 1.1980424128827594, "correct_loss_per_token": 0.9530742168426514, "incorrect_loss_per_token": 7.574173003435135, "correct_loss_uncond": -15.261849641799927, "incorrect_loss_uncond": -4.706539005041122}, "model_output": [{"sum_logits": -15.04601764678955, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -18.781972885131836, "logits_per_token": -7.523008823394775, "logits_per_char": -1.2538348038991292, "num_chars": 12}, {"sum_logits": -1.2779737710952759, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -13.0183687210083, "logits_per_token": -1.2779737710952759, "logits_per_char": -0.2555947542190552, "num_chars": 5}, {"sum_logits": -12.594882011413574, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -12.554898262023926, "logits_per_token": -12.594882011413574, "logits_per_char": -1.7992688587733678, "num_chars": 7}, {"sum_logits": -0.9530742168426514, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": true, "sum_logits_uncond": -16.214923858642578, "logits_per_token": -0.9530742168426514, "logits_per_char": -0.09530742168426513, "num_chars": 10}, {"sum_logits": -17.801654815673828, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -21.191444396972656, "logits_per_token": -8.900827407836914, "logits_per_char": -1.4834712346394856, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 181, "native_id": "ea0e7771afd86a59fd9f7764b77e3fa4", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 8.755577087402344, "incorrect_loss_raw": 10.632115483283997, "correct_loss_per_char": 1.094447135925293, "incorrect_loss_per_char": 0.9890853358827301, "correct_loss_per_token": 4.377788543701172, "incorrect_loss_per_token": 7.102785587310791, "correct_loss_uncond": -8.066638946533203, "incorrect_loss_uncond": -4.591464877128601}, "model_output": [{"sum_logits": -17.5218563079834, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.838695526123047, "logits_per_token": -8.7609281539917, "logits_per_char": -0.9222029635780736, "num_chars": 19}, {"sum_logits": -7.233476638793945, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.520729064941406, "logits_per_token": -7.233476638793945, "logits_per_char": -1.0333538055419922, "num_chars": 7}, {"sum_logits": -8.755577087402344, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.822216033935547, "logits_per_token": -4.377788543701172, "logits_per_char": -1.094447135925293, "num_chars": 8}, {"sum_logits": -7.0603461265563965, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.641080856323242, "logits_per_token": -7.0603461265563965, "logits_per_char": -1.1767243544260662, "num_chars": 6}, {"sum_logits": -10.712782859802246, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.893815994262695, "logits_per_token": -5.356391429901123, "logits_per_char": -0.8240602199847882, "num_chars": 13}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 182, "native_id": "2c845646032bbf27fb3904330d59d324", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.963132858276367, "incorrect_loss_raw": 12.226229190826416, "correct_loss_per_char": 0.7469277381896973, "incorrect_loss_per_char": 1.1931614070379433, "correct_loss_per_token": 4.481566429138184, "incorrect_loss_per_token": 5.213301817576091, "correct_loss_uncond": -11.15312385559082, "incorrect_loss_uncond": -6.2428789138793945}, "model_output": [{"sum_logits": -9.395891189575195, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -16.68304443359375, "logits_per_token": -4.697945594787598, "logits_per_char": -1.3422701699393136, "num_chars": 7}, {"sum_logits": -14.226522445678711, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -21.783588409423828, "logits_per_token": -7.1132612228393555, "logits_per_char": -1.422652244567871, "num_chars": 10}, {"sum_logits": -21.59550666809082, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.653310775756836, "logits_per_token": -7.19850222269694, "logits_per_char": -1.2703239216524012, "num_chars": 17}, {"sum_logits": -3.6869964599609375, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.756488800048828, "logits_per_token": -1.8434982299804688, "logits_per_char": -0.7373992919921875, "num_chars": 5}, {"sum_logits": -8.963132858276367, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -20.116256713867188, "logits_per_token": -4.481566429138184, "logits_per_char": -0.7469277381896973, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 183, "native_id": "bc08c354e5bead6863ea4a29cb8fa359", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.374494552612305, "incorrect_loss_raw": 14.797613143920898, "correct_loss_per_char": 0.3749702678007238, "incorrect_loss_per_char": 1.1017892858484288, "correct_loss_per_token": 2.124831517537435, "incorrect_loss_per_token": 5.896248499552409, "correct_loss_uncond": -14.347551345825195, "incorrect_loss_uncond": -4.2353315353393555}, "model_output": [{"sum_logits": -16.342519760131836, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.23810577392578, "logits_per_token": -5.447506586710612, "logits_per_char": -1.1673228400094169, "num_chars": 14}, {"sum_logits": -10.221216201782227, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.840572357177734, "logits_per_token": -5.110608100891113, "logits_per_char": -0.7300868715558734, "num_chars": 14}, {"sum_logits": -19.718873977661133, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.390138626098633, "logits_per_token": -6.572957992553711, "logits_per_char": -1.5168364598200872, "num_chars": 13}, {"sum_logits": -12.907842636108398, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.662961959838867, "logits_per_token": -6.453921318054199, "logits_per_char": -0.9929109720083383, "num_chars": 13}, {"sum_logits": -6.374494552612305, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.7220458984375, "logits_per_token": -2.124831517537435, "logits_per_char": -0.3749702678007238, "num_chars": 17}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 184, "native_id": "fb35c7aa5694bab2cde4b7257bfae003", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.148746013641357, "incorrect_loss_raw": 10.186453819274902, "correct_loss_per_char": 0.4680678194219416, "incorrect_loss_per_char": 1.2988336516751182, "correct_loss_per_token": 5.148746013641357, "incorrect_loss_per_token": 6.6451577345530195, "correct_loss_uncond": -7.582973957061768, "incorrect_loss_uncond": -3.5401248931884766}, "model_output": [{"sum_logits": -10.18957233428955, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.922264099121094, "logits_per_token": -10.18957233428955, "logits_per_char": -1.2736965417861938, "num_chars": 8}, {"sum_logits": -6.3693742752075195, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.7080659866333, "logits_per_token": -6.3693742752075195, "logits_per_char": -1.0615623792012532, "num_chars": 6}, {"sum_logits": -5.148746013641357, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.731719970703125, "logits_per_token": -5.148746013641357, "logits_per_char": -0.4680678194219416, "num_chars": 11}, {"sum_logits": -11.756368637084961, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.46172046661377, "logits_per_token": -5.8781843185424805, "logits_per_char": -1.306263181898329, "num_chars": 9}, {"sum_logits": -12.430500030517578, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.814264297485352, "logits_per_token": -4.143500010172526, "logits_per_char": -1.5538125038146973, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 185, "native_id": "e2a9f0041d17a9944377a91bef5e0d0d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 12.694215774536133, "incorrect_loss_raw": 9.18582534790039, "correct_loss_per_char": 0.6347107887268066, "incorrect_loss_per_char": 1.2995685710535423, "correct_loss_per_token": 4.231405258178711, "incorrect_loss_per_token": 7.065919280052185, "correct_loss_uncond": -10.373390197753906, "incorrect_loss_uncond": -4.4324424266815186}, "model_output": [{"sum_logits": -5.856655120849609, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.388481140136719, "logits_per_token": -2.9283275604248047, "logits_per_char": -0.7320818901062012, "num_chars": 8}, {"sum_logits": -8.749034881591797, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.31043529510498, "logits_per_token": -8.749034881591797, "logits_per_char": -1.2498621259416853, "num_chars": 7}, {"sum_logits": -12.694215774536133, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -23.06760597229004, "logits_per_token": -4.231405258178711, "logits_per_char": -0.6347107887268066, "num_chars": 20}, {"sum_logits": -11.035017967224121, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.64957046508789, "logits_per_token": -11.035017967224121, "logits_per_char": -2.207003593444824, "num_chars": 5}, {"sum_logits": -11.102593421936035, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.124584197998047, "logits_per_token": -5.551296710968018, "logits_per_char": -1.0093266747214578, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 186, "native_id": "ae56eff01d05422ddbcb26be7181356a", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.707286357879639, "incorrect_loss_raw": 10.409498929977417, "correct_loss_per_char": 0.5928681813753568, "incorrect_loss_per_char": 1.0022665489287603, "correct_loss_per_token": 3.8536431789398193, "incorrect_loss_per_token": 9.021039366722107, "correct_loss_uncond": -9.248949527740479, "incorrect_loss_uncond": -4.656791925430298}, "model_output": [{"sum_logits": -9.098108291625977, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.952046394348145, "logits_per_token": -9.098108291625977, "logits_per_char": -1.0109009212917752, "num_chars": 9}, {"sum_logits": -7.707286357879639, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.956235885620117, "logits_per_token": -3.8536431789398193, "logits_per_char": -0.5928681813753568, "num_chars": 13}, {"sum_logits": -12.690661430358887, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.317315101623535, "logits_per_token": -12.690661430358887, "logits_per_char": -1.4100734922620985, "num_chars": 9}, {"sum_logits": -8.741549491882324, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.330537796020508, "logits_per_token": -8.741549491882324, "logits_per_char": -0.7946863174438477, "num_chars": 11}, {"sum_logits": -11.10767650604248, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.665264129638672, "logits_per_token": -5.55383825302124, "logits_per_char": -0.79340546471732, "num_chars": 14}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 187, "native_id": "895aa97bb84d874d71b2aed572cebfdd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.490522384643555, "incorrect_loss_raw": 10.238036870956421, "correct_loss_per_char": 1.2767247094048395, "incorrect_loss_per_char": 1.1450225353240966, "correct_loss_per_token": 5.745261192321777, "incorrect_loss_per_token": 7.158295154571533, "correct_loss_uncond": -3.9785423278808594, "incorrect_loss_uncond": -5.522120714187622}, "model_output": [{"sum_logits": -4.63809871673584, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.484856605529785, "logits_per_token": -4.63809871673584, "logits_per_char": -1.15952467918396, "num_chars": 4}, {"sum_logits": -11.676115036010742, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.794074058532715, "logits_per_token": -11.676115036010742, "logits_per_char": -1.6680164337158203, "num_chars": 7}, {"sum_logits": -14.224886894226074, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.201946258544922, "logits_per_token": -7.112443447113037, "logits_per_char": -0.7112443447113037, "num_chars": 20}, {"sum_logits": -10.413046836853027, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.55975341796875, "logits_per_token": -5.206523418426514, "logits_per_char": -1.0413046836853028, "num_chars": 10}, {"sum_logits": -11.490522384643555, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.469064712524414, "logits_per_token": -5.745261192321777, "logits_per_char": -1.2767247094048395, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 188, "native_id": "9d625e948e9c3777e7cc54ed8ffea135", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.669084548950195, "incorrect_loss_raw": 12.779544591903687, "correct_loss_per_char": 0.6043177843093872, "incorrect_loss_per_char": 1.8068632179782502, "correct_loss_per_token": 4.834542274475098, "incorrect_loss_per_token": 12.779544591903687, "correct_loss_uncond": -12.263931274414062, "incorrect_loss_uncond": -1.1354146003723145}, "model_output": [{"sum_logits": -9.669084548950195, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -21.933015823364258, "logits_per_token": -4.834542274475098, "logits_per_char": -0.6043177843093872, "num_chars": 16}, {"sum_logits": -16.088830947875977, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.363737106323242, "logits_per_token": -16.088830947875977, "logits_per_char": -2.2984044211251393, "num_chars": 7}, {"sum_logits": -12.35171890258789, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -12.260344505310059, "logits_per_token": -12.35171890258789, "logits_per_char": -2.4703437805175783, "num_chars": 5}, {"sum_logits": -13.653655052185059, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.878219604492188, "logits_per_token": -13.653655052185059, "logits_per_char": -1.7067068815231323, "num_chars": 8}, {"sum_logits": -9.02397346496582, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -15.157535552978516, "logits_per_token": -9.02397346496582, "logits_per_char": -0.7519977887471517, "num_chars": 12}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 189, "native_id": "d107d67d525a686fbd8282314d2ea33c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.546212673187256, "incorrect_loss_raw": 18.68369436264038, "correct_loss_per_char": 0.7092425346374511, "incorrect_loss_per_char": 1.6734273225542098, "correct_loss_per_token": 3.546212673187256, "incorrect_loss_per_token": 12.785658836364746, "correct_loss_uncond": -10.322938442230225, "incorrect_loss_uncond": 1.4656782150268555}, "model_output": [{"sum_logits": -3.546212673187256, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.86915111541748, "logits_per_token": -3.546212673187256, "logits_per_char": -0.7092425346374511, "num_chars": 5}, {"sum_logits": -24.24374008178711, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -21.019716262817383, "logits_per_token": -12.121870040893555, "logits_per_char": -1.7316957201276506, "num_chars": 14}, {"sum_logits": -22.94054412841797, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.372713088989258, "logits_per_token": -11.470272064208984, "logits_per_char": -1.2744746738009982, "num_chars": 18}, {"sum_logits": -13.90176773071289, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.241378784179688, "logits_per_token": -13.90176773071289, "logits_per_char": -1.7377209663391113, "num_chars": 8}, {"sum_logits": -13.648725509643555, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.238256454467773, "logits_per_token": -13.648725509643555, "logits_per_char": -1.9498179299490792, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 190, "native_id": "fee5ff19811750ad019665af7b36b3c4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.8055474758148193, "incorrect_loss_raw": 13.00758409500122, "correct_loss_per_char": 0.36110949516296387, "incorrect_loss_per_char": 1.6385622541109721, "correct_loss_per_token": 1.8055474758148193, "incorrect_loss_per_token": 9.117424964904785, "correct_loss_uncond": -11.936767816543579, "incorrect_loss_uncond": -3.358874559402466}, "model_output": [{"sum_logits": -1.8055474758148193, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": true, "sum_logits_uncond": -13.742315292358398, "logits_per_token": -1.8055474758148193, "logits_per_char": -0.36110949516296387, "num_chars": 5}, {"sum_logits": -6.782768249511719, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -13.047143936157227, "logits_per_token": -6.782768249511719, "logits_per_char": -1.3565536499023438, "num_chars": 5}, {"sum_logits": -14.12629508972168, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -15.560240745544434, "logits_per_token": -14.12629508972168, "logits_per_char": -1.76578688621521, "num_chars": 8}, {"sum_logits": -13.571569442749023, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -15.619625091552734, "logits_per_token": -6.785784721374512, "logits_per_char": -2.2619282404581704, "num_chars": 6}, {"sum_logits": -17.54970359802246, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -21.23882484436035, "logits_per_token": -8.77485179901123, "logits_per_char": -1.169980239868164, "num_chars": 15}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 191, "native_id": "e69da59cbcf2a302e4523571eba8186b", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.8661651611328125, "incorrect_loss_raw": 8.169944405555725, "correct_loss_per_char": 0.5618689400809151, "incorrect_loss_per_char": 0.9046649561987984, "correct_loss_per_token": 7.8661651611328125, "incorrect_loss_per_token": 8.169944405555725, "correct_loss_uncond": -7.357640266418457, "incorrect_loss_uncond": -5.119355797767639}, "model_output": [{"sum_logits": -5.919878959655762, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.412413597106934, "logits_per_token": -5.919878959655762, "logits_per_char": -0.5919878959655762, "num_chars": 10}, {"sum_logits": -10.33043098449707, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -11.725833892822266, "logits_per_token": -10.33043098449707, "logits_per_char": -1.147825664944119, "num_chars": 9}, {"sum_logits": -5.504321575164795, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -12.562989234924316, "logits_per_token": -5.504321575164795, "logits_per_char": -0.7863316535949707, "num_chars": 7}, {"sum_logits": -10.925146102905273, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.455964088439941, "logits_per_token": -10.925146102905273, "logits_per_char": -1.0925146102905274, "num_chars": 10}, {"sum_logits": -7.8661651611328125, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -15.22380542755127, "logits_per_token": -7.8661651611328125, "logits_per_char": -0.5618689400809151, "num_chars": 14}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 192, "native_id": "2dd138a63b5895cf737ced793cc668e7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 8.464212417602539, "incorrect_loss_raw": 9.468988418579102, "correct_loss_per_char": 0.4454848640843442, "incorrect_loss_per_char": 0.767155953248342, "correct_loss_per_token": 2.821404139200846, "incorrect_loss_per_token": 4.020651737848918, "correct_loss_uncond": -12.45859146118164, "incorrect_loss_uncond": -8.897171020507812}, "model_output": [{"sum_logits": -4.058828353881836, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.87533950805664, "logits_per_token": -2.029414176940918, "logits_per_char": -0.5073535442352295, "num_chars": 8}, {"sum_logits": -8.727890014648438, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.982330322265625, "logits_per_token": -4.363945007324219, "logits_per_char": -0.6234207153320312, "num_chars": 14}, {"sum_logits": -17.132219314575195, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.469581604003906, "logits_per_token": -5.710739771525065, "logits_per_char": -1.142147954305013, "num_chars": 15}, {"sum_logits": -7.9570159912109375, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.137386322021484, "logits_per_token": -3.9785079956054688, "logits_per_char": -0.7957015991210937, "num_chars": 10}, {"sum_logits": -8.464212417602539, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.92280387878418, "logits_per_token": -2.821404139200846, "logits_per_char": -0.4454848640843442, "num_chars": 19}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 193, "native_id": "b33047f46db680a9b630c13e8ca115cc", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.891401767730713, "incorrect_loss_raw": 4.7373155653476715, "correct_loss_per_char": 0.5742834806442261, "incorrect_loss_per_char": 0.8412453519801298, "correct_loss_per_token": 3.4457008838653564, "incorrect_loss_per_token": 4.0280642211437225, "correct_loss_uncond": -9.393781185150146, "incorrect_loss_uncond": -7.651124328374863}, "model_output": [{"sum_logits": -6.869973182678223, "num_tokens": 1, "num_tokens_all": 162, "is_greedy": false, "sum_logits_uncond": -10.994718551635742, "logits_per_token": -6.869973182678223, "logits_per_char": -1.7174932956695557, "num_chars": 4}, {"sum_logits": -5.1931843757629395, "num_tokens": 1, "num_tokens_all": 162, "is_greedy": false, "sum_logits_uncond": -11.102972984313965, "logits_per_token": -5.1931843757629395, "logits_per_char": -0.8655307292938232, "num_chars": 6}, {"sum_logits": -1.2120939493179321, "num_tokens": 1, "num_tokens_all": 162, "is_greedy": true, "sum_logits_uncond": -12.16602897644043, "logits_per_token": -1.2120939493179321, "logits_per_char": -0.15151174366474152, "num_chars": 8}, {"sum_logits": -5.674010753631592, "num_tokens": 2, "num_tokens_all": 163, "is_greedy": false, "sum_logits_uncond": -15.2900390625, "logits_per_token": -2.837005376815796, "logits_per_char": -0.630445639292399, "num_chars": 9}, {"sum_logits": -6.891401767730713, "num_tokens": 2, "num_tokens_all": 163, "is_greedy": false, "sum_logits_uncond": -16.28518295288086, "logits_per_token": -3.4457008838653564, "logits_per_char": -0.5742834806442261, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 194, "native_id": "f20d40bc4af588223e880e0bb58b27b8", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.849234104156494, "incorrect_loss_raw": 14.510250568389893, "correct_loss_per_char": 0.3207695086797078, "incorrect_loss_per_char": 1.3792665524916217, "correct_loss_per_token": 1.924617052078247, "incorrect_loss_per_token": 7.255125284194946, "correct_loss_uncond": -15.090394496917725, "incorrect_loss_uncond": -3.5495619773864746}, "model_output": [{"sum_logits": -15.127052307128906, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.68354034423828, "logits_per_token": -7.563526153564453, "logits_per_char": -1.3751865733753552, "num_chars": 11}, {"sum_logits": -12.196781158447266, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.455045700073242, "logits_per_token": -6.098390579223633, "logits_per_char": -1.2196781158447265, "num_chars": 10}, {"sum_logits": -3.849234104156494, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.93962860107422, "logits_per_token": -1.924617052078247, "logits_per_char": -0.3207695086797078, "num_chars": 12}, {"sum_logits": -16.44668960571289, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.95878791809082, "logits_per_token": -8.223344802856445, "logits_per_char": -1.4951536005193538, "num_chars": 11}, {"sum_logits": -14.270479202270508, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -21.141876220703125, "logits_per_token": -7.135239601135254, "logits_per_char": -1.4270479202270507, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 195, "native_id": "b6b66d4519a84b8331ea55f84767e9df", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.797572374343872, "incorrect_loss_raw": 18.000261545181274, "correct_loss_per_char": 0.19982659816741943, "incorrect_loss_per_char": 1.7182270927862686, "correct_loss_per_token": 1.398786187171936, "incorrect_loss_per_token": 7.405469655990601, "correct_loss_uncond": -13.145992040634155, "incorrect_loss_uncond": -5.729529619216919}, "model_output": [{"sum_logits": -2.797572374343872, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.943564414978027, "logits_per_token": -1.398786187171936, "logits_per_char": -0.19982659816741943, "num_chars": 14}, {"sum_logits": -12.254526138305664, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.380178451538086, "logits_per_token": -6.127263069152832, "logits_per_char": -1.1140478307550603, "num_chars": 11}, {"sum_logits": -25.324378967285156, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -27.85953140258789, "logits_per_token": -8.441459655761719, "logits_per_char": -2.5324378967285157, "num_chars": 10}, {"sum_logits": -12.947487831115723, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.58068084716797, "logits_per_token": -4.315829277038574, "logits_per_char": -1.0789573192596436, "num_chars": 12}, {"sum_logits": -21.474653244018555, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -26.098773956298828, "logits_per_token": -10.737326622009277, "logits_per_char": -2.1474653244018556, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 196, "native_id": "952cf4b2f7a434b2eeae9f4c7ed89c0a", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.2450413703918457, "incorrect_loss_raw": 10.715973615646362, "correct_loss_per_char": 0.4635773386274065, "incorrect_loss_per_char": 1.97449449300766, "correct_loss_per_token": 3.2450413703918457, "incorrect_loss_per_token": 10.715973615646362, "correct_loss_uncond": -8.040570735931396, "incorrect_loss_uncond": -2.396986246109009}, "model_output": [{"sum_logits": -7.596083641052246, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -11.028870582580566, "logits_per_token": -7.596083641052246, "logits_per_char": -1.8990209102630615, "num_chars": 4}, {"sum_logits": -8.942268371582031, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -12.807665824890137, "logits_per_token": -8.942268371582031, "logits_per_char": -1.7884536743164063, "num_chars": 5}, {"sum_logits": -12.264141082763672, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -13.82812213897705, "logits_per_token": -12.264141082763672, "logits_per_char": -2.452828216552734, "num_chars": 5}, {"sum_logits": -14.0614013671875, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.78718090057373, "logits_per_token": -14.0614013671875, "logits_per_char": -1.7576751708984375, "num_chars": 8}, {"sum_logits": -3.2450413703918457, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -11.285612106323242, "logits_per_token": -3.2450413703918457, "logits_per_char": -0.4635773386274065, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 197, "native_id": "b63e5cd88bfe75d29ff9fdc6dd97fed6", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.274229526519775, "incorrect_loss_raw": 7.847292900085449, "correct_loss_per_char": 0.6274229526519776, "incorrect_loss_per_char": 0.970635381519285, "correct_loss_per_token": 3.1371147632598877, "incorrect_loss_per_token": 5.41522753238678, "correct_loss_uncond": -9.180816173553467, "incorrect_loss_uncond": -7.827578544616699}, "model_output": [{"sum_logits": -6.274229526519775, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.455045700073242, "logits_per_token": -3.1371147632598877, "logits_per_char": -0.6274229526519776, "num_chars": 10}, {"sum_logits": -7.9539337158203125, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.599926948547363, "logits_per_token": -7.9539337158203125, "logits_per_char": -1.3256556193033855, "num_chars": 6}, {"sum_logits": -7.73625373840332, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.376557350158691, "logits_per_token": -3.86812686920166, "logits_per_char": -0.85958374871148, "num_chars": 9}, {"sum_logits": -3.978714942932129, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.596395492553711, "logits_per_token": -3.978714942932129, "logits_per_char": -0.7957429885864258, "num_chars": 5}, {"sum_logits": -11.720269203186035, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.126605987548828, "logits_per_token": -5.860134601593018, "logits_per_char": -0.9015591694758489, "num_chars": 13}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 198, "native_id": "ec5a336080e37fbe95d72ad5f9c65ba7", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.423352241516113, "incorrect_loss_raw": 14.318739414215088, "correct_loss_per_char": 1.2372253735860188, "incorrect_loss_per_char": 1.2478352397680283, "correct_loss_per_token": 7.423352241516113, "incorrect_loss_per_token": 10.406453251838684, "correct_loss_uncond": -8.850050926208496, "incorrect_loss_uncond": -2.852536678314209}, "model_output": [{"sum_logits": -11.749120712280273, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.294889450073242, "logits_per_token": -11.749120712280273, "logits_per_char": -1.4686400890350342, "num_chars": 8}, {"sum_logits": -14.227547645568848, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.513456344604492, "logits_per_token": -14.227547645568848, "logits_per_char": -1.778443455696106, "num_chars": 8}, {"sum_logits": -7.423352241516113, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.27340316772461, "logits_per_token": -7.423352241516113, "logits_per_char": -1.2372253735860188, "num_chars": 6}, {"sum_logits": -16.95085334777832, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.30907440185547, "logits_per_token": -8.47542667388916, "logits_per_char": -0.847542667388916, "num_chars": 20}, {"sum_logits": -14.34743595123291, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.567684173583984, "logits_per_token": -7.173717975616455, "logits_per_char": -0.8967147469520569, "num_chars": 16}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 199, "native_id": "6386bcf080633bc3eeb3317a5435b7b7", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.252244472503662, "incorrect_loss_raw": 9.9577157497406, "correct_loss_per_char": 0.8931777817862374, "incorrect_loss_per_char": 1.7233279168605804, "correct_loss_per_token": 6.252244472503662, "incorrect_loss_per_token": 9.9577157497406, "correct_loss_uncond": -6.312474727630615, "incorrect_loss_uncond": -3.722684621810913}, "model_output": [{"sum_logits": -10.460766792297363, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.373515129089355, "logits_per_token": -10.460766792297363, "logits_per_char": -2.0921533584594725, "num_chars": 5}, {"sum_logits": -8.717879295349121, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.998351097106934, "logits_per_token": -8.717879295349121, "logits_per_char": -1.0897349119186401, "num_chars": 8}, {"sum_logits": -12.570599555969238, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.926148414611816, "logits_per_token": -12.570599555969238, "logits_per_char": -2.095099925994873, "num_chars": 6}, {"sum_logits": -8.08161735534668, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.42358684539795, "logits_per_token": -8.08161735534668, "logits_per_char": -1.616323471069336, "num_chars": 5}, {"sum_logits": -6.252244472503662, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.564719200134277, "logits_per_token": -6.252244472503662, "logits_per_char": -0.8931777817862374, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 200, "native_id": "43ab0ff711e60d51f943bbd2cdd6515a", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.137158393859863, "incorrect_loss_raw": 8.094597578048706, "correct_loss_per_char": 0.38357239961624146, "incorrect_loss_per_char": 1.061596307532612, "correct_loss_per_token": 3.0685791969299316, "incorrect_loss_per_token": 8.094597578048706, "correct_loss_uncond": -14.400984764099121, "incorrect_loss_uncond": -5.597669363021851}, "model_output": [{"sum_logits": -7.954168319702148, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.998748779296875, "logits_per_token": -7.954168319702148, "logits_per_char": -1.1363097599574499, "num_chars": 7}, {"sum_logits": -8.177745819091797, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.77853012084961, "logits_per_token": -8.177745819091797, "logits_per_char": -1.3629576365152996, "num_chars": 6}, {"sum_logits": -8.321622848510742, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.968234062194824, "logits_per_token": -8.321622848510742, "logits_per_char": -0.7565111680464311, "num_chars": 11}, {"sum_logits": -7.924853324890137, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.023554801940918, "logits_per_token": -7.924853324890137, "logits_per_char": -0.9906066656112671, "num_chars": 8}, {"sum_logits": -6.137158393859863, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.538143157958984, "logits_per_token": -3.0685791969299316, "logits_per_char": -0.38357239961624146, "num_chars": 16}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 201, "native_id": "11c4c78d61e8212f0984fd07eb22b669", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.169838905334473, "incorrect_loss_raw": 10.556260585784912, "correct_loss_per_char": 0.8814055579049247, "incorrect_loss_per_char": 1.13554469239889, "correct_loss_per_token": 6.169838905334473, "incorrect_loss_per_token": 6.914002418518066, "correct_loss_uncond": -9.83191967010498, "incorrect_loss_uncond": -5.3818089962005615}, "model_output": [{"sum_logits": -6.169838905334473, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.001758575439453, "logits_per_token": -6.169838905334473, "logits_per_char": -0.8814055579049247, "num_chars": 7}, {"sum_logits": -7.07429313659668, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.693686485290527, "logits_per_token": -7.07429313659668, "logits_per_char": -1.414858627319336, "num_chars": 5}, {"sum_logits": -12.022390365600586, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.00240707397461, "logits_per_token": -4.007463455200195, "logits_per_char": -0.9247992588923528, "num_chars": 13}, {"sum_logits": -10.020147323608398, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.470205307006836, "logits_per_token": -10.020147323608398, "logits_per_char": -1.4314496176583427, "num_chars": 7}, {"sum_logits": -13.108211517333984, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.585979461669922, "logits_per_token": -6.554105758666992, "logits_per_char": -0.7710712657255285, "num_chars": 17}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 202, "native_id": "e61891746aa94ab57aaa754614034aef", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 10.262673377990723, "incorrect_loss_raw": 10.928531050682068, "correct_loss_per_char": 0.5131336688995362, "incorrect_loss_per_char": 1.07407079191951, "correct_loss_per_token": 2.5656683444976807, "incorrect_loss_per_token": 9.123928904533386, "correct_loss_uncond": -5.618775367736816, "incorrect_loss_uncond": -4.932490706443787}, "model_output": [{"sum_logits": -14.436817169189453, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.52792739868164, "logits_per_token": -7.218408584594727, "logits_per_char": -0.9023010730743408, "num_chars": 16}, {"sum_logits": -7.208450794219971, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.585420608520508, "logits_per_token": -7.208450794219971, "logits_per_char": -1.0297786848885673, "num_chars": 7}, {"sum_logits": -10.262673377990723, "num_tokens": 4, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.881448745727539, "logits_per_token": -2.5656683444976807, "logits_per_char": -0.5131336688995362, "num_chars": 20}, {"sum_logits": -11.569172859191895, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.20680046081543, "logits_per_token": -11.569172859191895, "logits_per_char": -1.0517429871992632, "num_chars": 11}, {"sum_logits": -10.499683380126953, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.12393856048584, "logits_per_token": -10.499683380126953, "logits_per_char": -1.3124604225158691, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 203, "native_id": "97da9aa4ea4b22744ec51cba49f35bfc", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.71394681930542, "incorrect_loss_raw": 9.341336131095886, "correct_loss_per_char": 0.742789363861084, "incorrect_loss_per_char": 1.7269849947520663, "correct_loss_per_token": 3.71394681930542, "incorrect_loss_per_token": 9.341336131095886, "correct_loss_uncond": -10.112477779388428, "incorrect_loss_uncond": -3.619361996650696}, "model_output": [{"sum_logits": -10.23135757446289, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.126147270202637, "logits_per_token": -10.23135757446289, "logits_per_char": -2.5578393936157227, "num_chars": 4}, {"sum_logits": -8.429046630859375, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.77853012084961, "logits_per_token": -8.429046630859375, "logits_per_char": -1.4048411051432292, "num_chars": 6}, {"sum_logits": -3.71394681930542, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.826424598693848, "logits_per_token": -3.71394681930542, "logits_per_char": -0.742789363861084, "num_chars": 5}, {"sum_logits": -11.471256256103516, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -11.972633361816406, "logits_per_token": -11.471256256103516, "logits_per_char": -1.9118760426839192, "num_chars": 6}, {"sum_logits": -7.233684062957764, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.965481758117676, "logits_per_token": -7.233684062957764, "logits_per_char": -1.0333834375653947, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 204, "native_id": "46241bc83e8d81196ae5783b2b9854a4", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 12.163826942443848, "incorrect_loss_raw": 17.229043841362, "correct_loss_per_char": 1.1058024493130771, "incorrect_loss_per_char": 1.682441547130927, "correct_loss_per_token": 6.081913471221924, "incorrect_loss_per_token": 9.56911313533783, "correct_loss_uncond": -10.334113121032715, "incorrect_loss_uncond": -1.5109912157058716}, "model_output": [{"sum_logits": -16.657718658447266, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -21.773391723632812, "logits_per_token": -8.328859329223633, "logits_per_char": -1.3881432215372722, "num_chars": 12}, {"sum_logits": -20.773983001708984, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -21.405588150024414, "logits_per_token": -10.386991500854492, "logits_per_char": -1.5979986924391527, "num_chars": 13}, {"sum_logits": -7.636729717254639, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.619281768798828, "logits_per_token": -7.636729717254639, "logits_per_char": -1.9091824293136597, "num_chars": 4}, {"sum_logits": -23.84774398803711, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.16187858581543, "logits_per_token": -11.923871994018555, "logits_per_char": -1.8344418452336237, "num_chars": 13}, {"sum_logits": -12.163826942443848, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -22.497940063476562, "logits_per_token": -6.081913471221924, "logits_per_char": -1.1058024493130771, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 205, "native_id": "18844d3aa4e52b331b5382c8244cf4db", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.580594539642334, "incorrect_loss_raw": 15.51499080657959, "correct_loss_per_char": 0.4292765030494103, "incorrect_loss_per_char": 1.2175934294350126, "correct_loss_per_token": 2.790297269821167, "incorrect_loss_per_token": 7.0138624509175616, "correct_loss_uncond": -12.580854892730713, "incorrect_loss_uncond": -4.022387981414795}, "model_output": [{"sum_logits": -5.580594539642334, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -18.161449432373047, "logits_per_token": -2.790297269821167, "logits_per_char": -0.4292765030494103, "num_chars": 13}, {"sum_logits": -14.526481628417969, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -18.38909149169922, "logits_per_token": -7.263240814208984, "logits_per_char": -1.4526481628417969, "num_chars": 10}, {"sum_logits": -13.161155700683594, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -18.41927146911621, "logits_per_token": -6.580577850341797, "logits_per_char": -1.0123965923602765, "num_chars": 13}, {"sum_logits": -17.847190856933594, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -21.38075828552246, "logits_per_token": -5.949063618977864, "logits_per_char": -1.487265904744466, "num_chars": 12}, {"sum_logits": -16.525135040283203, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -19.96039390563965, "logits_per_token": -8.262567520141602, "logits_per_char": -0.9180630577935113, "num_chars": 18}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 206, "native_id": "056b33c7050c167b0d4348d40d169358", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 4.435391902923584, "incorrect_loss_raw": 10.081616401672363, "correct_loss_per_char": 0.7392319838205973, "incorrect_loss_per_char": 1.1296896571204775, "correct_loss_per_token": 4.435391902923584, "incorrect_loss_per_token": 6.309333841005961, "correct_loss_uncond": -8.205688953399658, "incorrect_loss_uncond": -7.858960390090942}, "model_output": [{"sum_logits": -7.667721748352051, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.3165283203125, "logits_per_token": -3.8338608741760254, "logits_per_char": -0.8519690831502279, "num_chars": 9}, {"sum_logits": -4.435391902923584, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.641080856323242, "logits_per_token": -4.435391902923584, "logits_per_char": -0.7392319838205973, "num_chars": 6}, {"sum_logits": -6.263541221618652, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.82812213897705, "logits_per_token": -6.263541221618652, "logits_per_char": -1.2527082443237305, "num_chars": 5}, {"sum_logits": -9.512298583984375, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.53508186340332, "logits_per_token": -9.512298583984375, "logits_per_char": -1.3588997977120536, "num_chars": 7}, {"sum_logits": -16.882904052734375, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -25.08257484436035, "logits_per_token": -5.627634684244792, "logits_per_char": -1.0551815032958984, "num_chars": 16}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 207, "native_id": "31d7dd1d00aabe411568df3e72d5b5e0", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.148486137390137, "incorrect_loss_raw": 10.011012077331543, "correct_loss_per_char": 0.794276237487793, "incorrect_loss_per_char": 1.4395441278234706, "correct_loss_per_token": 7.148486137390137, "incorrect_loss_per_token": 6.328765630722046, "correct_loss_uncond": -6.8271379470825195, "incorrect_loss_uncond": -5.689363956451416}, "model_output": [{"sum_logits": -10.920919418334961, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.818208694458008, "logits_per_token": -5.4604597091674805, "logits_per_char": -0.9928108562122692, "num_chars": 11}, {"sum_logits": -10.586076736450195, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.245282173156738, "logits_per_token": -10.586076736450195, "logits_per_char": -2.1172153472900392, "num_chars": 5}, {"sum_logits": -12.61715316772461, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.977118492126465, "logits_per_token": -6.308576583862305, "logits_per_char": -1.802450452532087, "num_chars": 7}, {"sum_logits": -7.148486137390137, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.975624084472656, "logits_per_token": -7.148486137390137, "logits_per_char": -0.794276237487793, "num_chars": 9}, {"sum_logits": -5.919898986816406, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.760894775390625, "logits_per_token": -2.959949493408203, "logits_per_char": -0.8456998552594867, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 208, "native_id": "cbf3dd48b4d591fc872a53cd4b9dd3af", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.677254676818848, "incorrect_loss_raw": 17.1464102268219, "correct_loss_per_char": 0.4451503117879232, "incorrect_loss_per_char": 1.5164475392072627, "correct_loss_per_token": 3.338627338409424, "incorrect_loss_per_token": 7.934636783599854, "correct_loss_uncond": -14.224387168884277, "incorrect_loss_uncond": -6.117231845855713}, "model_output": [{"sum_logits": -14.162313461303711, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -21.145263671875, "logits_per_token": -7.0811567306518555, "logits_per_char": -1.0894087277925932, "num_chars": 13}, {"sum_logits": -26.94200897216797, "num_tokens": 5, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -36.96610641479492, "logits_per_token": -5.388401794433594, "logits_per_char": -1.1225837071736653, "num_chars": 24}, {"sum_logits": -6.677254676818848, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -20.901641845703125, "logits_per_token": -3.338627338409424, "logits_per_char": -0.4451503117879232, "num_chars": 15}, {"sum_logits": -16.424659729003906, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -21.115076065063477, "logits_per_token": -8.212329864501953, "logits_per_char": -1.6424659729003905, "num_chars": 10}, {"sum_logits": -11.056658744812012, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.82812213897705, "logits_per_token": -11.056658744812012, "logits_per_char": -2.2113317489624023, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 209, "native_id": "60e8f1a86d4063895f340cd1e3c55f50", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 10.161947250366211, "incorrect_loss_raw": 12.531329989433289, "correct_loss_per_char": 0.7816882500281701, "incorrect_loss_per_char": 0.8999823695137388, "correct_loss_per_token": 5.0809736251831055, "incorrect_loss_per_token": 7.621115326881409, "correct_loss_uncond": -8.106891632080078, "incorrect_loss_uncond": -5.464595437049866}, "model_output": [{"sum_logits": -12.904565811157227, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.996978759765625, "logits_per_token": -6.452282905578613, "logits_per_char": -0.8603043874104818, "num_chars": 15}, {"sum_logits": -11.90770149230957, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.220017433166504, "logits_per_token": -11.90770149230957, "logits_per_char": -0.8505501065935407, "num_chars": 14}, {"sum_logits": -10.161947250366211, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.26883888244629, "logits_per_token": -5.0809736251831055, "logits_per_char": -0.7816882500281701, "num_chars": 13}, {"sum_logits": -19.78286361694336, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -28.14845085144043, "logits_per_token": -6.594287872314453, "logits_per_char": -1.0990479787190754, "num_chars": 18}, {"sum_logits": -5.530189037322998, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.618254661560059, "logits_per_token": -5.530189037322998, "logits_per_char": -0.7900270053318569, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 210, "native_id": "eee8cb7a0d806a62d2de24831f82e3e1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.918236255645752, "incorrect_loss_raw": 11.789433479309082, "correct_loss_per_char": 0.5380214777859774, "incorrect_loss_per_char": 1.2730590848609655, "correct_loss_per_token": 5.918236255645752, "incorrect_loss_per_token": 11.789433479309082, "correct_loss_uncond": -8.8330979347229, "incorrect_loss_uncond": -2.35274076461792}, "model_output": [{"sum_logits": -5.918236255645752, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.751334190368652, "logits_per_token": -5.918236255645752, "logits_per_char": -0.5380214777859774, "num_chars": 11}, {"sum_logits": -10.858805656433105, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.959312438964844, "logits_per_token": -10.858805656433105, "logits_per_char": -1.2065339618259006, "num_chars": 9}, {"sum_logits": -8.88048267364502, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.379131317138672, "logits_per_token": -8.88048267364502, "logits_per_char": -0.9867202970716689, "num_chars": 9}, {"sum_logits": -11.920952796936035, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.990349769592285, "logits_per_token": -11.920952796936035, "logits_per_char": -1.4901190996170044, "num_chars": 8}, {"sum_logits": -15.497492790222168, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.239903450012207, "logits_per_token": -15.497492790222168, "logits_per_char": -1.408862980929288, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 211, "native_id": "9a23a7f04e63bf9f4c7dfe50c58abfd2", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.189241409301758, "incorrect_loss_raw": 8.627453565597534, "correct_loss_per_char": 0.8986551761627197, "incorrect_loss_per_char": 1.3851367990175882, "correct_loss_per_token": 7.189241409301758, "incorrect_loss_per_token": 8.627453565597534, "correct_loss_uncond": -5.843804359436035, "incorrect_loss_uncond": -4.687921762466431}, "model_output": [{"sum_logits": -10.285850524902344, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.349469184875488, "logits_per_token": -10.285850524902344, "logits_per_char": -1.7143084208170574, "num_chars": 6}, {"sum_logits": -9.60682201385498, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.687784194946289, "logits_per_token": -9.60682201385498, "logits_per_char": -1.3724031448364258, "num_chars": 7}, {"sum_logits": -7.189241409301758, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.033045768737793, "logits_per_token": -7.189241409301758, "logits_per_char": -0.8986551761627197, "num_chars": 8}, {"sum_logits": -8.355905532836914, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.625797271728516, "logits_per_token": -8.355905532836914, "logits_per_char": -1.6711811065673827, "num_chars": 5}, {"sum_logits": -6.261236190795898, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.598450660705566, "logits_per_token": -6.261236190795898, "logits_per_char": -0.7826545238494873, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 212, "native_id": "e3426e4f60c142aa3d813479f79d6305", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.958600997924805, "incorrect_loss_raw": 10.273872137069702, "correct_loss_per_char": 0.5416909998113458, "incorrect_loss_per_char": 1.1068493383271352, "correct_loss_per_token": 5.958600997924805, "incorrect_loss_per_token": 4.512687702973683, "correct_loss_uncond": -7.995020866394043, "incorrect_loss_uncond": -6.459433078765869}, "model_output": [{"sum_logits": -12.685562133789062, "num_tokens": 4, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.732297897338867, "logits_per_token": -3.1713905334472656, "logits_per_char": -0.906111580984933, "num_chars": 14}, {"sum_logits": -16.362411499023438, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.08732795715332, "logits_per_token": -5.4541371663411455, "logits_per_char": -1.6362411499023437, "num_chars": 10}, {"sum_logits": -5.958600997924805, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.953621864318848, "logits_per_token": -5.958600997924805, "logits_per_char": -0.5416909998113458, "num_chars": 11}, {"sum_logits": -5.244583606719971, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.285472869873047, "logits_per_token": -2.6222918033599854, "logits_per_char": -0.524458360671997, "num_chars": 10}, {"sum_logits": -6.802931308746338, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.82812213897705, "logits_per_token": -6.802931308746338, "logits_per_char": -1.3605862617492677, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 213, "native_id": "3526550b02d9594abd4fc43553010fc6", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.509763717651367, "incorrect_loss_raw": 10.020610451698303, "correct_loss_per_char": 1.3585376739501953, "incorrect_loss_per_char": 0.9713445408476724, "correct_loss_per_token": 9.509763717651367, "incorrect_loss_per_token": 5.010305225849152, "correct_loss_uncond": -6.491994857788086, "incorrect_loss_uncond": -6.689138054847717}, "model_output": [{"sum_logits": -8.065287590026855, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.659629821777344, "logits_per_token": -4.032643795013428, "logits_per_char": -0.5376858393351237, "num_chars": 15}, {"sum_logits": -11.443345069885254, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.726911544799805, "logits_per_token": -5.721672534942627, "logits_per_char": -0.9536120891571045, "num_chars": 12}, {"sum_logits": -12.790505409240723, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.602316856384277, "logits_per_token": -6.395252704620361, "logits_per_char": -1.4211672676934137, "num_chars": 9}, {"sum_logits": -9.509763717651367, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.001758575439453, "logits_per_token": -9.509763717651367, "logits_per_char": -1.3585376739501953, "num_chars": 7}, {"sum_logits": -7.783303737640381, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.850135803222656, "logits_per_token": -3.8916518688201904, "logits_per_char": -0.9729129672050476, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 214, "native_id": "e567c94d88829fb07a30e3d46c02e664", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.520834922790527, "incorrect_loss_raw": 14.036985158920288, "correct_loss_per_char": 0.9315478461129325, "incorrect_loss_per_char": 1.2549603133278098, "correct_loss_per_token": 6.520834922790527, "incorrect_loss_per_token": 6.132656693458557, "correct_loss_uncond": -8.798017501831055, "incorrect_loss_uncond": -2.9089207649230957}, "model_output": [{"sum_logits": -15.32901668548584, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.87303924560547, "logits_per_token": -3.83225417137146, "logits_per_char": -0.9017068638521082, "num_chars": 17}, {"sum_logits": -15.32901668548584, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.87303924560547, "logits_per_token": -3.83225417137146, "logits_per_char": -0.9017068638521082, "num_chars": 17}, {"sum_logits": -8.242329597473145, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.346129417419434, "logits_per_token": -8.242329597473145, "logits_per_char": -1.648465919494629, "num_chars": 5}, {"sum_logits": -17.247577667236328, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.691415786743164, "logits_per_token": -8.623788833618164, "logits_per_char": -1.5679616061123935, "num_chars": 11}, {"sum_logits": -6.520834922790527, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.318852424621582, "logits_per_token": -6.520834922790527, "logits_per_char": -0.9315478461129325, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 215, "native_id": "cf5a710c931779fb3dde198e0ace3b6a", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.442594051361084, "incorrect_loss_raw": 13.368313789367676, "correct_loss_per_char": 0.49478127739646216, "incorrect_loss_per_char": 1.1602402735840192, "correct_loss_per_token": 2.721297025680542, "incorrect_loss_per_token": 5.670761704444885, "correct_loss_uncond": -10.665710926055908, "incorrect_loss_uncond": -7.813099384307861}, "model_output": [{"sum_logits": -10.816003799438477, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.251426696777344, "logits_per_token": -5.408001899719238, "logits_per_char": -0.9832730726762251, "num_chars": 11}, {"sum_logits": -15.267942428588867, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.47018814086914, "logits_per_token": -7.633971214294434, "logits_per_char": -1.5267942428588868, "num_chars": 10}, {"sum_logits": -16.214323043823242, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -24.47524642944336, "logits_per_token": -4.0535807609558105, "logits_per_char": -1.0133951902389526, "num_chars": 16}, {"sum_logits": -11.174985885620117, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.528791427612305, "logits_per_token": -5.587492942810059, "logits_per_char": -1.1174985885620117, "num_chars": 10}, {"sum_logits": -5.442594051361084, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.108304977416992, "logits_per_token": -2.721297025680542, "logits_per_char": -0.49478127739646216, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 216, "native_id": "0f2377604e628c55ba588366139396b9", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.204090595245361, "incorrect_loss_raw": 7.417636513710022, "correct_loss_per_char": 0.5782322883605957, "incorrect_loss_per_char": 0.7510321627060573, "correct_loss_per_token": 2.6020452976226807, "incorrect_loss_per_token": 4.902555664380391, "correct_loss_uncond": -9.449851512908936, "incorrect_loss_uncond": -7.5532296895980835}, "model_output": [{"sum_logits": -5.5337724685668945, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.885485649108887, "logits_per_token": -5.5337724685668945, "logits_per_char": -0.6917215585708618, "num_chars": 8}, {"sum_logits": -5.4743523597717285, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.888142585754395, "logits_per_token": -5.4743523597717285, "logits_per_char": -0.6842940449714661, "num_chars": 8}, {"sum_logits": -14.287744522094727, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.36506462097168, "logits_per_token": -7.143872261047363, "logits_per_char": -1.1906453768412273, "num_chars": 12}, {"sum_logits": -4.374676704406738, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.744771957397461, "logits_per_token": -1.4582255681355794, "logits_per_char": -0.4374676704406738, "num_chars": 10}, {"sum_logits": -5.204090595245361, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.653942108154297, "logits_per_token": -2.6020452976226807, "logits_per_char": -0.5782322883605957, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 217, "native_id": "ada088b7c97de80336ad043757c2db16", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.3261232376098633, "incorrect_loss_raw": 14.171004295349121, "correct_loss_per_char": 0.6652246475219726, "incorrect_loss_per_char": 1.9860355436801909, "correct_loss_per_token": 3.3261232376098633, "incorrect_loss_per_token": 11.812968015670776, "correct_loss_uncond": -8.934221267700195, "incorrect_loss_uncond": 0.028393268585205078}, "model_output": [{"sum_logits": -3.3261232376098633, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.260344505310059, "logits_per_token": -3.3261232376098633, "logits_per_char": -0.6652246475219726, "num_chars": 5}, {"sum_logits": -18.864290237426758, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.641115188598633, "logits_per_token": -9.432145118713379, "logits_per_char": -1.1790181398391724, "num_chars": 16}, {"sum_logits": -10.168764114379883, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -11.888583183288574, "logits_per_token": -10.168764114379883, "logits_per_char": -1.6947940190633137, "num_chars": 6}, {"sum_logits": -13.855086326599121, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.254728317260742, "logits_per_token": -13.855086326599121, "logits_per_char": -2.7710172653198244, "num_chars": 5}, {"sum_logits": -13.795876502990723, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.786017417907715, "logits_per_token": -13.795876502990723, "logits_per_char": -2.2993127504984536, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 218, "native_id": "beef0aa2058297904bb4acc1dc340c85", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.134594917297363, "incorrect_loss_raw": 14.701910734176636, "correct_loss_per_char": 0.8304177197543058, "incorrect_loss_per_char": 1.6065731366475422, "correct_loss_per_token": 4.567297458648682, "incorrect_loss_per_token": 7.510329604148865, "correct_loss_uncond": -9.38297176361084, "incorrect_loss_uncond": -1.6964890956878662}, "model_output": [{"sum_logits": -12.322022438049316, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.697668075561523, "logits_per_token": -6.161011219024658, "logits_per_char": -1.2322022438049316, "num_chars": 10}, {"sum_logits": -9.134594917297363, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.517566680908203, "logits_per_token": -4.567297458648682, "logits_per_char": -0.8304177197543058, "num_chars": 11}, {"sum_logits": -12.352890014648438, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.094871520996094, "logits_per_token": -12.352890014648438, "logits_per_char": -3.0882225036621094, "num_chars": 4}, {"sum_logits": -11.976938247680664, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.009998321533203, "logits_per_token": -5.988469123840332, "logits_per_char": -0.998078187306722, "num_chars": 12}, {"sum_logits": -22.155792236328125, "num_tokens": 4, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.791061401367188, "logits_per_token": -5.538948059082031, "logits_per_char": -1.1077896118164063, "num_chars": 20}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 219, "native_id": "ba9a05bd2086c0d37733e26479d6630f", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.1009392738342285, "incorrect_loss_raw": 12.278109192848206, "correct_loss_per_char": 0.7889932526482476, "incorrect_loss_per_char": 1.2906579146018395, "correct_loss_per_token": 3.5504696369171143, "incorrect_loss_per_token": 7.815909326076508, "correct_loss_uncond": -12.441731929779053, "incorrect_loss_uncond": -3.549930691719055}, "model_output": [{"sum_logits": -11.69611930847168, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.37514877319336, "logits_per_token": -5.84805965423584, "logits_per_char": -0.9746766090393066, "num_chars": 12}, {"sum_logits": -16.51744842529297, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.363466262817383, "logits_per_token": -8.258724212646484, "logits_per_char": -1.3764540354410808, "num_chars": 12}, {"sum_logits": -13.414837837219238, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.071391105651855, "logits_per_token": -13.414837837219238, "logits_per_char": -2.2358063062032065, "num_chars": 6}, {"sum_logits": -7.4840312004089355, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.502153396606445, "logits_per_token": -3.7420156002044678, "logits_per_char": -0.5756947077237643, "num_chars": 13}, {"sum_logits": -7.1009392738342285, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.54267120361328, "logits_per_token": -3.5504696369171143, "logits_per_char": -0.7889932526482476, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 220, "native_id": "6b0bf501aa68b06ddc5ad72ac5ff68fc", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.772165298461914, "incorrect_loss_raw": 11.153960704803467, "correct_loss_per_char": 0.8245950426374163, "incorrect_loss_per_char": 1.4509581441122792, "correct_loss_per_token": 5.772165298461914, "incorrect_loss_per_token": 7.099451422691345, "correct_loss_uncond": -7.271130561828613, "incorrect_loss_uncond": -4.7674946784973145}, "model_output": [{"sum_logits": -12.179768562316895, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.197964668273926, "logits_per_token": -12.179768562316895, "logits_per_char": -3.0449421405792236, "num_chars": 4}, {"sum_logits": -11.936720848083496, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.58877944946289, "logits_per_token": -5.968360424041748, "logits_per_char": -0.7021600498872644, "num_chars": 17}, {"sum_logits": -5.772165298461914, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.043295860290527, "logits_per_token": -5.772165298461914, "logits_per_char": -0.8245950426374163, "num_chars": 7}, {"sum_logits": -10.938289642333984, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.34946060180664, "logits_per_token": -5.469144821166992, "logits_per_char": -0.9943899674849077, "num_chars": 11}, {"sum_logits": -9.561063766479492, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.549616813659668, "logits_per_token": -4.780531883239746, "logits_per_char": -1.0623404184977214, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 221, "native_id": "926298bbdd03ce96acfeb4408b888b61", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.091476917266846, "incorrect_loss_raw": 7.687372028827667, "correct_loss_per_char": 1.0228692293167114, "incorrect_loss_per_char": 1.291778642932574, "correct_loss_per_token": 4.091476917266846, "incorrect_loss_per_token": 7.687372028827667, "correct_loss_uncond": -9.309890270233154, "incorrect_loss_uncond": -6.225731074810028}, "model_output": [{"sum_logits": -4.5828118324279785, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.404218673706055, "logits_per_token": -4.5828118324279785, "logits_per_char": -0.5728514790534973, "num_chars": 8}, {"sum_logits": -4.091476917266846, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.4013671875, "logits_per_token": -4.091476917266846, "logits_per_char": -1.0228692293167114, "num_chars": 4}, {"sum_logits": -9.71074390411377, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.794175148010254, "logits_per_token": -9.71074390411377, "logits_per_char": -1.942148780822754, "num_chars": 5}, {"sum_logits": -12.653206825256348, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -11.941009521484375, "logits_per_token": -12.653206825256348, "logits_per_char": -2.108867804209391, "num_chars": 6}, {"sum_logits": -3.8027255535125732, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.513009071350098, "logits_per_token": -3.8027255535125732, "logits_per_char": -0.5432465076446533, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 222, "native_id": "faa0aa438b94c19be8ff52ee80d9e298", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.016671180725098, "incorrect_loss_raw": 8.406794548034668, "correct_loss_per_char": 0.728788289156827, "incorrect_loss_per_char": 0.7792609190984523, "correct_loss_per_token": 4.008335590362549, "incorrect_loss_per_token": 4.203397274017334, "correct_loss_uncond": -9.8089017868042, "incorrect_loss_uncond": -10.244590044021606}, "model_output": [{"sum_logits": -8.016671180725098, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.825572967529297, "logits_per_token": -4.008335590362549, "logits_per_char": -0.728788289156827, "num_chars": 11}, {"sum_logits": -7.3569231033325195, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.376557350158691, "logits_per_token": -3.6784615516662598, "logits_per_char": -0.8174359003702799, "num_chars": 9}, {"sum_logits": -8.610445022583008, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -21.900634765625, "logits_per_token": -4.305222511291504, "logits_per_char": -0.6150317873273577, "num_chars": 14}, {"sum_logits": -6.78348445892334, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.87533950805664, "logits_per_token": -3.39174222946167, "logits_per_char": -0.8479355573654175, "num_chars": 8}, {"sum_logits": -10.876325607299805, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.453006744384766, "logits_per_token": -5.438162803649902, "logits_per_char": -0.8366404313307542, "num_chars": 13}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 223, "native_id": "9310c39a0752f28640c3a05cba1d5ca7", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.4605512619018555, "incorrect_loss_raw": 12.274987578392029, "correct_loss_per_char": 0.8075689077377319, "incorrect_loss_per_char": 1.5005984699571286, "correct_loss_per_token": 3.2302756309509277, "incorrect_loss_per_token": 9.85405147075653, "correct_loss_uncond": -9.852189064025879, "incorrect_loss_uncond": -3.1448625326156616}, "model_output": [{"sum_logits": -7.1576666831970215, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.05156421661377, "logits_per_token": -7.1576666831970215, "logits_per_char": -1.4315333366394043, "num_chars": 5}, {"sum_logits": -12.896339416503906, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.048408508300781, "logits_per_token": -12.896339416503906, "logits_per_char": -1.842334202357701, "num_chars": 7}, {"sum_logits": -19.367488861083984, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.803192138671875, "logits_per_token": -9.683744430541992, "logits_per_char": -1.7606808055530896, "num_chars": 11}, {"sum_logits": -9.678455352783203, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.776235580444336, "logits_per_token": -9.678455352783203, "logits_per_char": -0.9678455352783203, "num_chars": 10}, {"sum_logits": -6.4605512619018555, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.312740325927734, "logits_per_token": -3.2302756309509277, "logits_per_char": -0.8075689077377319, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 224, "native_id": "fee5f4e9d8e37f0183e36eb9b8dbcbb9", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.239611625671387, "incorrect_loss_raw": 10.15221917629242, "correct_loss_per_char": 0.5171151161193848, "incorrect_loss_per_char": 0.9552131474018097, "correct_loss_per_token": 3.6198058128356934, "incorrect_loss_per_token": 7.705674052238464, "correct_loss_uncond": -7.698152542114258, "incorrect_loss_uncond": -5.062328457832336}, "model_output": [{"sum_logits": -7.239611625671387, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.937764167785645, "logits_per_token": -3.6198058128356934, "logits_per_char": -0.5171151161193848, "num_chars": 14}, {"sum_logits": -9.620744705200195, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.26487159729004, "logits_per_token": -4.810372352600098, "logits_per_char": -0.9620744705200195, "num_chars": 10}, {"sum_logits": -13.868185997009277, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.558809280395508, "logits_per_token": -13.868185997009277, "logits_per_char": -1.1556821664174397, "num_chars": 12}, {"sum_logits": -7.16832971572876, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.484892845153809, "logits_per_token": -7.16832971572876, "logits_per_char": -0.5973608096440634, "num_chars": 12}, {"sum_logits": -9.951616287231445, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.549616813659668, "logits_per_token": -4.975808143615723, "logits_per_char": -1.105735143025716, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 225, "native_id": "5392af3f1c4665e95ff3354e5115de42", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.123784065246582, "incorrect_loss_raw": 7.63398939371109, "correct_loss_per_char": 0.5103153387705485, "incorrect_loss_per_char": 0.9360098205861591, "correct_loss_per_token": 6.123784065246582, "incorrect_loss_per_token": 5.560879707336426, "correct_loss_uncond": -8.542768478393555, "incorrect_loss_uncond": -6.720660626888275}, "model_output": [{"sum_logits": -10.178778648376465, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.193441390991211, "logits_per_token": -10.178778648376465, "logits_per_char": -1.4541112354823522, "num_chars": 7}, {"sum_logits": -3.772301435470581, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.641695976257324, "logits_per_token": -3.772301435470581, "logits_per_char": -0.7544602870941162, "num_chars": 5}, {"sum_logits": -7.381199359893799, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.050267219543457, "logits_per_token": -3.6905996799468994, "logits_per_char": -0.6150999466578165, "num_chars": 12}, {"sum_logits": -6.123784065246582, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.666552543640137, "logits_per_token": -6.123784065246582, "logits_per_char": -0.5103153387705485, "num_chars": 12}, {"sum_logits": -9.203678131103516, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.53319549560547, "logits_per_token": -4.601839065551758, "logits_per_char": -0.9203678131103515, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 226, "native_id": "4c5c74b3287492d6ddb2da4c8c0fd51a", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 13.088909149169922, "incorrect_loss_raw": 11.091082096099854, "correct_loss_per_char": 0.769935832304113, "incorrect_loss_per_char": 1.3449179021138993, "correct_loss_per_token": 4.362969716389974, "incorrect_loss_per_token": 5.545541048049927, "correct_loss_uncond": -6.564401626586914, "incorrect_loss_uncond": -5.2420148849487305}, "model_output": [{"sum_logits": -13.544167518615723, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -12.77659797668457, "logits_per_token": -6.772083759307861, "logits_per_char": -1.5049075020684137, "num_chars": 9}, {"sum_logits": -14.653682708740234, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -20.116256713867188, "logits_per_token": -7.326841354370117, "logits_per_char": -1.2211402257283528, "num_chars": 12}, {"sum_logits": -13.088909149169922, "num_tokens": 3, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -19.653310775756836, "logits_per_token": -4.362969716389974, "logits_per_char": -0.769935832304113, "num_chars": 17}, {"sum_logits": -10.144255638122559, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -16.68304443359375, "logits_per_token": -5.072127819061279, "logits_per_char": -1.4491793768746513, "num_chars": 7}, {"sum_logits": -6.022222518920898, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -15.756488800048828, "logits_per_token": -3.011111259460449, "logits_per_char": -1.2044445037841798, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 227, "native_id": "52f3eb6c9a6b9671050fc769d465ed03", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.651975631713867, "incorrect_loss_raw": 12.88572645187378, "correct_loss_per_char": 0.7608554022652763, "incorrect_loss_per_char": 1.4321553196225847, "correct_loss_per_token": 5.325987815856934, "incorrect_loss_per_token": 10.757354497909546, "correct_loss_uncond": -8.130277633666992, "incorrect_loss_uncond": -1.143083095550537}, "model_output": [{"sum_logits": -10.651975631713867, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.78225326538086, "logits_per_token": -5.325987815856934, "logits_per_char": -0.7608554022652763, "num_chars": 14}, {"sum_logits": -8.489041328430176, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.421607971191406, "logits_per_token": -8.489041328430176, "logits_per_char": -1.4148402214050293, "num_chars": 6}, {"sum_logits": -14.882955551147461, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.95219898223877, "logits_per_token": -14.882955551147461, "logits_per_char": -1.240246295928955, "num_chars": 12}, {"sum_logits": -17.026975631713867, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.05597686767578, "logits_per_token": -8.513487815856934, "logits_per_char": -1.216212545122419, "num_chars": 14}, {"sum_logits": -11.143933296203613, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.685454368591309, "logits_per_token": -11.143933296203613, "logits_per_char": -1.8573222160339355, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 228, "native_id": "03ee30b5801b61aee791a551a9d9a49f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.633195638656616, "incorrect_loss_raw": 14.619199514389038, "correct_loss_per_char": 0.3302905126051469, "incorrect_loss_per_char": 1.2026226230513997, "correct_loss_per_token": 3.633195638656616, "incorrect_loss_per_token": 8.109776417414347, "correct_loss_uncond": -11.292089700698853, "incorrect_loss_uncond": -4.839273929595947}, "model_output": [{"sum_logits": -3.633195638656616, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.925285339355469, "logits_per_token": -3.633195638656616, "logits_per_char": -0.3302905126051469, "num_chars": 11}, {"sum_logits": -9.84083080291748, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.151235580444336, "logits_per_token": -9.84083080291748, "logits_per_char": -0.9840830802917481, "num_chars": 10}, {"sum_logits": -9.757152557373047, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.54856014251709, "logits_per_token": -4.878576278686523, "logits_per_char": -0.8870138688520952, "num_chars": 11}, {"sum_logits": -7.140140533447266, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -9.458080291748047, "logits_per_token": -7.140140533447266, "logits_per_char": -1.4280281066894531, "num_chars": 5}, {"sum_logits": -31.73867416381836, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -38.67601776123047, "logits_per_token": -10.57955805460612, "logits_per_char": -1.5113654363723028, "num_chars": 21}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 229, "native_id": "6d1d483745bc0aae0f4dd04e851ceffb", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.678018093109131, "incorrect_loss_raw": 10.34680950641632, "correct_loss_per_char": 0.516183463009921, "incorrect_loss_per_char": 0.9691495117687043, "correct_loss_per_token": 5.678018093109131, "incorrect_loss_per_token": 6.712079028288524, "correct_loss_uncond": -8.681960582733154, "incorrect_loss_uncond": -5.413280367851257}, "model_output": [{"sum_logits": -3.743734836578369, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.92875862121582, "logits_per_token": -1.8718674182891846, "logits_per_char": -0.31197790304819745, "num_chars": 12}, {"sum_logits": -5.678018093109131, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.359978675842285, "logits_per_token": -5.678018093109131, "logits_per_char": -0.516183463009921, "num_chars": 11}, {"sum_logits": -6.214594841003418, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.5768461227417, "logits_per_token": -6.214594841003418, "logits_per_char": -0.7768243551254272, "num_chars": 8}, {"sum_logits": -19.000581741333008, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.310949325561523, "logits_per_token": -6.333527247111003, "logits_per_char": -1.9000581741333007, "num_chars": 10}, {"sum_logits": -12.428326606750488, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.22380542755127, "logits_per_token": -12.428326606750488, "logits_per_char": -0.887737614767892, "num_chars": 14}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 230, "native_id": "bf10bfda7328c8671e15adf8546b64d7", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.993711471557617, "incorrect_loss_raw": 8.929557800292969, "correct_loss_per_char": 0.45397377014160156, "incorrect_loss_per_char": 1.066749290057591, "correct_loss_per_token": 2.4968557357788086, "incorrect_loss_per_token": 7.102002382278442, "correct_loss_uncond": -12.801485061645508, "incorrect_loss_uncond": -4.6621410846710205}, "model_output": [{"sum_logits": -5.024827003479004, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.786955833435059, "logits_per_token": -5.024827003479004, "logits_per_char": -0.8374711672465006, "num_chars": 6}, {"sum_logits": -14.620443344116211, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.53737735748291, "logits_per_token": -7.3102216720581055, "logits_per_char": -1.4620443344116212, "num_chars": 10}, {"sum_logits": -4.993711471557617, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.795196533203125, "logits_per_token": -2.4968557357788086, "logits_per_char": -0.45397377014160156, "num_chars": 11}, {"sum_logits": -10.352651596069336, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.510857582092285, "logits_per_token": -10.352651596069336, "logits_per_char": -1.1502946217854817, "num_chars": 9}, {"sum_logits": -5.720309257507324, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -11.531604766845703, "logits_per_token": -5.720309257507324, "logits_per_char": -0.8171870367867606, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 231, "native_id": "0b3a3ee40dd25be9735ac5e3342ca4dd", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.372159957885742, "incorrect_loss_raw": 12.461779356002808, "correct_loss_per_char": 0.4857955508761936, "incorrect_loss_per_char": 1.3419411210288779, "correct_loss_per_token": 1.4573866526285808, "incorrect_loss_per_token": 6.6217353741327925, "correct_loss_uncond": -11.272722244262695, "incorrect_loss_uncond": -4.5156004428863525}, "model_output": [{"sum_logits": -12.578841209411621, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.671432495117188, "logits_per_token": -6.2894206047058105, "logits_per_char": -1.397649023267958, "num_chars": 9}, {"sum_logits": -18.849348068237305, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -22.1663761138916, "logits_per_token": -6.2831160227457685, "logits_per_char": -0.9920709509598581, "num_chars": 19}, {"sum_logits": -9.409881591796875, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.876571655273438, "logits_per_token": -9.409881591796875, "logits_per_char": -1.1762351989746094, "num_chars": 8}, {"sum_logits": -9.00904655456543, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.195138931274414, "logits_per_token": -4.504523277282715, "logits_per_char": -1.801809310913086, "num_chars": 5}, {"sum_logits": -4.372159957885742, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.644882202148438, "logits_per_token": -1.4573866526285808, "logits_per_char": -0.4857955508761936, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 232, "native_id": "77e2a0b469b56bea81921a4a945ffcb5", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.229731559753418, "incorrect_loss_raw": 10.310014963150024, "correct_loss_per_char": 0.9229731559753418, "incorrect_loss_per_char": 0.8584265992754982, "correct_loss_per_token": 9.229731559753418, "incorrect_loss_per_token": 4.10112327337265, "correct_loss_uncond": -5.182682037353516, "incorrect_loss_uncond": -6.554788589477539}, "model_output": [{"sum_logits": -9.229731559753418, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.412413597106934, "logits_per_token": -9.229731559753418, "logits_per_char": -0.9229731559753418, "num_chars": 10}, {"sum_logits": -5.285490989685059, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.99624252319336, "logits_per_token": -5.285490989685059, "logits_per_char": -0.5872767766316732, "num_chars": 9}, {"sum_logits": -8.521439552307129, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.236899375915527, "logits_per_token": -4.2607197761535645, "logits_per_char": -0.9468266169230143, "num_chars": 9}, {"sum_logits": -12.58030891418457, "num_tokens": 4, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.18499183654785, "logits_per_token": -3.1450772285461426, "logits_per_char": -0.8386872609456381, "num_chars": 15}, {"sum_logits": -14.85282039642334, "num_tokens": 4, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.041080474853516, "logits_per_token": -3.713205099105835, "logits_per_char": -1.060915742601667, "num_chars": 14}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 233, "native_id": "dc964e4f6df6b70815e81e466d0ff717", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.320435643196106, "incorrect_loss_raw": 8.65977656841278, "correct_loss_per_char": 0.3301089107990265, "incorrect_loss_per_char": 1.3617338963917325, "correct_loss_per_token": 1.320435643196106, "incorrect_loss_per_token": 6.294003486633301, "correct_loss_uncond": -11.672932505607605, "incorrect_loss_uncond": -6.305201411247253}, "model_output": [{"sum_logits": -9.607098579406738, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.965145111083984, "logits_per_token": -4.803549289703369, "logits_per_char": -1.3724426542009627, "num_chars": 7}, {"sum_logits": -1.320435643196106, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": true, "sum_logits_uncond": -12.993368148803711, "logits_per_token": -1.320435643196106, "logits_per_char": -0.3301089107990265, "num_chars": 4}, {"sum_logits": -9.61587905883789, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.615674018859863, "logits_per_token": -9.61587905883789, "logits_per_char": -1.9231758117675781, "num_chars": 5}, {"sum_logits": -9.319086074829102, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.2066650390625, "logits_per_token": -4.659543037414551, "logits_per_char": -0.9319086074829102, "num_chars": 10}, {"sum_logits": -6.097042560577393, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.072427749633789, "logits_per_token": -6.097042560577393, "logits_per_char": -1.2194085121154785, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 234, "native_id": "6b9221c1af583ffb43580857d6fde38a", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 1.4840474128723145, "incorrect_loss_raw": 6.436711341142654, "correct_loss_per_char": 0.24734123547871908, "incorrect_loss_per_char": 0.6513911374977657, "correct_loss_per_token": 1.4840474128723145, "incorrect_loss_per_token": 4.8911323845386505, "correct_loss_uncond": -10.622459888458252, "incorrect_loss_uncond": -8.226169794797897}, "model_output": [{"sum_logits": -9.25714111328125, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.076704978942871, "logits_per_token": -9.25714111328125, "logits_per_char": -0.925714111328125, "num_chars": 10}, {"sum_logits": -1.4840474128723145, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.106507301330566, "logits_per_token": -1.4840474128723145, "logits_per_char": -0.24734123547871908, "num_chars": 6}, {"sum_logits": -1.05294668674469, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": true, "sum_logits_uncond": -11.413346290588379, "logits_per_token": -1.05294668674469, "logits_per_char": -0.21058933734893798, "num_chars": 5}, {"sum_logits": -3.0721259117126465, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.45373249053955, "logits_per_token": -3.0721259117126465, "logits_per_char": -0.4388751302446638, "num_chars": 7}, {"sum_logits": -12.364631652832031, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.707740783691406, "logits_per_token": -6.182315826416016, "logits_per_char": -1.030385971069336, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 235, "native_id": "4dc2c4596b08e9bfd893174e67bff40a", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.636416912078857, "incorrect_loss_raw": 12.340230464935303, "correct_loss_per_char": 0.5151574346754286, "incorrect_loss_per_char": 1.0583437416288588, "correct_loss_per_token": 2.3182084560394287, "incorrect_loss_per_token": 7.343843340873718, "correct_loss_uncond": -11.693281650543213, "incorrect_loss_uncond": -5.0467798709869385}, "model_output": [{"sum_logits": -13.496559143066406, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.160051345825195, "logits_per_token": -6.748279571533203, "logits_per_char": -1.1247132619222004, "num_chars": 12}, {"sum_logits": -10.247044563293457, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.587039947509766, "logits_per_token": -5.1235222816467285, "logits_per_char": -0.8539203802744547, "num_chars": 12}, {"sum_logits": -13.446698188781738, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.439885139465332, "logits_per_token": -13.446698188781738, "logits_per_char": -1.4940775765313044, "num_chars": 9}, {"sum_logits": -4.636416912078857, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.32969856262207, "logits_per_token": -2.3182084560394287, "logits_per_char": -0.5151574346754286, "num_chars": 9}, {"sum_logits": -12.17061996459961, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -20.361064910888672, "logits_per_token": -4.056873321533203, "logits_per_char": -0.7606637477874756, "num_chars": 16}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 236, "native_id": "8ae24d3ff199077a59e0d970feb665b7", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.351217269897461, "incorrect_loss_raw": 14.413194179534912, "correct_loss_per_char": 1.0292681058247883, "incorrect_loss_per_char": 1.4186670660972596, "correct_loss_per_token": 6.1756086349487305, "incorrect_loss_per_token": 7.848213195800781, "correct_loss_uncond": -8.032560348510742, "incorrect_loss_uncond": -3.4486396312713623}, "model_output": [{"sum_logits": -12.351217269897461, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -20.383777618408203, "logits_per_token": -6.1756086349487305, "logits_per_char": -1.0292681058247883, "num_chars": 12}, {"sum_logits": -19.750904083251953, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -20.91944122314453, "logits_per_token": -9.875452041625977, "logits_per_char": -1.234431505203247, "num_chars": 16}, {"sum_logits": -12.74991226196289, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -17.87533950805664, "logits_per_token": -6.374956130981445, "logits_per_char": -1.5937390327453613, "num_chars": 8}, {"sum_logits": -20.019031524658203, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -19.171215057373047, "logits_per_token": -10.009515762329102, "logits_per_char": -1.8199119567871094, "num_chars": 11}, {"sum_logits": -5.132928848266602, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.481339454650879, "logits_per_token": -5.132928848266602, "logits_per_char": -1.0265857696533203, "num_chars": 5}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 237, "native_id": "d64a676e9d22e7edd12e7f4ce267a9f0", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.083345413208008, "incorrect_loss_raw": 6.026143550872803, "correct_loss_per_char": 0.6166690826416016, "incorrect_loss_per_char": 0.7180310409693491, "correct_loss_per_token": 3.083345413208008, "incorrect_loss_per_token": 4.9328906536102295, "correct_loss_uncond": -7.934757232666016, "incorrect_loss_uncond": -9.79432988166809}, "model_output": [{"sum_logits": -4.313330173492432, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.480264663696289, "logits_per_token": -4.313330173492432, "logits_per_char": -0.7188883622487386, "num_chars": 6}, {"sum_logits": -3.083345413208008, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.018102645874023, "logits_per_token": -3.083345413208008, "logits_per_char": -0.6166690826416016, "num_chars": 5}, {"sum_logits": -8.746023178100586, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.388912200927734, "logits_per_token": -4.373011589050293, "logits_per_char": -0.624715941292899, "num_chars": 14}, {"sum_logits": -8.280566215515137, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.657292366027832, "logits_per_token": -8.280566215515137, "logits_per_char": -1.1829380307878767, "num_chars": 7}, {"sum_logits": -2.7646546363830566, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.755424499511719, "logits_per_token": -2.7646546363830566, "logits_per_char": -0.3455818295478821, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 238, "native_id": "54ecb521df1d0f5b130a393c42b4126d", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.2791409492492676, "incorrect_loss_raw": 13.121507167816162, "correct_loss_per_char": 0.32791409492492674, "incorrect_loss_per_char": 1.550570336198495, "correct_loss_per_token": 3.2791409492492676, "incorrect_loss_per_token": 6.730711142222087, "correct_loss_uncond": -11.133272647857666, "incorrect_loss_uncond": -3.81040096282959}, "model_output": [{"sum_logits": -13.79173469543457, "num_tokens": 2, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -18.048126220703125, "logits_per_token": -6.895867347717285, "logits_per_char": -1.5324149661593967, "num_chars": 9}, {"sum_logits": -12.186056137084961, "num_tokens": 2, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -16.80486488342285, "logits_per_token": -6.0930280685424805, "logits_per_char": -2.0310093561808267, "num_chars": 6}, {"sum_logits": -3.2791409492492676, "num_tokens": 1, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -14.412413597106934, "logits_per_token": -3.2791409492492676, "logits_per_char": -0.32791409492492674, "num_chars": 10}, {"sum_logits": -18.861433029174805, "num_tokens": 3, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -20.294143676757812, "logits_per_token": -6.2871443430582685, "logits_per_char": -1.1094960605396944, "num_chars": 17}, {"sum_logits": -7.6468048095703125, "num_tokens": 1, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -12.580497741699219, "logits_per_token": -7.6468048095703125, "logits_per_char": -1.5293609619140625, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 239, "native_id": "b7276bb9139ec25c98c7e3822404eb6c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 3.8032565116882324, "incorrect_loss_raw": 5.657403588294983, "correct_loss_per_char": 0.5433223588126046, "incorrect_loss_per_char": 0.7007323735290103, "correct_loss_per_token": 3.8032565116882324, "incorrect_loss_per_token": 5.657403588294983, "correct_loss_uncond": -10.216419696807861, "incorrect_loss_uncond": -8.762388825416565}, "model_output": [{"sum_logits": -3.410261631011963, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.608536720275879, "logits_per_token": -3.410261631011963, "logits_per_char": -0.487180233001709, "num_chars": 7}, {"sum_logits": -3.8032565116882324, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.019676208496094, "logits_per_token": -3.8032565116882324, "logits_per_char": -0.5433223588126046, "num_chars": 7}, {"sum_logits": -8.456602096557617, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.99624252319336, "logits_per_token": -8.456602096557617, "logits_per_char": -0.9396224551730685, "num_chars": 9}, {"sum_logits": -5.011979579925537, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.157535552978516, "logits_per_token": -5.011979579925537, "logits_per_char": -0.41766496499379474, "num_chars": 12}, {"sum_logits": -5.7507710456848145, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.916854858398438, "logits_per_token": -5.7507710456848145, "logits_per_char": -0.9584618409474691, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 240, "native_id": "ecb8758b0d088f9aedc182a516dd1190", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.04884934425354, "incorrect_loss_raw": 8.56305193901062, "correct_loss_per_char": 0.209769868850708, "incorrect_loss_per_char": 0.9084315796263117, "correct_loss_per_token": 1.04884934425354, "incorrect_loss_per_token": 7.509311079978943, "correct_loss_uncond": -12.484188795089722, "incorrect_loss_uncond": -6.666235446929932}, "model_output": [{"sum_logits": -7.932543754577637, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.608536720275879, "logits_per_token": -7.932543754577637, "logits_per_char": -1.1332205363682337, "num_chars": 7}, {"sum_logits": -8.76014518737793, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.463623046875, "logits_per_token": -8.76014518737793, "logits_per_char": -0.9733494652642144, "num_chars": 9}, {"sum_logits": -8.429926872253418, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.687454223632812, "logits_per_token": -4.214963436126709, "logits_per_char": -0.7663569883866743, "num_chars": 11}, {"sum_logits": -9.129591941833496, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.157535552978516, "logits_per_token": -9.129591941833496, "logits_per_char": -0.7607993284861246, "num_chars": 12}, {"sum_logits": -1.04884934425354, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": true, "sum_logits_uncond": -13.533038139343262, "logits_per_token": -1.04884934425354, "logits_per_char": -0.209769868850708, "num_chars": 5}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 241, "native_id": "f2645d0ee8662b6553954cee7e77979e", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.375492095947266, "incorrect_loss_raw": 13.268719911575317, "correct_loss_per_char": 0.5972768995496962, "incorrect_loss_per_char": 1.9014690325373695, "correct_loss_per_token": 2.687746047973633, "incorrect_loss_per_token": 9.592129349708557, "correct_loss_uncond": -10.295940399169922, "incorrect_loss_uncond": -1.2893133163452148}, "model_output": [{"sum_logits": -11.715320587158203, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -12.282496452331543, "logits_per_token": -11.715320587158203, "logits_per_char": -1.952553431193034, "num_chars": 6}, {"sum_logits": -5.375492095947266, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -15.671432495117188, "logits_per_token": -2.687746047973633, "logits_per_char": -0.5972768995496962, "num_chars": 9}, {"sum_logits": -11.946834564208984, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -13.132861137390137, "logits_per_token": -11.946834564208984, "logits_per_char": -2.3893669128417967, "num_chars": 5}, {"sum_logits": -13.65664291381836, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -13.961478233337402, "logits_per_token": -6.82832145690918, "logits_per_char": -1.950948987688337, "num_chars": 7}, {"sum_logits": -15.756081581115723, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -18.855297088623047, "logits_per_token": -7.878040790557861, "logits_per_char": -1.3130067984263103, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 242, "native_id": "ea6d1a739ea841be282e13789270651e", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.83172607421875, "incorrect_loss_raw": 17.880738735198975, "correct_loss_per_char": 0.6793635441706731, "incorrect_loss_per_char": 1.4603997866312664, "correct_loss_per_token": 2.94390869140625, "incorrect_loss_per_token": 10.08657153447469, "correct_loss_uncond": -11.713232040405273, "incorrect_loss_uncond": -0.6707587242126465}, "model_output": [{"sum_logits": -19.023555755615234, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -20.569263458251953, "logits_per_token": -9.511777877807617, "logits_per_char": -1.0568642086452908, "num_chars": 18}, {"sum_logits": -21.06735610961914, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -17.75715446472168, "logits_per_token": -10.53367805480957, "logits_per_char": -2.340817345513238, "num_chars": 9}, {"sum_logits": -8.83172607421875, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.544958114624023, "logits_per_token": -2.94390869140625, "logits_per_char": -0.6793635441706731, "num_chars": 13}, {"sum_logits": -16.696819305419922, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -21.39266014099121, "logits_per_token": -5.565606435139974, "logits_per_char": -1.3914016087849934, "num_chars": 12}, {"sum_logits": -14.735223770141602, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.48691177368164, "logits_per_token": -14.735223770141602, "logits_per_char": -1.052515983581543, "num_chars": 14}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 243, "native_id": "c82ed0c2a2e115452b4d596c5faafbcf", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.245693206787109, "incorrect_loss_raw": 10.027623891830444, "correct_loss_per_char": 1.049138641357422, "incorrect_loss_per_char": 1.1239332176390149, "correct_loss_per_token": 5.245693206787109, "incorrect_loss_per_token": 6.466615200042725, "correct_loss_uncond": -7.590933799743652, "incorrect_loss_uncond": -7.099881172180176}, "model_output": [{"sum_logits": -7.775966167449951, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.321028709411621, "logits_per_token": -7.775966167449951, "logits_per_char": -1.2959943612416585, "num_chars": 6}, {"sum_logits": -12.021028518676758, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.662477493286133, "logits_per_token": -6.010514259338379, "logits_per_char": -1.7172897883823939, "num_chars": 7}, {"sum_logits": -7.963220119476318, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.244270324707031, "logits_per_token": -7.963220119476318, "logits_per_char": -0.7963220119476319, "num_chars": 10}, {"sum_logits": -5.245693206787109, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.836627006530762, "logits_per_token": -5.245693206787109, "logits_per_char": -1.049138641357422, "num_chars": 5}, {"sum_logits": -12.35028076171875, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.282243728637695, "logits_per_token": -4.11676025390625, "logits_per_char": -0.686126708984375, "num_chars": 18}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 244, "native_id": "163d83851ecd4a4144b31b8738e4c335", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.8588533997535706, "incorrect_loss_raw": 12.147014617919922, "correct_loss_per_char": 0.14314223329226175, "incorrect_loss_per_char": 1.5689198155423778, "correct_loss_per_token": 0.8588533997535706, "incorrect_loss_per_token": 9.254623532295227, "correct_loss_uncond": -13.21768182516098, "incorrect_loss_uncond": -5.240952253341675}, "model_output": [{"sum_logits": -13.089726448059082, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -23.032155990600586, "logits_per_token": -6.544863224029541, "logits_per_char": -1.4544140497843425, "num_chars": 9}, {"sum_logits": -0.8588533997535706, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": true, "sum_logits_uncond": -14.07653522491455, "logits_per_token": -0.8588533997535706, "logits_per_char": -0.14314223329226175, "num_chars": 6}, {"sum_logits": -11.429115295410156, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.429719924926758, "logits_per_token": -11.429115295410156, "logits_per_char": -1.9048525492350261, "num_chars": 6}, {"sum_logits": -14.019814491271973, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.96494197845459, "logits_per_token": -14.019814491271973, "logits_per_char": -2.0028306416102817, "num_chars": 7}, {"sum_logits": -10.049402236938477, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -20.125049591064453, "logits_per_token": -5.024701118469238, "logits_per_char": -0.9135820215398615, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 245, "native_id": "095767956c500ca1af7cf7671556de5b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.70163631439209, "incorrect_loss_raw": 12.069331884384155, "correct_loss_per_char": 0.9502727190653483, "incorrect_loss_per_char": 1.3128999537891812, "correct_loss_per_token": 5.70163631439209, "incorrect_loss_per_token": 10.048452615737915, "correct_loss_uncond": -8.894302368164062, "incorrect_loss_uncond": -2.1139018535614014}, "model_output": [{"sum_logits": -5.70163631439209, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -14.595938682556152, "logits_per_token": -5.70163631439209, "logits_per_char": -0.9502727190653483, "num_chars": 6}, {"sum_logits": -15.244307518005371, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -13.452823638916016, "logits_per_token": -15.244307518005371, "logits_per_char": -2.17775821685791, "num_chars": 7}, {"sum_logits": -7.150046348571777, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -12.413294792175293, "logits_per_token": -7.150046348571777, "logits_per_char": -0.794449594285753, "num_chars": 9}, {"sum_logits": -9.71593952178955, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -13.768731117248535, "logits_per_token": -9.71593952178955, "logits_per_char": -0.8096616268157959, "num_chars": 12}, {"sum_logits": -16.167034149169922, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -17.098085403442383, "logits_per_token": -8.083517074584961, "logits_per_char": -1.4697303771972656, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 246, "native_id": "d31ee38f67d1173275e120b8ad36039c", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.70799446105957, "incorrect_loss_raw": 12.082920789718628, "correct_loss_per_char": 0.7916358600963246, "incorrect_loss_per_char": 1.1583004544942808, "correct_loss_per_token": 4.353997230529785, "incorrect_loss_per_token": 7.38123881816864, "correct_loss_uncond": -11.829713821411133, "incorrect_loss_uncond": -4.400959730148315}, "model_output": [{"sum_logits": -10.574318885803223, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.76109504699707, "logits_per_token": -5.287159442901611, "logits_per_char": -0.8811932404836019, "num_chars": 12}, {"sum_logits": -14.399517059326172, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.598116874694824, "logits_per_token": -7.199758529663086, "logits_per_char": -1.4399517059326172, "num_chars": 10}, {"sum_logits": -8.70799446105957, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.537708282470703, "logits_per_token": -4.353997230529785, "logits_per_char": -0.7916358600963246, "num_chars": 11}, {"sum_logits": -10.71822738647461, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.170722007751465, "logits_per_token": -10.71822738647461, "logits_per_char": -1.3397784233093262, "num_chars": 8}, {"sum_logits": -12.639619827270508, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.405588150024414, "logits_per_token": -6.319809913635254, "logits_per_char": -0.9722784482515775, "num_chars": 13}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 247, "native_id": "c410a4626dfce4b4cfd3e5937602cd77", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.266540050506592, "incorrect_loss_raw": 9.65077555179596, "correct_loss_per_char": 0.533317506313324, "incorrect_loss_per_char": 0.9819247573614122, "correct_loss_per_token": 4.266540050506592, "incorrect_loss_per_token": 7.102834582328796, "correct_loss_uncond": -8.58174467086792, "incorrect_loss_uncond": -5.3468257188797}, "model_output": [{"sum_logits": -4.266540050506592, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.848284721374512, "logits_per_token": -4.266540050506592, "logits_per_char": -0.533317506313324, "num_chars": 8}, {"sum_logits": -20.383527755737305, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.183984756469727, "logits_per_token": -10.191763877868652, "logits_per_char": -1.358901850382487, "num_chars": 15}, {"sum_logits": -5.457853317260742, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.289494514465332, "logits_per_token": -5.457853317260742, "logits_per_char": -1.0915706634521485, "num_chars": 5}, {"sum_logits": -8.4951810836792, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.668641090393066, "logits_per_token": -8.4951810836792, "logits_per_char": -0.9439090092976888, "num_chars": 9}, {"sum_logits": -4.266540050506592, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.848284721374512, "logits_per_token": -4.266540050506592, "logits_per_char": -0.533317506313324, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 248, "native_id": "14d760e43728e9e4643c414627f2b596", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.769903182983398, "incorrect_loss_raw": 10.187228441238403, "correct_loss_per_char": 1.0855447981092665, "incorrect_loss_per_char": 1.3798253838978116, "correct_loss_per_token": 9.769903182983398, "incorrect_loss_per_token": 10.187228441238403, "correct_loss_uncond": -4.666494369506836, "incorrect_loss_uncond": -2.994318962097168}, "model_output": [{"sum_logits": -9.512113571166992, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -12.941615104675293, "logits_per_token": -9.512113571166992, "logits_per_char": -1.0569015079074435, "num_chars": 9}, {"sum_logits": -11.21611213684082, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -12.376116752624512, "logits_per_token": -11.21611213684082, "logits_per_char": -1.602301733834403, "num_chars": 7}, {"sum_logits": -8.680251121520996, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -14.075064659118652, "logits_per_token": -8.680251121520996, "logits_per_char": -1.2400358745029993, "num_chars": 7}, {"sum_logits": -11.340436935424805, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -13.333393096923828, "logits_per_token": -11.340436935424805, "logits_per_char": -1.6200624193464006, "num_chars": 7}, {"sum_logits": -9.769903182983398, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -14.436397552490234, "logits_per_token": -9.769903182983398, "logits_per_char": -1.0855447981092665, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 249, "native_id": "abcf1b550b4d44f46d4f68b8e1d98ec8", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.495405673980713, "incorrect_loss_raw": 9.547902226448059, "correct_loss_per_char": 0.5495405673980713, "incorrect_loss_per_char": 1.0063998691382863, "correct_loss_per_token": 2.7477028369903564, "incorrect_loss_per_token": 6.412873703241348, "correct_loss_uncond": -12.650407314300537, "incorrect_loss_uncond": -7.949609398841858}, "model_output": [{"sum_logits": -9.890153884887695, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.580500602722168, "logits_per_token": -9.890153884887695, "logits_per_char": -1.236269235610962, "num_chars": 8}, {"sum_logits": -12.210245132446289, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.998748779296875, "logits_per_token": -12.210245132446289, "logits_per_char": -1.7443207332066126, "num_chars": 7}, {"sum_logits": -9.434133529663086, "num_tokens": 5, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -18.27989959716797, "logits_per_token": -1.8868267059326171, "logits_per_char": -0.6289422353108723, "num_chars": 15}, {"sum_logits": -5.495405673980713, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.14581298828125, "logits_per_token": -2.7477028369903564, "logits_per_char": -0.5495405673980713, "num_chars": 10}, {"sum_logits": -6.657076358795166, "num_tokens": 4, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -22.130897521972656, "logits_per_token": -1.6642690896987915, "logits_per_char": -0.4160672724246979, "num_chars": 16}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 250, "native_id": "5b8af6f26335dbd501b0104c71e26d9e", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 11.309404373168945, "incorrect_loss_raw": 13.905933856964111, "correct_loss_per_char": 1.884900728861491, "incorrect_loss_per_char": 1.5357564853899406, "correct_loss_per_token": 5.654702186584473, "incorrect_loss_per_token": 12.360042810440063, "correct_loss_uncond": -6.253454208374023, "incorrect_loss_uncond": -0.814342737197876}, "model_output": [{"sum_logits": -12.367128372192383, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.095481872558594, "logits_per_token": -6.183564186096191, "logits_per_char": -1.2367128372192382, "num_chars": 10}, {"sum_logits": -11.309404373168945, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.56285858154297, "logits_per_token": -5.654702186584473, "logits_per_char": -1.884900728861491, "num_chars": 6}, {"sum_logits": -13.665077209472656, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.192317008972168, "logits_per_token": -13.665077209472656, "logits_per_char": -1.3665077209472656, "num_chars": 10}, {"sum_logits": -11.215595245361328, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.668022155761719, "logits_per_token": -11.215595245361328, "logits_per_char": -1.869265874226888, "num_chars": 6}, {"sum_logits": -18.375934600830078, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.925285339355469, "logits_per_token": -18.375934600830078, "logits_per_char": -1.6705395091663708, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 251, "native_id": "4364b4b342fb7b44434bd6694bf8fd51", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.899679183959961, "incorrect_loss_raw": 9.060266256332397, "correct_loss_per_char": 0.36872994899749756, "incorrect_loss_per_char": 0.6193799566138875, "correct_loss_per_token": 1.9665597279866536, "incorrect_loss_per_token": 3.51069974899292, "correct_loss_uncond": -11.58828353881836, "incorrect_loss_uncond": -10.68341326713562}, "model_output": [{"sum_logits": -7.451767921447754, "num_tokens": 2, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -18.67377471923828, "logits_per_token": -3.725883960723877, "logits_per_char": -0.6774334474043413, "num_chars": 11}, {"sum_logits": -8.587182998657227, "num_tokens": 2, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -16.108304977416992, "logits_per_token": -4.293591499328613, "logits_per_char": -0.7806529998779297, "num_chars": 11}, {"sum_logits": -11.673540115356445, "num_tokens": 3, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -22.463226318359375, "logits_per_token": -3.8911800384521484, "logits_per_char": -0.48639750480651855, "num_chars": 24}, {"sum_logits": -5.899679183959961, "num_tokens": 3, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -17.48796272277832, "logits_per_token": -1.9665597279866536, "logits_per_char": -0.36872994899749756, "num_chars": 16}, {"sum_logits": -8.528573989868164, "num_tokens": 4, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -21.729412078857422, "logits_per_token": -2.132143497467041, "logits_per_char": -0.5330358743667603, "num_chars": 16}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 252, "native_id": "3ffe67fb009529d9b0c49ccd7141ee4a", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.961875915527344, "incorrect_loss_raw": 11.850300192832947, "correct_loss_per_char": 0.7961875915527343, "incorrect_loss_per_char": 0.9551013835838863, "correct_loss_per_token": 3.980937957763672, "incorrect_loss_per_token": 7.178628385066986, "correct_loss_uncond": -9.302995681762695, "incorrect_loss_uncond": -5.244999051094055}, "model_output": [{"sum_logits": -10.894340515136719, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.937764167785645, "logits_per_token": -5.447170257568359, "logits_per_char": -0.7781671796526227, "num_chars": 14}, {"sum_logits": -7.757659435272217, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -21.762283325195312, "logits_per_token": -3.8788297176361084, "logits_per_char": -0.6464716196060181, "num_chars": 12}, {"sum_logits": -18.72137451171875, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.196256637573242, "logits_per_token": -9.360687255859375, "logits_per_char": -1.5601145426432292, "num_chars": 12}, {"sum_logits": -7.961875915527344, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.26487159729004, "logits_per_token": -3.980937957763672, "logits_per_char": -0.7961875915527343, "num_chars": 10}, {"sum_logits": -10.027826309204102, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.484892845153809, "logits_per_token": -10.027826309204102, "logits_per_char": -0.8356521924336752, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 253, "native_id": "f372587fa4c99d5bebf0d0eb987c44e2", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 1.1259357929229736, "incorrect_loss_raw": 8.306787312030792, "correct_loss_per_char": 0.12510397699144152, "incorrect_loss_per_char": 1.2462461396342233, "correct_loss_per_token": 1.1259357929229736, "incorrect_loss_per_token": 7.952567845582962, "correct_loss_uncond": -13.337687253952026, "incorrect_loss_uncond": -6.882237136363983}, "model_output": [{"sum_logits": -9.80331039428711, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.241388320922852, "logits_per_token": -9.80331039428711, "logits_per_char": -1.2254137992858887, "num_chars": 8}, {"sum_logits": -2.8337557315826416, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.658267974853516, "logits_per_token": -1.4168778657913208, "logits_per_char": -0.23614631096522012, "num_chars": 12}, {"sum_logits": -10.184720039367676, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.802440643310547, "logits_per_token": -10.184720039367676, "logits_per_char": -2.0369440078735352, "num_chars": 5}, {"sum_logits": -10.405363082885742, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.054000854492188, "logits_per_token": -10.405363082885742, "logits_per_char": -1.486480440412249, "num_chars": 7}, {"sum_logits": -1.1259357929229736, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": true, "sum_logits_uncond": -14.463623046875, "logits_per_token": -1.1259357929229736, "logits_per_char": -0.12510397699144152, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 254, "native_id": "d35a8a3bd560fdd651ecf314878ed30f", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.961526393890381, "incorrect_loss_raw": 8.789930701255798, "correct_loss_per_char": 0.5419569448991255, "incorrect_loss_per_char": 1.3226812729468713, "correct_loss_per_token": 2.9807631969451904, "incorrect_loss_per_token": 7.29351544380188, "correct_loss_uncond": -10.519859790802002, "incorrect_loss_uncond": -5.738423943519592}, "model_output": [{"sum_logits": -9.07120418548584, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.566267967224121, "logits_per_token": -9.07120418548584, "logits_per_char": -2.26780104637146, "num_chars": 4}, {"sum_logits": -9.61848258972168, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.04529857635498, "logits_per_token": -9.61848258972168, "logits_per_char": -1.20231032371521, "num_chars": 8}, {"sum_logits": -11.971322059631348, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.701396942138672, "logits_per_token": -5.985661029815674, "logits_per_char": -0.9208709276639498, "num_chars": 13}, {"sum_logits": -4.498713970184326, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.800455093383789, "logits_per_token": -4.498713970184326, "logits_per_char": -0.8997427940368652, "num_chars": 5}, {"sum_logits": -5.961526393890381, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.481386184692383, "logits_per_token": -2.9807631969451904, "logits_per_char": -0.5419569448991255, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 255, "native_id": "0542414710025f56b0c26e1bae5c4d06", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 13.465478897094727, "incorrect_loss_raw": 9.563528418540955, "correct_loss_per_char": 1.0358060690072866, "incorrect_loss_per_char": 1.599586508009169, "correct_loss_per_token": 3.3663697242736816, "incorrect_loss_per_token": 7.647557616233826, "correct_loss_uncond": -4.937290191650391, "incorrect_loss_uncond": -4.108645796775818}, "model_output": [{"sum_logits": -13.465478897094727, "num_tokens": 4, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.402769088745117, "logits_per_token": -3.3663697242736816, "logits_per_char": -1.0358060690072866, "num_chars": 13}, {"sum_logits": -7.304233074188232, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.908795356750488, "logits_per_token": -7.304233074188232, "logits_per_char": -1.4608466148376464, "num_chars": 5}, {"sum_logits": -15.327766418457031, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.27862548828125, "logits_per_token": -7.663883209228516, "logits_per_char": -1.7030851576063368, "num_chars": 9}, {"sum_logits": -8.05337142944336, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -11.859078407287598, "logits_per_token": -8.05337142944336, "logits_per_char": -1.3422285715738933, "num_chars": 6}, {"sum_logits": -7.568742752075195, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.642197608947754, "logits_per_token": -7.568742752075195, "logits_per_char": -1.8921856880187988, "num_chars": 4}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 256, "native_id": "1875f70cf736c68c7a9df3ef870224a1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.877775192260742, "incorrect_loss_raw": 10.508459329605103, "correct_loss_per_char": 1.312962532043457, "incorrect_loss_per_char": 0.7633021092567689, "correct_loss_per_token": 7.877775192260742, "incorrect_loss_per_token": 5.254229664802551, "correct_loss_uncond": -4.543832778930664, "incorrect_loss_uncond": -6.609637975692749}, "model_output": [{"sum_logits": -7.877775192260742, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.421607971191406, "logits_per_token": -7.877775192260742, "logits_per_char": -1.312962532043457, "num_chars": 6}, {"sum_logits": -10.985610008239746, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.32870101928711, "logits_per_token": -5.492805004119873, "logits_per_char": -0.6866006255149841, "num_chars": 16}, {"sum_logits": -9.50096321105957, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.710956573486328, "logits_per_token": -4.750481605529785, "logits_per_char": -0.7308433239276593, "num_chars": 13}, {"sum_logits": -9.59045124053955, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.929718017578125, "logits_per_token": -4.795225620269775, "logits_per_char": -0.63936341603597, "num_chars": 15}, {"sum_logits": -11.956812858581543, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.503013610839844, "logits_per_token": -5.9784064292907715, "logits_per_char": -0.9964010715484619, "num_chars": 12}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 257, "native_id": "83250ae2dfeb2e3886ead4cde8e1290f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.896306991577148, "incorrect_loss_raw": 13.801980257034302, "correct_loss_per_char": 0.5664908091227213, "incorrect_loss_per_char": 1.205788369556685, "correct_loss_per_token": 3.9654356638590493, "incorrect_loss_per_token": 6.765712896982829, "correct_loss_uncond": -9.65422248840332, "incorrect_loss_uncond": -4.270262718200684}, "model_output": [{"sum_logits": -9.450639724731445, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.09526538848877, "logits_per_token": -9.450639724731445, "logits_per_char": -0.9450639724731446, "num_chars": 10}, {"sum_logits": -14.158708572387695, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.67966079711914, "logits_per_token": -7.079354286193848, "logits_per_char": -1.1798923810323079, "num_chars": 12}, {"sum_logits": -11.896306991577148, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -21.55052947998047, "logits_per_token": -3.9654356638590493, "logits_per_char": -0.5664908091227213, "num_chars": 21}, {"sum_logits": -19.128944396972656, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -21.12808609008789, "logits_per_token": -6.376314798990886, "logits_per_char": -1.7389949451793323, "num_chars": 11}, {"sum_logits": -12.46962833404541, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.38595962524414, "logits_per_token": -4.156542778015137, "logits_per_char": -0.9592021795419546, "num_chars": 13}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 258, "native_id": "70c39372c0d50566554fd72c768b75f6", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.817432880401611, "incorrect_loss_raw": 10.253177642822266, "correct_loss_per_char": 0.831061840057373, "incorrect_loss_per_char": 1.0365984078609582, "correct_loss_per_token": 5.817432880401611, "incorrect_loss_per_token": 10.253177642822266, "correct_loss_uncond": -10.215190410614014, "incorrect_loss_uncond": -3.468165159225464}, "model_output": [{"sum_logits": -8.633358001708984, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.98448371887207, "logits_per_token": -8.633358001708984, "logits_per_char": -0.9592620001898872, "num_chars": 9}, {"sum_logits": -10.91998291015625, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.793869018554688, "logits_per_token": -10.91998291015625, "logits_per_char": -0.9927257191051136, "num_chars": 11}, {"sum_logits": -12.055929183959961, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.02486801147461, "logits_per_token": -12.055929183959961, "logits_per_char": -1.3395476871066623, "num_chars": 9}, {"sum_logits": -9.403440475463867, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.08215045928955, "logits_per_token": -9.403440475463867, "logits_per_char": -0.8548582250421698, "num_chars": 11}, {"sum_logits": -5.817432880401611, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.032623291015625, "logits_per_token": -5.817432880401611, "logits_per_char": -0.831061840057373, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 259, "native_id": "c21ec5b367f409a0288d616f626555ae", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.635545253753662, "incorrect_loss_raw": 12.818125486373901, "correct_loss_per_char": 0.6032313867048784, "incorrect_loss_per_char": 1.3360476688905196, "correct_loss_per_token": 3.317772626876831, "incorrect_loss_per_token": 9.725036859512329, "correct_loss_uncond": -11.33730936050415, "incorrect_loss_uncond": -2.968047857284546}, "model_output": [{"sum_logits": -6.635545253753662, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.972854614257812, "logits_per_token": -3.317772626876831, "logits_per_char": -0.6032313867048784, "num_chars": 11}, {"sum_logits": -11.551948547363281, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.68854331970215, "logits_per_token": -5.775974273681641, "logits_per_char": -1.1551948547363282, "num_chars": 10}, {"sum_logits": -12.416638374328613, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.590031623840332, "logits_per_token": -12.416638374328613, "logits_per_char": -1.1287853067571467, "num_chars": 11}, {"sum_logits": -13.192760467529297, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.974193572998047, "logits_per_token": -6.596380233764648, "logits_per_char": -1.649095058441162, "num_chars": 8}, {"sum_logits": -14.111154556274414, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.891924858093262, "logits_per_token": -14.111154556274414, "logits_per_char": -1.4111154556274415, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 260, "native_id": "a2cd03ed068f6d613e85f3a60f4db0a1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.685940742492676, "incorrect_loss_raw": 9.584753692150116, "correct_loss_per_char": 0.5857425928115845, "incorrect_loss_per_char": 1.299133674664931, "correct_loss_per_token": 4.685940742492676, "incorrect_loss_per_token": 6.959805031617482, "correct_loss_uncond": -9.023407936096191, "incorrect_loss_uncond": -5.1047192215919495}, "model_output": [{"sum_logits": -6.414332389831543, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -11.587641716003418, "logits_per_token": -6.414332389831543, "logits_per_char": -1.6035830974578857, "num_chars": 4}, {"sum_logits": -12.832087516784668, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -14.755424499511719, "logits_per_token": -12.832087516784668, "logits_per_char": -1.6040109395980835, "num_chars": 8}, {"sum_logits": -4.685940742492676, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -13.709348678588867, "logits_per_token": -4.685940742492676, "logits_per_char": -0.5857425928115845, "num_chars": 8}, {"sum_logits": -3.342902898788452, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -13.90507698059082, "logits_per_token": -3.342902898788452, "logits_per_char": -0.5571504831314087, "num_chars": 6}, {"sum_logits": -15.7496919631958, "num_tokens": 3, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -18.509748458862305, "logits_per_token": -5.249897321065267, "logits_per_char": -1.4317901784723455, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 261, "native_id": "d2871dc28c82471e5d7f71f79e49c257", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 2.5634186267852783, "incorrect_loss_raw": 7.662663638591766, "correct_loss_per_char": 0.4272364377975464, "incorrect_loss_per_char": 0.5470806366265422, "correct_loss_per_token": 2.5634186267852783, "incorrect_loss_per_token": 3.9157431721687317, "correct_loss_uncond": -10.215111494064331, "incorrect_loss_uncond": -10.872075617313385}, "model_output": [{"sum_logits": -3.2647387981414795, "num_tokens": 1, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -14.463623046875, "logits_per_token": -3.2647387981414795, "logits_per_char": -0.36274875534905326, "num_chars": 9}, {"sum_logits": -8.480132102966309, "num_tokens": 2, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -19.346532821655273, "logits_per_token": -4.240066051483154, "logits_per_char": -0.7709211002696644, "num_chars": 11}, {"sum_logits": -14.330154418945312, "num_tokens": 4, "num_tokens_all": 162, "is_greedy": false, "sum_logits_uncond": -25.87283706665039, "logits_per_token": -3.582538604736328, "logits_per_char": -0.5970897674560547, "num_chars": 24}, {"sum_logits": -2.5634186267852783, "num_tokens": 1, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -12.77853012084961, "logits_per_token": -2.5634186267852783, "logits_per_char": -0.4272364377975464, "num_chars": 6}, {"sum_logits": -4.575629234313965, "num_tokens": 1, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -14.455964088439941, "logits_per_token": -4.575629234313965, "logits_per_char": -0.4575629234313965, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 262, "native_id": "94770e75c4e2000e717b4218ddff19e8", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.380584716796875, "incorrect_loss_raw": 11.00818920135498, "correct_loss_per_char": 0.425372314453125, "incorrect_loss_per_char": 1.0193863888799926, "correct_loss_per_token": 2.126861572265625, "incorrect_loss_per_token": 5.336746732393901, "correct_loss_uncond": -13.74398422241211, "incorrect_loss_uncond": -6.641164541244507}, "model_output": [{"sum_logits": -8.927705764770508, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.859078407287598, "logits_per_token": -8.927705764770508, "logits_per_char": -1.4879509607950847, "num_chars": 6}, {"sum_logits": -10.572608947753906, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.469568252563477, "logits_per_token": -2.6431522369384766, "logits_per_char": -0.7551863534109933, "num_chars": 14}, {"sum_logits": -14.940552711486816, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -23.635160446166992, "logits_per_token": -4.980184237162272, "logits_per_char": -1.1492732854989858, "num_chars": 13}, {"sum_logits": -6.380584716796875, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.124568939208984, "logits_per_token": -2.126861572265625, "logits_per_char": -0.425372314453125, "num_chars": 15}, {"sum_logits": -9.591889381408691, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.633607864379883, "logits_per_token": -4.795944690704346, "logits_per_char": -0.6851349558149066, "num_chars": 14}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 263, "native_id": "08ad17d3ca1838b8724d21cf5921ec52", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.972289085388184, "incorrect_loss_raw": 11.442442655563354, "correct_loss_per_char": 0.45940685272216797, "incorrect_loss_per_char": 0.932885843632268, "correct_loss_per_token": 2.986144542694092, "incorrect_loss_per_token": 6.124893426895142, "correct_loss_uncond": -13.093804359436035, "incorrect_loss_uncond": -6.733677864074707}, "model_output": [{"sum_logits": -11.660942077636719, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -21.19476890563965, "logits_per_token": -5.830471038818359, "logits_per_char": -0.7773961385091146, "num_chars": 15}, {"sum_logits": -16.234256744384766, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.319358825683594, "logits_per_token": -8.117128372192383, "logits_per_char": -1.1595897674560547, "num_chars": 14}, {"sum_logits": -6.8906755447387695, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.11971378326416, "logits_per_token": -6.8906755447387695, "logits_per_char": -1.1484459241231282, "num_chars": 6}, {"sum_logits": -5.972289085388184, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.06609344482422, "logits_per_token": -2.986144542694092, "logits_per_char": -0.45940685272216797, "num_chars": 13}, {"sum_logits": -10.983896255493164, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.070640563964844, "logits_per_token": -3.6612987518310547, "logits_per_char": -0.6461115444407743, "num_chars": 17}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 264, "native_id": "21fb76bd8349628b441c76f47c33e77b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.44423770904541, "incorrect_loss_raw": 13.946560382843018, "correct_loss_per_char": 0.531731264931815, "incorrect_loss_per_char": 1.770260641972224, "correct_loss_per_token": 1.8610594272613525, "incorrect_loss_per_token": 10.09866750240326, "correct_loss_uncond": -10.288060188293457, "incorrect_loss_uncond": -1.60221266746521}, "model_output": [{"sum_logits": -7.44423770904541, "num_tokens": 4, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.732297897338867, "logits_per_token": -1.8610594272613525, "logits_per_char": -0.531731264931815, "num_chars": 14}, {"sum_logits": -12.803777694702148, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.563986778259277, "logits_per_token": -12.803777694702148, "logits_per_char": -1.0669814745585124, "num_chars": 12}, {"sum_logits": -12.199320793151855, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.154126167297363, "logits_per_token": -12.199320793151855, "logits_per_char": -2.439864158630371, "num_chars": 5}, {"sum_logits": -19.70610809326172, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.164363861083984, "logits_per_token": -9.85305404663086, "logits_per_char": -2.1895675659179688, "num_chars": 9}, {"sum_logits": -11.077034950256348, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.312615394592285, "logits_per_token": -5.538517475128174, "logits_per_char": -1.3846293687820435, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 265, "native_id": "e151b44e0a7bf08a1dd3c861eef09161", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.556668281555176, "incorrect_loss_raw": 9.441478252410889, "correct_loss_per_char": 0.819583535194397, "incorrect_loss_per_char": 1.2769040238289606, "correct_loss_per_token": 6.556668281555176, "incorrect_loss_per_token": 6.764204502105713, "correct_loss_uncond": -7.199007987976074, "incorrect_loss_uncond": -5.5064537525177}, "model_output": [{"sum_logits": -7.167939186096191, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -11.551885604858398, "logits_per_token": -7.167939186096191, "logits_per_char": -1.7919847965240479, "num_chars": 4}, {"sum_logits": -16.063642501831055, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.074176788330078, "logits_per_token": -5.354547500610352, "logits_per_char": -1.0709095001220703, "num_chars": 15}, {"sum_logits": -6.556668281555176, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.75567626953125, "logits_per_token": -6.556668281555176, "logits_per_char": -0.819583535194397, "num_chars": 8}, {"sum_logits": -7.072327613830566, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.695460319519043, "logits_per_token": -7.072327613830566, "logits_per_char": -1.178721268971761, "num_chars": 6}, {"sum_logits": -7.462003707885742, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.470205307006836, "logits_per_token": -7.462003707885742, "logits_per_char": -1.066000529697963, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 266, "native_id": "46351b3a6beb694c5f623583a3b1473d", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.151378631591797, "incorrect_loss_raw": 15.89088773727417, "correct_loss_per_char": 2.0302757263183593, "incorrect_loss_per_char": 2.2244958100896897, "correct_loss_per_token": 5.075689315795898, "incorrect_loss_per_token": 13.162890911102295, "correct_loss_uncond": -7.366352081298828, "incorrect_loss_uncond": 0.916041374206543}, "model_output": [{"sum_logits": -14.372925758361816, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.080846786499023, "logits_per_token": -14.372925758361816, "logits_per_char": -2.395487626393636, "num_chars": 6}, {"sum_logits": -10.151378631591797, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.517730712890625, "logits_per_token": -5.075689315795898, "logits_per_char": -2.0302757263183593, "num_chars": 5}, {"sum_logits": -13.80313777923584, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.126147270202637, "logits_per_token": -13.80313777923584, "logits_per_char": -3.45078444480896, "num_chars": 4}, {"sum_logits": -21.823974609375, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.34261131286621, "logits_per_token": -10.9119873046875, "logits_per_char": -1.81866455078125, "num_chars": 12}, {"sum_logits": -13.563512802124023, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.349780082702637, "logits_per_token": -13.563512802124023, "logits_per_char": -1.2330466183749111, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 267, "native_id": "db75e16788cf56d5dfb9773eaf91fe7e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.247753143310547, "incorrect_loss_raw": 11.501623392105103, "correct_loss_per_char": 0.9164170159233941, "incorrect_loss_per_char": 0.9367376055982377, "correct_loss_per_token": 8.247753143310547, "incorrect_loss_per_token": 5.457339843114218, "correct_loss_uncond": -6.620765686035156, "incorrect_loss_uncond": -5.753584384918213}, "model_output": [{"sum_logits": -6.9036664962768555, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.241378784179688, "logits_per_token": -6.9036664962768555, "logits_per_char": -0.8629583120346069, "num_chars": 8}, {"sum_logits": -14.698524475097656, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -17.605648040771484, "logits_per_token": -4.899508158365886, "logits_per_char": -0.8165846930609809, "num_chars": 18}, {"sum_logits": -11.348503112792969, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -15.327265739440918, "logits_per_token": -5.674251556396484, "logits_per_char": -1.134850311279297, "num_chars": 10}, {"sum_logits": -13.05579948425293, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -21.846538543701172, "logits_per_token": -4.3519331614176435, "logits_per_char": -0.9325571060180664, "num_chars": 14}, {"sum_logits": -8.247753143310547, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.868518829345703, "logits_per_token": -8.247753143310547, "logits_per_char": -0.9164170159233941, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 268, "native_id": "ffd89796a9b09bef56c5803f188764c6", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.07303524017334, "incorrect_loss_raw": 13.63730263710022, "correct_loss_per_char": 0.607303524017334, "incorrect_loss_per_char": 1.042004576945678, "correct_loss_per_token": 3.03651762008667, "incorrect_loss_per_token": 6.81865131855011, "correct_loss_uncond": -13.998278617858887, "incorrect_loss_uncond": -8.801324605941772}, "model_output": [{"sum_logits": -6.07303524017334, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -20.071313858032227, "logits_per_token": -3.03651762008667, "logits_per_char": -0.607303524017334, "num_chars": 10}, {"sum_logits": -14.309762954711914, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -24.036039352416992, "logits_per_token": -7.154881477355957, "logits_per_char": -1.0221259253365653, "num_chars": 14}, {"sum_logits": -12.721796035766602, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -21.538015365600586, "logits_per_token": -6.360898017883301, "logits_per_char": -1.1565269123424182, "num_chars": 11}, {"sum_logits": -15.098398208618164, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -20.88605499267578, "logits_per_token": -7.549199104309082, "logits_per_char": -1.1614152468167818, "num_chars": 13}, {"sum_logits": -12.4192533493042, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -23.29439926147461, "logits_per_token": -6.2096266746521, "logits_per_char": -0.8279502232869466, "num_chars": 15}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 269, "native_id": "5622e49306bb82ec1cec817ad0506c60", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.520051956176758, "incorrect_loss_raw": 11.491410970687866, "correct_loss_per_char": 0.5520051956176758, "incorrect_loss_per_char": 1.6125649826867239, "correct_loss_per_token": 5.520051956176758, "incorrect_loss_per_token": 11.491410970687866, "correct_loss_uncond": -7.381200790405273, "incorrect_loss_uncond": -1.9992527961730957}, "model_output": [{"sum_logits": -7.497803688049316, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.551656723022461, "logits_per_token": -7.497803688049316, "logits_per_char": -1.4995607376098632, "num_chars": 5}, {"sum_logits": -12.612251281738281, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.002641677856445, "logits_per_token": -12.612251281738281, "logits_per_char": -1.1465682983398438, "num_chars": 11}, {"sum_logits": -5.520051956176758, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.901252746582031, "logits_per_token": -5.520051956176758, "logits_per_char": -0.5520051956176758, "num_chars": 10}, {"sum_logits": -14.894691467285156, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.484378814697266, "logits_per_token": -14.894691467285156, "logits_per_char": -1.063906533377511, "num_chars": 14}, {"sum_logits": -10.960897445678711, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.923977851867676, "logits_per_token": -10.960897445678711, "logits_per_char": -2.7402243614196777, "num_chars": 4}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 270, "native_id": "6efaeb796307036719635242fa5ad0f3", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.965036392211914, "incorrect_loss_raw": 11.9794282913208, "correct_loss_per_char": 0.994172732035319, "incorrect_loss_per_char": 0.892409574796283, "correct_loss_per_token": 5.965036392211914, "incorrect_loss_per_token": 6.375303904215495, "correct_loss_uncond": -8.593667030334473, "incorrect_loss_uncond": -6.412398099899292}, "model_output": [{"sum_logits": -8.48745346069336, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.866621971130371, "logits_per_token": -8.48745346069336, "logits_per_char": -1.06093168258667, "num_chars": 8}, {"sum_logits": -10.598152160644531, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.2464542388916, "logits_per_token": -5.299076080322266, "logits_per_char": -0.7065434773763021, "num_chars": 15}, {"sum_logits": -12.6239013671875, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -22.775146484375, "logits_per_token": -6.31195068359375, "logits_per_char": -0.9017072405133929, "num_chars": 14}, {"sum_logits": -16.208206176757812, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.6790828704834, "logits_per_token": -5.4027353922526045, "logits_per_char": -0.9004558987087674, "num_chars": 18}, {"sum_logits": -5.965036392211914, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.558703422546387, "logits_per_token": -5.965036392211914, "logits_per_char": -0.994172732035319, "num_chars": 6}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 271, "native_id": "114d310d1198abffaf8b88dab5a55aa7", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.479604244232178, "incorrect_loss_raw": 14.809026002883911, "correct_loss_per_char": 0.5890549312938343, "incorrect_loss_per_char": 1.1617208153217824, "correct_loss_per_token": 3.239802122116089, "incorrect_loss_per_token": 7.211652874946594, "correct_loss_uncond": -7.616129398345947, "incorrect_loss_uncond": -4.620267391204834}, "model_output": [{"sum_logits": -20.572174072265625, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -23.3974666595459, "logits_per_token": -6.857391357421875, "logits_per_char": -0.9350988214666193, "num_chars": 22}, {"sum_logits": -13.032614707946777, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.646121978759766, "logits_per_token": -6.516307353973389, "logits_per_char": -1.0860512256622314, "num_chars": 12}, {"sum_logits": -6.479604244232178, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.095733642578125, "logits_per_token": -3.239802122116089, "logits_per_char": -0.5890549312938343, "num_chars": 11}, {"sum_logits": -20.316804885864258, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.089855194091797, "logits_per_token": -10.158402442932129, "logits_per_char": -1.5628311450664814, "num_chars": 13}, {"sum_logits": -5.314510345458984, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.58372974395752, "logits_per_token": -5.314510345458984, "logits_per_char": -1.062902069091797, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 272, "native_id": "0f79faf5337706f2e0e39c15bbd2e99a", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.490487575531006, "incorrect_loss_raw": 10.726374387741089, "correct_loss_per_char": 0.5490487575531006, "incorrect_loss_per_char": 1.42776949206988, "correct_loss_per_token": 2.745243787765503, "incorrect_loss_per_token": 9.619213581085205, "correct_loss_uncond": -13.56035852432251, "incorrect_loss_uncond": -5.019579172134399}, "model_output": [{"sum_logits": -10.936688423156738, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.644584655761719, "logits_per_token": -10.936688423156738, "logits_per_char": -1.3670860528945923, "num_chars": 8}, {"sum_logits": -5.490487575531006, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.050846099853516, "logits_per_token": -2.745243787765503, "logits_per_char": -0.5490487575531006, "num_chars": 10}, {"sum_logits": -8.85728645324707, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.276601791381836, "logits_per_token": -4.428643226623535, "logits_per_char": -0.4920714696248372, "num_chars": 18}, {"sum_logits": -12.239616394042969, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.646322250366211, "logits_per_token": -12.239616394042969, "logits_per_char": -2.039936065673828, "num_chars": 6}, {"sum_logits": -10.871906280517578, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.416305541992188, "logits_per_token": -10.871906280517578, "logits_per_char": -1.811984380086263, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 273, "native_id": "b62d7d1b5eec31be0b65146a9fc069e0", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.203527450561523, "incorrect_loss_raw": 10.136844635009766, "correct_loss_per_char": 0.4002713423508864, "incorrect_loss_per_char": 0.9350118619194967, "correct_loss_per_token": 2.6017637252807617, "incorrect_loss_per_token": 7.470747470855713, "correct_loss_uncond": -15.949007034301758, "incorrect_loss_uncond": -7.535529613494873}, "model_output": [{"sum_logits": -8.356307983398438, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.669290542602539, "logits_per_token": -8.356307983398438, "logits_per_char": -0.9284786648220487, "num_chars": 9}, {"sum_logits": -5.203527450561523, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.15253448486328, "logits_per_token": -2.6017637252807617, "logits_per_char": -0.4002713423508864, "num_chars": 13}, {"sum_logits": -10.807655334472656, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.941911697387695, "logits_per_token": -5.403827667236328, "logits_per_char": -0.7719753810337612, "num_chars": 14}, {"sum_logits": -10.521121978759766, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -24.259071350097656, "logits_per_token": -5.260560989379883, "logits_per_char": -1.0521121978759767, "num_chars": 10}, {"sum_logits": -10.862293243408203, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.819223403930664, "logits_per_token": -10.862293243408203, "logits_per_char": -0.9874812039462003, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 274, "native_id": "1342c6aec9f5179d6ea6fa5fefbe5188", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.320453643798828, "incorrect_loss_raw": 9.533268809318542, "correct_loss_per_char": 0.7371752602713448, "incorrect_loss_per_char": 0.9862645692550219, "correct_loss_per_token": 2.580113410949707, "incorrect_loss_per_token": 4.373826126257578, "correct_loss_uncond": -10.738990783691406, "incorrect_loss_uncond": -8.0506831407547}, "model_output": [{"sum_logits": -9.427398681640625, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.15640640258789, "logits_per_token": -3.1424662272135415, "logits_per_char": -1.1784248352050781, "num_chars": 8}, {"sum_logits": -15.205821990966797, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.625776290893555, "logits_per_token": -7.602910995483398, "logits_per_char": -1.5205821990966797, "num_chars": 10}, {"sum_logits": -4.318098545074463, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.906187057495117, "logits_per_token": -2.1590492725372314, "logits_per_char": -0.5397623181343079, "num_chars": 8}, {"sum_logits": -10.320453643798828, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.059444427490234, "logits_per_token": -2.580113410949707, "logits_per_char": -0.7371752602713448, "num_chars": 14}, {"sum_logits": -9.181756019592285, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.647438049316406, "logits_per_token": -4.590878009796143, "logits_per_char": -0.7062889245840219, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 275, "native_id": "c74ae684ba6c76e2a913493483678c9d", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.055957794189453, "incorrect_loss_raw": 10.181506514549255, "correct_loss_per_char": 0.5879964828491211, "incorrect_loss_per_char": 1.2158092330706443, "correct_loss_per_token": 3.5279788970947266, "incorrect_loss_per_token": 8.068368792533875, "correct_loss_uncond": -8.305681228637695, "incorrect_loss_uncond": -4.795185208320618}, "model_output": [{"sum_logits": -7.055957794189453, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.361639022827148, "logits_per_token": -3.5279788970947266, "logits_per_char": -0.5879964828491211, "num_chars": 12}, {"sum_logits": -8.99341869354248, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.721421241760254, "logits_per_token": -8.99341869354248, "logits_per_char": -1.2847740990774972, "num_chars": 7}, {"sum_logits": -16.905101776123047, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -21.025089263916016, "logits_per_token": -8.452550888061523, "logits_per_char": -0.9944177515366498, "num_chars": 17}, {"sum_logits": -10.536125183105469, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.21864128112793, "logits_per_token": -10.536125183105469, "logits_per_char": -2.107225036621094, "num_chars": 5}, {"sum_logits": -4.291380405426025, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.941615104675293, "logits_per_token": -4.291380405426025, "logits_per_char": -0.47682004504733616, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 276, "native_id": "411e50225637b76187cc36b24fe3127c", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.699788808822632, "incorrect_loss_raw": 7.9640339612960815, "correct_loss_per_char": 0.3363444371656938, "incorrect_loss_per_char": 1.2928854337105384, "correct_loss_per_token": 1.849894404411316, "incorrect_loss_per_token": 7.9640339612960815, "correct_loss_uncond": -14.833952188491821, "incorrect_loss_uncond": -4.771504521369934}, "model_output": [{"sum_logits": -8.925078392028809, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.676141738891602, "logits_per_token": -8.925078392028809, "logits_per_char": -1.7850156784057618, "num_chars": 5}, {"sum_logits": -9.747444152832031, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.202109336853027, "logits_per_token": -9.747444152832031, "logits_per_char": -0.7498033963716947, "num_chars": 13}, {"sum_logits": -3.699788808822632, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.533740997314453, "logits_per_token": -1.849894404411316, "logits_per_char": -0.3363444371656938, "num_chars": 11}, {"sum_logits": -5.43976354598999, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.951799392700195, "logits_per_token": -5.43976354598999, "logits_per_char": -1.0879527091979981, "num_chars": 5}, {"sum_logits": -7.743849754333496, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.112103462219238, "logits_per_token": -7.743849754333496, "logits_per_char": -1.5487699508666992, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 277, "native_id": "2a0e82bbf1471290c93c8f2a11af197f", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.23670768737793, "incorrect_loss_raw": 10.357438564300537, "correct_loss_per_char": 0.823670768737793, "incorrect_loss_per_char": 1.1207995239849928, "correct_loss_per_token": 4.118353843688965, "incorrect_loss_per_token": 6.809979557991028, "correct_loss_uncond": -10.38062858581543, "incorrect_loss_uncond": -5.755365610122681}, "model_output": [{"sum_logits": -10.832453727722168, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.669736862182617, "logits_per_token": -5.416226863861084, "logits_per_char": -1.5474933896745955, "num_chars": 7}, {"sum_logits": -10.249826431274414, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.928194999694824, "logits_per_token": -10.249826431274414, "logits_per_char": -1.0249826431274414, "num_chars": 10}, {"sum_logits": -8.23670768737793, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.61733627319336, "logits_per_token": -4.118353843688965, "logits_per_char": -0.823670768737793, "num_chars": 10}, {"sum_logits": -7.187060356140137, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.16602897644043, "logits_per_token": -7.187060356140137, "logits_per_char": -0.8983825445175171, "num_chars": 8}, {"sum_logits": -13.16041374206543, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.687255859375, "logits_per_token": -4.386804580688477, "logits_per_char": -1.0123395186204176, "num_chars": 13}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 278, "native_id": "eaadd7a4b18cb48c00f85c3975750fe7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.514162540435791, "incorrect_loss_raw": 12.624337196350098, "correct_loss_per_char": 0.3224401814596994, "incorrect_loss_per_char": 1.722903714577357, "correct_loss_per_token": 4.514162540435791, "incorrect_loss_per_token": 10.91062605381012, "correct_loss_uncond": -8.320957660675049, "incorrect_loss_uncond": -1.5200004577636719}, "model_output": [{"sum_logits": -4.514162540435791, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.83512020111084, "logits_per_token": -4.514162540435791, "logits_per_char": -0.3224401814596994, "num_chars": 14}, {"sum_logits": -9.698518753051758, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.492706298828125, "logits_per_token": -9.698518753051758, "logits_per_char": -1.616419792175293, "num_chars": 6}, {"sum_logits": -13.709689140319824, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.906187057495117, "logits_per_token": -6.854844570159912, "logits_per_char": -1.713711142539978, "num_chars": 8}, {"sum_logits": -14.300593376159668, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.802566528320312, "logits_per_token": -14.300593376159668, "logits_per_char": -1.4300593376159667, "num_chars": 10}, {"sum_logits": -12.78854751586914, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.375890731811523, "logits_per_token": -12.78854751586914, "logits_per_char": -2.13142458597819, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 279, "native_id": "403c9b067ef7363efffa822bb08c5426", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.3707683086395264, "incorrect_loss_raw": 11.716630578041077, "correct_loss_per_char": 0.3064334826035933, "incorrect_loss_per_char": 1.2357185144316065, "correct_loss_per_token": 1.1235894362131755, "incorrect_loss_per_token": 7.777974923451741, "correct_loss_uncond": -11.00831913948059, "incorrect_loss_uncond": -5.908828854560852}, "model_output": [{"sum_logits": -14.323765754699707, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.00579833984375, "logits_per_token": -7.1618828773498535, "logits_per_char": -0.7538824081420898, "num_chars": 19}, {"sum_logits": -3.3707683086395264, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.379087448120117, "logits_per_token": -1.1235894362131755, "logits_per_char": -0.3064334826035933, "num_chars": 11}, {"sum_logits": -12.88910961151123, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -23.492816925048828, "logits_per_token": -4.296369870503743, "logits_per_char": -1.1717372374101118, "num_chars": 11}, {"sum_logits": -6.20048189163208, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.307762145996094, "logits_per_token": -6.20048189163208, "logits_per_char": -0.77506023645401, "num_chars": 8}, {"sum_logits": -13.453165054321289, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.695460319519043, "logits_per_token": -13.453165054321289, "logits_per_char": -2.242194175720215, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 280, "native_id": "adf228312401c9ff421a4da1b46bb70a", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.674755096435547, "incorrect_loss_raw": 11.375971794128418, "correct_loss_per_char": 0.7624825068882534, "incorrect_loss_per_char": 1.089586388402515, "correct_loss_per_token": 3.558251698811849, "incorrect_loss_per_token": 5.143716653188069, "correct_loss_uncond": -7.372594833374023, "incorrect_loss_uncond": -6.676075458526611}, "model_output": [{"sum_logits": -13.669015884399414, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.983741760253906, "logits_per_token": -6.834507942199707, "logits_per_char": -1.3669015884399414, "num_chars": 10}, {"sum_logits": -8.284639358520508, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.54778289794922, "logits_per_token": -4.142319679260254, "logits_per_char": -0.5177899599075317, "num_chars": 16}, {"sum_logits": -10.487770080566406, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.709785461425781, "logits_per_token": -5.243885040283203, "logits_per_char": -1.7479616800944011, "num_chars": 6}, {"sum_logits": -13.062461853027344, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.96687889099121, "logits_per_token": -4.354153951009114, "logits_per_char": -0.7256923251681857, "num_chars": 18}, {"sum_logits": -10.674755096435547, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.04734992980957, "logits_per_token": -3.558251698811849, "logits_per_char": -0.7624825068882534, "num_chars": 14}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 281, "native_id": "57c85e4c7ea2501ef9d8f304b524e2e4", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.354869842529297, "incorrect_loss_raw": 9.004387140274048, "correct_loss_per_char": 0.4462391535441081, "incorrect_loss_per_char": 0.8979352065495082, "correct_loss_per_token": 2.6774349212646484, "incorrect_loss_per_token": 5.110095739364624, "correct_loss_uncond": -13.740835189819336, "incorrect_loss_uncond": -7.625872611999512}, "model_output": [{"sum_logits": -8.909114837646484, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.036041259765625, "logits_per_token": -4.454557418823242, "logits_per_char": -0.6363653455461774, "num_chars": 14}, {"sum_logits": -5.354869842529297, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.095705032348633, "logits_per_token": -2.6774349212646484, "logits_per_char": -0.4462391535441081, "num_chars": 12}, {"sum_logits": -11.76766586303711, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.759206771850586, "logits_per_token": -5.883832931518555, "logits_per_char": -1.176766586303711, "num_chars": 10}, {"sum_logits": -4.863217353820801, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -11.641695976257324, "logits_per_token": -4.863217353820801, "logits_per_char": -0.9726434707641601, "num_chars": 5}, {"sum_logits": -10.477550506591797, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -21.084095001220703, "logits_per_token": -5.238775253295898, "logits_per_char": -0.8059654235839844, "num_chars": 13}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 282, "native_id": "c22f30eee57f7191ee07e9a916460f68", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.8379266262054443, "incorrect_loss_raw": 9.234782099723816, "correct_loss_per_char": 0.42643629180060494, "incorrect_loss_per_char": 1.3672604231607348, "correct_loss_per_token": 3.8379266262054443, "incorrect_loss_per_token": 8.26279991865158, "correct_loss_uncond": -10.146557092666626, "incorrect_loss_uncond": -5.063538670539856}, "model_output": [{"sum_logits": -3.8379266262054443, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.98448371887207, "logits_per_token": -3.8379266262054443, "logits_per_char": -0.42643629180060494, "num_chars": 9}, {"sum_logits": -9.591617584228516, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.06324291229248, "logits_per_token": -9.591617584228516, "logits_per_char": -1.3702310834612166, "num_chars": 7}, {"sum_logits": -9.554336547851562, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.375931739807129, "logits_per_token": -9.554336547851562, "logits_per_char": -1.9108673095703126, "num_chars": 5}, {"sum_logits": -7.775857448577881, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.929718017578125, "logits_per_token": -3.8879287242889404, "logits_per_char": -0.5183904965718588, "num_chars": 15}, {"sum_logits": -10.017316818237305, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.824390411376953, "logits_per_token": -10.017316818237305, "logits_per_char": -1.6695528030395508, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 283, "native_id": "026cb9c07a583ec933f2c4c67ae73836", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.003998279571533, "incorrect_loss_raw": 11.270932912826538, "correct_loss_per_char": 1.4007996559143066, "incorrect_loss_per_char": 0.8360007647524543, "correct_loss_per_token": 7.003998279571533, "incorrect_loss_per_token": 5.716971278190613, "correct_loss_uncond": -6.15524435043335, "incorrect_loss_uncond": -8.18482518196106}, "model_output": [{"sum_logits": -6.5970563888549805, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -20.65196990966797, "logits_per_token": -3.2985281944274902, "logits_per_char": -0.5997323989868164, "num_chars": 11}, {"sum_logits": -7.003998279571533, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.159242630004883, "logits_per_token": -7.003998279571533, "logits_per_char": -1.4007996559143066, "num_chars": 5}, {"sum_logits": -21.128562927246094, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -24.1092586517334, "logits_per_token": -7.042854309082031, "logits_per_char": -1.242856642779182, "num_chars": 17}, {"sum_logits": -7.247414588928223, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.904268264770508, "logits_per_token": -2.415804862976074, "logits_per_char": -0.6588558717207476, "num_chars": 11}, {"sum_logits": -10.110697746276855, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -15.157535552978516, "logits_per_token": -10.110697746276855, "logits_per_char": -0.8425581455230713, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 284, "native_id": "c57ed32566a2db1ec3d6e4fd595b9d05", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.727682113647461, "incorrect_loss_raw": 14.69028902053833, "correct_loss_per_char": 0.39574600668514476, "incorrect_loss_per_char": 0.9343260384569265, "correct_loss_per_token": 2.242560704549154, "incorrect_loss_per_token": 6.175295972824097, "correct_loss_uncond": -11.839418411254883, "incorrect_loss_uncond": -4.746050834655762}, "model_output": [{"sum_logits": -11.59266471862793, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -16.462339401245117, "logits_per_token": -5.796332359313965, "logits_per_char": -1.0538786107843572, "num_chars": 11}, {"sum_logits": -15.597980499267578, "num_tokens": 5, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -24.571910858154297, "logits_per_token": -3.1195960998535157, "logits_per_char": -0.7798990249633789, "num_chars": 20}, {"sum_logits": -6.727682113647461, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.567100524902344, "logits_per_token": -2.242560704549154, "logits_per_char": -0.39574600668514476, "num_chars": 17}, {"sum_logits": -18.10567855834961, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -20.89319610595703, "logits_per_token": -9.052839279174805, "logits_per_char": -1.0058710310194228, "num_chars": 18}, {"sum_logits": -13.464832305908203, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.817913055419922, "logits_per_token": -6.732416152954102, "logits_per_char": -0.8976554870605469, "num_chars": 15}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 285, "native_id": "93b52e7ea1acf10db891e9355e234123", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.6574918031692505, "incorrect_loss_raw": 9.705471515655518, "correct_loss_per_char": 0.10359323769807816, "incorrect_loss_per_char": 1.0477451597358667, "correct_loss_per_token": 0.5524972677230835, "incorrect_loss_per_token": 5.9274821281433105, "correct_loss_uncond": -15.83047091960907, "incorrect_loss_uncond": -7.613163948059082}, "model_output": [{"sum_logits": -1.6574918031692505, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": true, "sum_logits_uncond": -17.48796272277832, "logits_per_token": -0.5524972677230835, "logits_per_char": -0.10359323769807816, "num_chars": 16}, {"sum_logits": -6.914262771606445, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.279050827026367, "logits_per_token": -3.4571313858032227, "logits_per_char": -0.4067213395062615, "num_chars": 17}, {"sum_logits": -14.769814491271973, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -23.63684844970703, "logits_per_token": -7.384907245635986, "logits_per_char": -0.9231134057044983, "num_chars": 16}, {"sum_logits": -8.597970962524414, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -10.994718551635742, "logits_per_token": -8.597970962524414, "logits_per_char": -2.1494927406311035, "num_chars": 4}, {"sum_logits": -8.539837837219238, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.363924026489258, "logits_per_token": -4.269918918609619, "logits_per_char": -0.7116531531016032, "num_chars": 12}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 286, "native_id": "dbdad44029098d4b1d202d6d857d6092", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.8631887435913086, "incorrect_loss_raw": 9.307536244392395, "correct_loss_per_char": 0.6438647905985514, "incorrect_loss_per_char": 1.2827905884810857, "correct_loss_per_token": 3.8631887435913086, "incorrect_loss_per_token": 9.307536244392395, "correct_loss_uncond": -8.026464462280273, "incorrect_loss_uncond": -5.636591792106628}, "model_output": [{"sum_logits": -3.8631887435913086, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.889653205871582, "logits_per_token": -3.8631887435913086, "logits_per_char": -0.6438647905985514, "num_chars": 6}, {"sum_logits": -10.496068000793457, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.241378784179688, "logits_per_token": -10.496068000793457, "logits_per_char": -1.3120085000991821, "num_chars": 8}, {"sum_logits": -7.931298732757568, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.001758575439453, "logits_per_token": -7.931298732757568, "logits_per_char": -1.133042676108224, "num_chars": 7}, {"sum_logits": -8.94685173034668, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.901138305664062, "logits_per_token": -8.94685173034668, "logits_per_char": -1.2781216757638114, "num_chars": 7}, {"sum_logits": -9.855926513671875, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.63223648071289, "logits_per_token": -9.855926513671875, "logits_per_char": -1.407989501953125, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 287, "native_id": "69d0f70c173dda17934836d618ca7093", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.638731002807617, "incorrect_loss_raw": 7.1051353216171265, "correct_loss_per_char": 0.6170522144862584, "incorrect_loss_per_char": 1.1495604758461315, "correct_loss_per_token": 2.8795770009358725, "incorrect_loss_per_token": 6.220205744107565, "correct_loss_uncond": -9.552453994750977, "incorrect_loss_uncond": -7.079553246498108}, "model_output": [{"sum_logits": -10.673983573913574, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -13.32010555267334, "logits_per_token": -10.673983573913574, "logits_per_char": -1.778997262318929, "num_chars": 6}, {"sum_logits": -5.309577465057373, "num_tokens": 3, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -16.066938400268555, "logits_per_token": -1.7698591550191243, "logits_per_char": -0.3318485915660858, "num_chars": 16}, {"sum_logits": -8.638731002807617, "num_tokens": 3, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -18.191184997558594, "logits_per_token": -2.8795770009358725, "logits_per_char": -0.6170522144862584, "num_chars": 14}, {"sum_logits": -9.176878929138184, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -14.743276596069336, "logits_per_token": -9.176878929138184, "logits_per_char": -1.8353757858276367, "num_chars": 5}, {"sum_logits": -3.260101318359375, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -12.608433723449707, "logits_per_token": -3.260101318359375, "logits_per_char": -0.652020263671875, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 288, "native_id": "e5697a25935c5249d2108f55e245f3e4", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.38434898853302, "incorrect_loss_raw": 11.540038585662842, "correct_loss_per_char": 0.346087247133255, "incorrect_loss_per_char": 1.1810810211993732, "correct_loss_per_token": 1.38434898853302, "incorrect_loss_per_token": 6.786720037460327, "correct_loss_uncond": -9.7882479429245, "incorrect_loss_uncond": -5.392791748046875}, "model_output": [{"sum_logits": -7.241119384765625, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.608536720275879, "logits_per_token": -7.241119384765625, "logits_per_char": -1.0344456263950892, "num_chars": 7}, {"sum_logits": -6.189873695373535, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.892474174499512, "logits_per_token": -6.189873695373535, "logits_per_char": -0.8842676707676479, "num_chars": 7}, {"sum_logits": -1.38434898853302, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": true, "sum_logits_uncond": -11.17259693145752, "logits_per_token": -1.38434898853302, "logits_per_char": -0.346087247133255, "num_chars": 4}, {"sum_logits": -15.89216136932373, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -21.1709041595459, "logits_per_token": -5.29738712310791, "logits_per_char": -0.9348330217249253, "num_chars": 17}, {"sum_logits": -16.836999893188477, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -20.059406280517578, "logits_per_token": -8.418499946594238, "logits_per_char": -1.8707777659098308, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 289, "native_id": "99af85081085e6228c6d78c95be01968", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.368926048278809, "incorrect_loss_raw": 8.118208050727844, "correct_loss_per_char": 0.7368926048278809, "incorrect_loss_per_char": 1.100808018807209, "correct_loss_per_token": 7.368926048278809, "incorrect_loss_per_token": 6.249011814594269, "correct_loss_uncond": -6.855681419372559, "incorrect_loss_uncond": -8.35011374950409}, "model_output": [{"sum_logits": -10.028120040893555, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.032623291015625, "logits_per_token": -10.028120040893555, "logits_per_char": -1.4325885772705078, "num_chars": 7}, {"sum_logits": -6.009286403656006, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.772855758666992, "logits_per_token": -3.004643201828003, "logits_per_char": -0.5007738669713339, "num_chars": 12}, {"sum_logits": -7.491142272949219, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.781240463256836, "logits_per_token": -7.491142272949219, "logits_per_char": -0.6810129339044745, "num_chars": 11}, {"sum_logits": -8.944283485412598, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.28656768798828, "logits_per_token": -4.472141742706299, "logits_per_char": -1.7888566970825195, "num_chars": 5}, {"sum_logits": -7.368926048278809, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.224607467651367, "logits_per_token": -7.368926048278809, "logits_per_char": -0.7368926048278809, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 290, "native_id": "235094c966bcbdc94701b41b969f9c75", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 15.207695007324219, "incorrect_loss_raw": 12.334667682647705, "correct_loss_per_char": 0.6336539586385092, "incorrect_loss_per_char": 1.1524298330148062, "correct_loss_per_token": 5.069231669108073, "incorrect_loss_per_token": 9.776789983113606, "correct_loss_uncond": -4.242105484008789, "incorrect_loss_uncond": -1.3009521961212158}, "model_output": [{"sum_logits": -15.34726619720459, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.487008094787598, "logits_per_token": -5.115755399068196, "logits_per_char": -0.8526258998446994, "num_chars": 18}, {"sum_logits": -15.207695007324219, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.449800491333008, "logits_per_token": -5.069231669108073, "logits_per_char": -0.6336539586385092, "num_chars": 24}, {"sum_logits": -10.923850059509277, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.593781471252441, "logits_per_token": -10.923850059509277, "logits_per_char": -1.213761117723253, "num_chars": 9}, {"sum_logits": -13.604479789733887, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.104439735412598, "logits_per_token": -13.604479789733887, "logits_per_char": -1.3604479789733888, "num_chars": 10}, {"sum_logits": -9.463074684143066, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.357250213623047, "logits_per_token": -9.463074684143066, "logits_per_char": -1.1828843355178833, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 291, "native_id": "99789083502af9bf111876a00fae44ac", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.360715866088867, "incorrect_loss_raw": 6.629060328006744, "correct_loss_per_char": 0.8739012204683744, "incorrect_loss_per_char": 0.7600327550418793, "correct_loss_per_token": 11.360715866088867, "incorrect_loss_per_token": 4.248309075832367, "correct_loss_uncond": -1.8413934707641602, "incorrect_loss_uncond": -8.064732730388641}, "model_output": [{"sum_logits": -5.22668981552124, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.927764892578125, "logits_per_token": -5.22668981552124, "logits_per_char": -0.7466699736458915, "num_chars": 7}, {"sum_logits": -6.1518707275390625, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.236899375915527, "logits_per_token": -3.0759353637695312, "logits_per_char": -0.6835411919487847, "num_chars": 9}, {"sum_logits": -11.360715866088867, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.202109336853027, "logits_per_token": -11.360715866088867, "logits_per_char": -0.8739012204683744, "num_chars": 13}, {"sum_logits": -12.894139289855957, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.262165069580078, "logits_per_token": -6.4470696449279785, "logits_per_char": -1.2894139289855957, "num_chars": 10}, {"sum_logits": -2.2435414791107178, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.348342895507812, "logits_per_token": -2.2435414791107178, "logits_per_char": -0.3205059255872454, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 292, "native_id": "1d44fb5f4b7f1e23ff6c1c083db81ba1", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.7418103218078613, "incorrect_loss_raw": 13.914364099502563, "correct_loss_per_char": 0.24925548380071466, "incorrect_loss_per_char": 1.2495048575931125, "correct_loss_per_token": 1.3709051609039307, "incorrect_loss_per_token": 5.791190346082051, "correct_loss_uncond": -12.936875820159912, "incorrect_loss_uncond": -5.082871675491333}, "model_output": [{"sum_logits": -16.772600173950195, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.917970657348633, "logits_per_token": -8.386300086975098, "logits_per_char": -1.8636222415500217, "num_chars": 9}, {"sum_logits": -14.417675971984863, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.204669952392578, "logits_per_token": -7.208837985992432, "logits_per_char": -1.601963996887207, "num_chars": 9}, {"sum_logits": -7.033241271972656, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.955930709838867, "logits_per_token": -1.758310317993164, "logits_per_char": -0.37017059326171875, "num_chars": 19}, {"sum_logits": -17.43393898010254, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.910371780395508, "logits_per_token": -5.811312993367513, "logits_per_char": -1.1622625986735027, "num_chars": 15}, {"sum_logits": -2.7418103218078613, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.678686141967773, "logits_per_token": -1.3709051609039307, "logits_per_char": -0.24925548380071466, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 293, "native_id": "194b66240f6fab75749c1e30ed09ea09", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.640993595123291, "incorrect_loss_raw": 16.665488958358765, "correct_loss_per_char": 0.5801241993904114, "incorrect_loss_per_char": 1.4505925406466473, "correct_loss_per_token": 4.640993595123291, "incorrect_loss_per_token": 7.565150459607442, "correct_loss_uncond": -7.281270503997803, "incorrect_loss_uncond": -3.717843770980835}, "model_output": [{"sum_logits": -15.354374885559082, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -22.247764587402344, "logits_per_token": -7.677187442779541, "logits_per_char": -1.09674106325422, "num_chars": 14}, {"sum_logits": -13.995016098022461, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.218246459960938, "logits_per_token": -6.9975080490112305, "logits_per_char": -1.3995016098022461, "num_chars": 10}, {"sum_logits": -18.422256469726562, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.404645919799805, "logits_per_token": -6.1407521565755205, "logits_per_char": -1.417096651517428, "num_chars": 13}, {"sum_logits": -18.890308380126953, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.662673950195312, "logits_per_token": -9.445154190063477, "logits_per_char": -1.8890308380126952, "num_chars": 10}, {"sum_logits": -4.640993595123291, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.922264099121094, "logits_per_token": -4.640993595123291, "logits_per_char": -0.5801241993904114, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 294, "native_id": "83dad4fe630fddbdcd5b18ef890c66f2", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.525606155395508, "incorrect_loss_raw": 7.731634974479675, "correct_loss_per_char": 0.5019697042611929, "incorrect_loss_per_char": 0.5203053091253553, "correct_loss_per_token": 2.1752020517985025, "incorrect_loss_per_token": 4.117393523454666, "correct_loss_uncond": -12.886735916137695, "incorrect_loss_uncond": -9.016002535820007}, "model_output": [{"sum_logits": -7.7861104011535645, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.106239318847656, "logits_per_token": -1.9465276002883911, "logits_per_char": -0.37076716195969356, "num_chars": 21}, {"sum_logits": -6.630470275878906, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.929718017578125, "logits_per_token": -3.315235137939453, "logits_per_char": -0.4420313517252604, "num_chars": 15}, {"sum_logits": -5.90566349029541, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.331343650817871, "logits_per_token": -5.90566349029541, "logits_per_char": -0.7382079362869263, "num_chars": 8}, {"sum_logits": -10.60429573059082, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.623249053955078, "logits_per_token": -5.30214786529541, "logits_per_char": -0.530214786529541, "num_chars": 20}, {"sum_logits": -6.525606155395508, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.412342071533203, "logits_per_token": -2.1752020517985025, "logits_per_char": -0.5019697042611929, "num_chars": 13}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 295, "native_id": "3ebc5ddd2e97fe37fcb52aa2a9e2e1a7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.648179054260254, "incorrect_loss_raw": 16.045883655548096, "correct_loss_per_char": 0.5134708231145685, "incorrect_loss_per_char": 1.0561243408529093, "correct_loss_per_token": 2.824089527130127, "incorrect_loss_per_token": 11.425895690917969, "correct_loss_uncond": -13.842818260192871, "incorrect_loss_uncond": -1.6228315830230713}, "model_output": [{"sum_logits": -11.986504554748535, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.627424240112305, "logits_per_token": -11.986504554748535, "logits_per_char": -1.089682232249867, "num_chars": 11}, {"sum_logits": -5.648179054260254, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.490997314453125, "logits_per_token": -2.824089527130127, "logits_per_char": -0.5134708231145685, "num_chars": 11}, {"sum_logits": -15.237126350402832, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.904881477355957, "logits_per_token": -15.237126350402832, "logits_per_char": -1.0158084233601887, "num_chars": 15}, {"sum_logits": -18.320167541503906, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.131317138671875, "logits_per_token": -9.160083770751953, "logits_per_char": -1.3085833958217077, "num_chars": 14}, {"sum_logits": -18.63973617553711, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.01123809814453, "logits_per_token": -9.319868087768555, "logits_per_char": -0.8104233119798743, "num_chars": 23}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 296, "native_id": "9ed019338a48216de9eadf64faaf1ce0", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 8.206809043884277, "incorrect_loss_raw": 10.330630540847778, "correct_loss_per_char": 0.7460735494440253, "incorrect_loss_per_char": 1.1331218450497358, "correct_loss_per_token": 2.7356030146280923, "incorrect_loss_per_token": 7.422492623329163, "correct_loss_uncond": -6.17227840423584, "incorrect_loss_uncond": -3.998960494995117}, "model_output": [{"sum_logits": -10.034099578857422, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.071391105651855, "logits_per_token": -10.034099578857422, "logits_per_char": -1.6723499298095703, "num_chars": 6}, {"sum_logits": -12.719626426696777, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.010452270507812, "logits_per_token": -6.359813213348389, "logits_per_char": -0.9784328020535983, "num_chars": 13}, {"sum_logits": -8.023319244384766, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.307762145996094, "logits_per_token": -8.023319244384766, "logits_per_char": -1.0029149055480957, "num_chars": 8}, {"sum_logits": -8.206809043884277, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.379087448120117, "logits_per_token": -2.7356030146280923, "logits_per_char": -0.7460735494440253, "num_chars": 11}, {"sum_logits": -10.545476913452148, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.92875862121582, "logits_per_token": -5.272738456726074, "logits_per_char": -0.8787897427876791, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 297, "native_id": "d1d2585e0ba1160948b7c5822a99b7a1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.053129196166992, "incorrect_loss_raw": 11.430331707000732, "correct_loss_per_char": 0.6755215326944987, "incorrect_loss_per_char": 1.6852520184857505, "correct_loss_per_token": 4.053129196166992, "incorrect_loss_per_token": 11.430331707000732, "correct_loss_uncond": -9.637990951538086, "incorrect_loss_uncond": -1.8049039840698242}, "model_output": [{"sum_logits": -10.534385681152344, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -14.04529857635498, "logits_per_token": -10.534385681152344, "logits_per_char": -1.316798210144043, "num_chars": 8}, {"sum_logits": -4.053129196166992, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -13.691120147705078, "logits_per_token": -4.053129196166992, "logits_per_char": -0.6755215326944987, "num_chars": 6}, {"sum_logits": -9.225071907043457, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -11.10049819946289, "logits_per_token": -9.225071907043457, "logits_per_char": -1.8450143814086915, "num_chars": 5}, {"sum_logits": -18.701862335205078, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -14.218299865722656, "logits_per_token": -18.701862335205078, "logits_per_char": -2.6716946193150113, "num_chars": 7}, {"sum_logits": -7.260006904602051, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -13.5768461227417, "logits_per_token": -7.260006904602051, "logits_per_char": -0.9075008630752563, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 298, "native_id": "e34a0d1331c6bd4574ffe308e3fbd389", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 13.254002571105957, "incorrect_loss_raw": 14.874681234359741, "correct_loss_per_char": 0.8283751606941223, "incorrect_loss_per_char": 1.2909069818163674, "correct_loss_per_token": 4.418000857035319, "incorrect_loss_per_token": 8.487499157587688, "correct_loss_uncond": -8.513854026794434, "incorrect_loss_uncond": -2.073636293411255}, "model_output": [{"sum_logits": -14.249592781066895, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.068875312805176, "logits_per_token": -14.249592781066895, "logits_per_char": -1.5832880867852106, "num_chars": 9}, {"sum_logits": -17.544973373413086, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.8701171875, "logits_per_token": -5.848324457804362, "logits_per_char": -1.2532123838152205, "num_chars": 14}, {"sum_logits": -12.189064025878906, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.598116874694824, "logits_per_token": -6.094532012939453, "logits_per_char": -1.2189064025878906, "num_chars": 10}, {"sum_logits": -15.515094757080078, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.256160736083984, "logits_per_token": -7.757547378540039, "logits_per_char": -1.1082210540771484, "num_chars": 14}, {"sum_logits": -13.254002571105957, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.76785659790039, "logits_per_token": -4.418000857035319, "logits_per_char": -0.8283751606941223, "num_chars": 16}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 299, "native_id": "4858669d0193e5d9384dc37d4bb5c00c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.51904296875, "incorrect_loss_raw": 11.479133367538452, "correct_loss_per_char": 0.35986328125, "incorrect_loss_per_char": 1.0076769325468276, "correct_loss_per_token": 2.51904296875, "incorrect_loss_per_token": 5.739566683769226, "correct_loss_uncond": -12.138249397277832, "incorrect_loss_uncond": -7.147981405258179}, "model_output": [{"sum_logits": -2.51904296875, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.657292366027832, "logits_per_token": -2.51904296875, "logits_per_char": -0.35986328125, "num_chars": 7}, {"sum_logits": -12.198646545410156, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -21.535062789916992, "logits_per_token": -6.099323272705078, "logits_per_char": -0.7624154090881348, "num_chars": 16}, {"sum_logits": -11.064876556396484, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.408836364746094, "logits_per_token": -5.532438278198242, "logits_per_char": -1.2294307284884982, "num_chars": 9}, {"sum_logits": -13.586366653442383, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.263147354125977, "logits_per_token": -6.793183326721191, "logits_per_char": -1.1321972211201985, "num_chars": 12}, {"sum_logits": -9.066643714904785, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.30141258239746, "logits_per_token": -4.533321857452393, "logits_per_char": -0.9066643714904785, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 300, "native_id": "8fd82cdc253835814153fe7222e9967c", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.7396063804626465, "incorrect_loss_raw": 18.033427476882935, "correct_loss_per_char": 0.43087330731478607, "incorrect_loss_per_char": 1.349431945909365, "correct_loss_per_token": 2.3698031902313232, "incorrect_loss_per_token": 7.53462495803833, "correct_loss_uncond": -13.748234272003174, "incorrect_loss_uncond": -3.0000863075256348}, "model_output": [{"sum_logits": -14.756711959838867, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.047258377075195, "logits_per_token": -7.378355979919434, "logits_per_char": -1.1351316892183745, "num_chars": 13}, {"sum_logits": -10.676867485046387, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.445282936096191, "logits_per_token": -10.676867485046387, "logits_per_char": -1.5252667835780553, "num_chars": 7}, {"sum_logits": -26.12575340270996, "num_tokens": 5, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -30.778980255126953, "logits_per_token": -5.225150680541992, "logits_per_char": -1.4514307445949979, "num_chars": 18}, {"sum_logits": -20.574377059936523, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -23.862533569335938, "logits_per_token": -6.858125686645508, "logits_per_char": -1.2858985662460327, "num_chars": 16}, {"sum_logits": -4.7396063804626465, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.48784065246582, "logits_per_token": -2.3698031902313232, "logits_per_char": -0.43087330731478607, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 301, "native_id": "66458bf8599c3ef1e7b50fa527531882", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.253689765930176, "incorrect_loss_raw": 12.196953058242798, "correct_loss_per_char": 0.4835793177286784, "incorrect_loss_per_char": 1.0512547301736108, "correct_loss_per_token": 1.4507379531860352, "incorrect_loss_per_token": 7.349976181983948, "correct_loss_uncond": -11.810351371765137, "incorrect_loss_uncond": -3.790163516998291}, "model_output": [{"sum_logits": -13.051183700561523, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -10.885873794555664, "logits_per_token": -13.051183700561523, "logits_per_char": -1.186471245505593, "num_chars": 11}, {"sum_logits": -9.117559432983398, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -19.280977249145508, "logits_per_token": -3.039186477661133, "logits_per_char": -0.5363270254696116, "num_chars": 17}, {"sum_logits": -18.487049102783203, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -20.80449676513672, "logits_per_token": -9.243524551391602, "logits_per_char": -1.3205035073416573, "num_chars": 14}, {"sum_logits": -8.132019996643066, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -12.977118492126465, "logits_per_token": -4.066009998321533, "logits_per_char": -1.161717142377581, "num_chars": 7}, {"sum_logits": -7.253689765930176, "num_tokens": 5, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -19.064041137695312, "logits_per_token": -1.4507379531860352, "logits_per_char": -0.4835793177286784, "num_chars": 15}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 302, "native_id": "879239b8a788f3c9e3dfdd0862f3d7c5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.023448944091797, "incorrect_loss_raw": 8.345336139202118, "correct_loss_per_char": 0.7023448944091797, "incorrect_loss_per_char": 0.9830331739151117, "correct_loss_per_token": 2.341149648030599, "incorrect_loss_per_token": 5.34949575861295, "correct_loss_uncond": -12.448183059692383, "incorrect_loss_uncond": -8.17813092470169}, "model_output": [{"sum_logits": -2.6725857257843018, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -15.714458465576172, "logits_per_token": -1.3362928628921509, "logits_per_char": -0.19089898041316442, "num_chars": 14}, {"sum_logits": -7.724215030670166, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -13.289554595947266, "logits_per_token": -7.724215030670166, "logits_per_char": -1.287369171778361, "num_chars": 6}, {"sum_logits": -7.023448944091797, "num_tokens": 3, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -19.47163200378418, "logits_per_token": -2.341149648030599, "logits_per_char": -0.7023448944091797, "num_chars": 10}, {"sum_logits": -7.013940811157227, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -14.53508186340332, "logits_per_token": -7.013940811157227, "logits_per_char": -1.0019915444510323, "num_chars": 7}, {"sum_logits": -15.970602989196777, "num_tokens": 3, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -22.554773330688477, "logits_per_token": -5.323534329732259, "logits_per_char": -1.451872999017889, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 303, "native_id": "8a69e6df5e8ad6c9e6828aa66c59d046", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.076193809509277, "incorrect_loss_raw": 17.47591280937195, "correct_loss_per_char": 0.725170544215611, "incorrect_loss_per_char": 1.593872981842118, "correct_loss_per_token": 5.076193809509277, "incorrect_loss_per_token": 11.491288979848225, "correct_loss_uncond": -10.112006187438965, "incorrect_loss_uncond": -1.0493402481079102}, "model_output": [{"sum_logits": -11.871225357055664, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -13.49835205078125, "logits_per_token": -11.871225357055664, "logits_per_char": -1.9785375595092773, "num_chars": 6}, {"sum_logits": -28.726194381713867, "num_tokens": 6, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -30.737266540527344, "logits_per_token": -4.787699063618978, "logits_per_char": -1.3057361082597212, "num_chars": 22}, {"sum_logits": -14.453559875488281, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -15.072489738464355, "logits_per_token": -14.453559875488281, "logits_per_char": -1.6059510972764757, "num_chars": 9}, {"sum_logits": -5.076193809509277, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -15.188199996948242, "logits_per_token": -5.076193809509277, "logits_per_char": -0.725170544215611, "num_chars": 7}, {"sum_logits": -14.85267162322998, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -14.792903900146484, "logits_per_token": -14.85267162322998, "logits_per_char": -1.485267162322998, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 304, "native_id": "8d275acea05fd16295c659c504576a9b", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.697771072387695, "incorrect_loss_raw": 9.904390573501587, "correct_loss_per_char": 0.3355550765991211, "incorrect_loss_per_char": 1.425310675543968, "correct_loss_per_token": 2.3488855361938477, "incorrect_loss_per_token": 8.341550707817078, "correct_loss_uncond": -13.054704666137695, "incorrect_loss_uncond": -4.943194389343262}, "model_output": [{"sum_logits": -9.540739059448242, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.03783893585205, "logits_per_token": -9.540739059448242, "logits_per_char": -2.3851847648620605, "num_chars": 4}, {"sum_logits": -4.697771072387695, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.75247573852539, "logits_per_token": -2.3488855361938477, "logits_per_char": -0.3355550765991211, "num_chars": 14}, {"sum_logits": -10.265434265136719, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.434986114501953, "logits_per_token": -10.265434265136719, "logits_per_char": -1.2831792831420898, "num_chars": 8}, {"sum_logits": -8.335145950317383, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.45389175415039, "logits_per_token": -2.0837864875793457, "logits_per_char": -0.7577405409379439, "num_chars": 11}, {"sum_logits": -11.476243019104004, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.463623046875, "logits_per_token": -11.476243019104004, "logits_per_char": -1.2751381132337782, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 305, "native_id": "91629c6f9e4af3e6acf385eb23fd8068", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.761026382446289, "incorrect_loss_raw": 7.684666872024536, "correct_loss_per_char": 0.6918250813203699, "incorrect_loss_per_char": 0.9711445185873244, "correct_loss_per_token": 5.8805131912231445, "incorrect_loss_per_token": 5.236738085746765, "correct_loss_uncond": -8.59968376159668, "incorrect_loss_uncond": -6.779720783233643}, "model_output": [{"sum_logits": -13.515857696533203, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.407781600952148, "logits_per_token": -6.757928848266602, "logits_per_char": -1.501761966281467, "num_chars": 9}, {"sum_logits": -6.067572593688965, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.342933654785156, "logits_per_token": -3.0337862968444824, "logits_per_char": -0.5056310494740804, "num_chars": 12}, {"sum_logits": -3.9809517860412598, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.831037521362305, "logits_per_token": -3.9809517860412598, "logits_per_char": -0.44232797622680664, "num_chars": 9}, {"sum_logits": -11.761026382446289, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.36071014404297, "logits_per_token": -5.8805131912231445, "logits_per_char": -0.6918250813203699, "num_chars": 17}, {"sum_logits": -7.174285411834717, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.275797843933105, "logits_per_token": -7.174285411834717, "logits_per_char": -1.4348570823669433, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 306, "native_id": "59eb56f366407ac7db72996be265883b", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.58890438079834, "incorrect_loss_raw": 11.0508873462677, "correct_loss_per_char": 0.9486130475997925, "incorrect_loss_per_char": 1.712482722600301, "correct_loss_per_token": 7.58890438079834, "incorrect_loss_per_token": 11.0508873462677, "correct_loss_uncond": -3.059138298034668, "incorrect_loss_uncond": -1.6805870532989502}, "model_output": [{"sum_logits": -15.173641204833984, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.367981910705566, "logits_per_token": -15.173641204833984, "logits_per_char": -1.6859601338704426, "num_chars": 9}, {"sum_logits": -9.630163192749023, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.04476261138916, "logits_per_token": -9.630163192749023, "logits_per_char": -1.605027198791504, "num_chars": 6}, {"sum_logits": -9.76958179473877, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.468390464782715, "logits_per_token": -9.76958179473877, "logits_per_char": -1.953916358947754, "num_chars": 5}, {"sum_logits": -9.630163192749023, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.04476261138916, "logits_per_token": -9.630163192749023, "logits_per_char": -1.605027198791504, "num_chars": 6}, {"sum_logits": -7.58890438079834, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -10.648042678833008, "logits_per_token": -7.58890438079834, "logits_per_char": -0.9486130475997925, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 307, "native_id": "4ab069f2e979d51f2c5929f590d09982", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.154491424560547, "incorrect_loss_raw": 9.544083714485168, "correct_loss_per_char": 0.2967493874686105, "incorrect_loss_per_char": 1.5019880056381225, "correct_loss_per_token": 2.0772457122802734, "incorrect_loss_per_token": 9.544083714485168, "correct_loss_uncond": -12.423849105834961, "incorrect_loss_uncond": -4.231480717658997}, "model_output": [{"sum_logits": -7.977757930755615, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.636223793029785, "logits_per_token": -7.977757930755615, "logits_per_char": -0.7252507209777832, "num_chars": 11}, {"sum_logits": -9.322087287902832, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.903160095214844, "logits_per_token": -9.322087287902832, "logits_per_char": -1.553681214650472, "num_chars": 6}, {"sum_logits": -4.154491424560547, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.578340530395508, "logits_per_token": -2.0772457122802734, "logits_per_char": -0.2967493874686105, "num_chars": 14}, {"sum_logits": -13.388335227966309, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.056529998779297, "logits_per_token": -13.388335227966309, "logits_per_char": -2.2313892046610513, "num_chars": 6}, {"sum_logits": -7.488154411315918, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.506343841552734, "logits_per_token": -7.488154411315918, "logits_per_char": -1.4976308822631836, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 308, "native_id": "d6bb990e8c409d2b3af37a2da198e01f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.991146087646484, "incorrect_loss_raw": 14.380805253982544, "correct_loss_per_char": 0.7685496990497296, "incorrect_loss_per_char": 1.3334464837634374, "correct_loss_per_token": 4.995573043823242, "incorrect_loss_per_token": 9.946019887924194, "correct_loss_uncond": -7.9873809814453125, "incorrect_loss_uncond": -2.2857563495635986}, "model_output": [{"sum_logits": -8.892208099365234, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.567606925964355, "logits_per_token": -8.892208099365234, "logits_per_char": -1.1115260124206543, "num_chars": 8}, {"sum_logits": -9.991146087646484, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.978527069091797, "logits_per_token": -4.995573043823242, "logits_per_char": -0.7685496990497296, "num_chars": 13}, {"sum_logits": -19.041322708129883, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.034700393676758, "logits_per_token": -9.520661354064941, "logits_per_char": -1.586776892344157, "num_chars": 12}, {"sum_logits": -13.152729988098145, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.986479759216309, "logits_per_token": -13.152729988098145, "logits_per_char": -1.461414443122016, "num_chars": 9}, {"sum_logits": -16.436960220336914, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.07745933532715, "logits_per_token": -8.218480110168457, "logits_per_char": -1.1740685871669225, "num_chars": 14}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 309, "native_id": "c5ad166ab5c5f5f067aa02b20f482523", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.0279011726379395, "incorrect_loss_raw": 7.885058522224426, "correct_loss_per_char": 0.7534876465797424, "incorrect_loss_per_char": 1.1216909408569335, "correct_loss_per_token": 6.0279011726379395, "incorrect_loss_per_token": 6.50628936290741, "correct_loss_uncond": -7.969919681549072, "incorrect_loss_uncond": -6.736929297447205}, "model_output": [{"sum_logits": -11.030153274536133, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -19.057565689086914, "logits_per_token": -5.515076637268066, "logits_per_char": -0.6127862930297852, "num_chars": 18}, {"sum_logits": -7.763443470001221, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.923977851867676, "logits_per_token": -7.763443470001221, "logits_per_char": -1.9408608675003052, "num_chars": 4}, {"sum_logits": -7.464829921722412, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.551656723022461, "logits_per_token": -7.464829921722412, "logits_per_char": -1.4929659843444825, "num_chars": 5}, {"sum_logits": -5.2818074226379395, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.954751014709473, "logits_per_token": -5.2818074226379395, "logits_per_char": -0.4401506185531616, "num_chars": 12}, {"sum_logits": -6.0279011726379395, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.997820854187012, "logits_per_token": -6.0279011726379395, "logits_per_char": -0.7534876465797424, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 310, "native_id": "ceafca2445b1b974d085a8cce38e8e44", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.008744716644287, "incorrect_loss_raw": 11.615788340568542, "correct_loss_per_char": 0.8760930895805359, "incorrect_loss_per_char": 0.8210684564378525, "correct_loss_per_token": 3.5043723583221436, "incorrect_loss_per_token": 6.597470780213674, "correct_loss_uncond": -8.245102405548096, "incorrect_loss_uncond": -7.572184920310974}, "model_output": [{"sum_logits": -7.008744716644287, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.253847122192383, "logits_per_token": -3.5043723583221436, "logits_per_char": -0.8760930895805359, "num_chars": 8}, {"sum_logits": -5.82612943649292, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.843746185302734, "logits_per_token": -2.91306471824646, "logits_per_char": -0.64734771516588, "num_chars": 9}, {"sum_logits": -15.973712921142578, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -25.448528289794922, "logits_per_token": -5.324570973714192, "logits_per_char": -0.7986856460571289, "num_chars": 20}, {"sum_logits": -11.641183853149414, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.641446113586426, "logits_per_token": -11.641183853149414, "logits_per_char": -0.9700986544291178, "num_chars": 12}, {"sum_logits": -13.022127151489258, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.818172454833984, "logits_per_token": -6.511063575744629, "logits_per_char": -0.8681418100992838, "num_chars": 15}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 311, "native_id": "2ef2ae21a2d3a9ecbd5c45ff378d10e3", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.190307140350342, "incorrect_loss_raw": 14.123646259307861, "correct_loss_per_char": 0.5986153057643345, "incorrect_loss_per_char": 1.421133204972073, "correct_loss_per_token": 4.190307140350342, "incorrect_loss_per_token": 9.655750274658203, "correct_loss_uncond": -8.481656551361084, "incorrect_loss_uncond": -3.5339393615722656}, "model_output": [{"sum_logits": -23.172929763793945, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -23.484886169433594, "logits_per_token": -11.586464881896973, "logits_per_char": -1.931077480316162, "num_chars": 12}, {"sum_logits": -9.607292175292969, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.016968727111816, "logits_per_token": -9.607292175292969, "logits_per_char": -1.3724703107561385, "num_chars": 7}, {"sum_logits": -12.57023811340332, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.169246673583984, "logits_per_token": -6.28511905670166, "logits_per_char": -1.1427489194003018, "num_chars": 11}, {"sum_logits": -4.190307140350342, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.671963691711426, "logits_per_token": -4.190307140350342, "logits_per_char": -0.5986153057643345, "num_chars": 7}, {"sum_logits": -11.144124984741211, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.959240913391113, "logits_per_token": -11.144124984741211, "logits_per_char": -1.2382361094156902, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 312, "native_id": "793672da43fbc609e8c5760630c7e239", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.292351722717285, "incorrect_loss_raw": 12.27445363998413, "correct_loss_per_char": 0.7292351722717285, "incorrect_loss_per_char": 1.3708005672925476, "correct_loss_per_token": 7.292351722717285, "incorrect_loss_per_token": 6.137226819992065, "correct_loss_uncond": -6.511507987976074, "incorrect_loss_uncond": -3.185544490814209}, "model_output": [{"sum_logits": -14.792984962463379, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.59100914001465, "logits_per_token": -7.3964924812316895, "logits_per_char": -1.4792984962463378, "num_chars": 10}, {"sum_logits": -8.270295143127441, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.190385818481445, "logits_per_token": -4.135147571563721, "logits_per_char": -1.1814707347324915, "num_chars": 7}, {"sum_logits": -12.668590545654297, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.837525367736816, "logits_per_token": -6.334295272827148, "logits_per_char": -1.151690049604936, "num_chars": 11}, {"sum_logits": -13.365943908691406, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.22107219696045, "logits_per_token": -6.682971954345703, "logits_per_char": -1.6707429885864258, "num_chars": 8}, {"sum_logits": -7.292351722717285, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.80385971069336, "logits_per_token": -7.292351722717285, "logits_per_char": -0.7292351722717285, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 313, "native_id": "558cb0bc25387ce38d71f64ef6f1fa57", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.87071704864502, "incorrect_loss_raw": 17.839195489883423, "correct_loss_per_char": 0.8973379135131836, "incorrect_loss_per_char": 1.7706642986041063, "correct_loss_per_token": 4.93535852432251, "incorrect_loss_per_token": 7.042412757873535, "correct_loss_uncond": -10.865153312683105, "incorrect_loss_uncond": -2.2122037410736084}, "model_output": [{"sum_logits": -16.306568145751953, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -16.857107162475586, "logits_per_token": -8.153284072875977, "logits_per_char": -1.8118409050835504, "num_chars": 9}, {"sum_logits": -9.87071704864502, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -20.735870361328125, "logits_per_token": -4.93535852432251, "logits_per_char": -0.8973379135131836, "num_chars": 11}, {"sum_logits": -27.840288162231445, "num_tokens": 3, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -23.035747528076172, "logits_per_token": -9.280096054077148, "logits_per_char": -3.0933653513590493, "num_chars": 9}, {"sum_logits": -11.474767684936523, "num_tokens": 4, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -19.955930709838867, "logits_per_token": -2.868691921234131, "logits_per_char": -0.6039351413124486, "num_chars": 19}, {"sum_logits": -15.73515796661377, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -20.3568115234375, "logits_per_token": -7.867578983306885, "logits_per_char": -1.5735157966613769, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 314, "native_id": "2c9f4a98ce774cd734b6e384d95051a7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.1869988441467285, "incorrect_loss_raw": 9.820461750030518, "correct_loss_per_char": 0.39899991108820987, "incorrect_loss_per_char": 0.9773844495996251, "correct_loss_per_token": 5.1869988441467285, "incorrect_loss_per_token": 6.2040323416392, "correct_loss_uncond": -9.609232425689697, "incorrect_loss_uncond": -6.630231857299805}, "model_output": [{"sum_logits": -8.655877113342285, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.848115921020508, "logits_per_token": -8.655877113342285, "logits_per_char": -1.2365538733346122, "num_chars": 7}, {"sum_logits": -5.1869988441467285, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.796231269836426, "logits_per_token": -5.1869988441467285, "logits_per_char": -0.39899991108820987, "num_chars": 13}, {"sum_logits": -11.409252166748047, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -21.223140716552734, "logits_per_token": -3.803084055582682, "logits_per_char": -0.8149465833391462, "num_chars": 14}, {"sum_logits": -13.719099044799805, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.553735733032227, "logits_per_token": -6.859549522399902, "logits_per_char": -1.2471908222545276, "num_chars": 11}, {"sum_logits": -5.497618675231934, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.17778205871582, "logits_per_token": -5.497618675231934, "logits_per_char": -0.6108465194702148, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 315, "native_id": "33c84708785f88c19737ef5b0e31a64b", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.788102149963379, "incorrect_loss_raw": 10.951920866966248, "correct_loss_per_char": 0.7529309346125677, "incorrect_loss_per_char": 1.3379520638521774, "correct_loss_per_token": 4.8940510749816895, "incorrect_loss_per_token": 8.88331401348114, "correct_loss_uncond": -9.651209831237793, "incorrect_loss_uncond": -4.4393357038497925}, "model_output": [{"sum_logits": -10.372274398803711, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -15.63223648071289, "logits_per_token": -10.372274398803711, "logits_per_char": -1.4817534855433874, "num_chars": 7}, {"sum_logits": -16.54885482788086, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -20.726016998291016, "logits_per_token": -8.27442741394043, "logits_per_char": -1.2729888329139123, "num_chars": 13}, {"sum_logits": -7.757443904876709, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -12.163476943969727, "logits_per_token": -7.757443904876709, "logits_per_char": -1.2929073174794514, "num_chars": 6}, {"sum_logits": -9.788102149963379, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -19.439311981201172, "logits_per_token": -4.8940510749816895, "logits_per_char": -0.7529309346125677, "num_chars": 13}, {"sum_logits": -9.129110336303711, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -13.043295860290527, "logits_per_token": -9.129110336303711, "logits_per_char": -1.3041586194719588, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 316, "native_id": "d867f76d000bdb59b9b4cb982bd7f0a0", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.240470886230469, "incorrect_loss_raw": 13.161996960639954, "correct_loss_per_char": 1.4050588607788086, "incorrect_loss_per_char": 1.1642581521296034, "correct_loss_per_token": 5.620235443115234, "incorrect_loss_per_token": 5.23186441262563, "correct_loss_uncond": -4.805559158325195, "incorrect_loss_uncond": -3.463418126106262}, "model_output": [{"sum_logits": -22.964061737060547, "num_tokens": 3, "num_tokens_all": 172, "is_greedy": false, "sum_logits_uncond": -19.653310775756836, "logits_per_token": -7.654687245686849, "logits_per_char": -1.3508271610035616, "num_chars": 17}, {"sum_logits": -14.80781364440918, "num_tokens": 3, "num_tokens_all": 172, "is_greedy": false, "sum_logits_uncond": -17.67752456665039, "logits_per_token": -4.935937881469727, "logits_per_char": -1.480781364440918, "num_chars": 10}, {"sum_logits": -9.808919906616211, "num_tokens": 3, "num_tokens_all": 172, "is_greedy": false, "sum_logits_uncond": -16.337684631347656, "logits_per_token": -3.2696399688720703, "logits_per_char": -0.9808919906616211, "num_chars": 10}, {"sum_logits": -11.240470886230469, "num_tokens": 2, "num_tokens_all": 171, "is_greedy": false, "sum_logits_uncond": -16.046030044555664, "logits_per_token": -5.620235443115234, "logits_per_char": -1.4050588607788086, "num_chars": 8}, {"sum_logits": -5.067192554473877, "num_tokens": 1, "num_tokens_all": 170, "is_greedy": false, "sum_logits_uncond": -12.83314037322998, "logits_per_token": -5.067192554473877, "logits_per_char": -0.8445320924123129, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 317, "native_id": "8c607d2e2e897d74048fcc794137b683", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.347284317016602, "incorrect_loss_raw": 11.85320782661438, "correct_loss_per_char": 0.5962345940726144, "incorrect_loss_per_char": 1.2357874197837633, "correct_loss_per_token": 2.7824281056722007, "incorrect_loss_per_token": 7.165056387583415, "correct_loss_uncond": -11.04682731628418, "incorrect_loss_uncond": -4.627113103866577}, "model_output": [{"sum_logits": -18.730363845825195, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.455995559692383, "logits_per_token": -9.365181922912598, "logits_per_char": -1.4407972189096303, "num_chars": 13}, {"sum_logits": -8.359847068786621, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.82812213897705, "logits_per_token": -8.359847068786621, "logits_per_char": -1.6719694137573242, "num_chars": 5}, {"sum_logits": -14.081135749816895, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.074176788330078, "logits_per_token": -4.693711916605632, "logits_per_char": -0.9387423833211263, "num_chars": 15}, {"sum_logits": -8.347284317016602, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.39411163330078, "logits_per_token": -2.7824281056722007, "logits_per_char": -0.5962345940726144, "num_chars": 14}, {"sum_logits": -6.241484642028809, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.562989234924316, "logits_per_token": -6.241484642028809, "logits_per_char": -0.8916406631469727, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 318, "native_id": "5215e26c99b2a9b376fb1c70096a388a", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.880170822143555, "incorrect_loss_raw": 9.731480717658997, "correct_loss_per_char": 1.2350213527679443, "incorrect_loss_per_char": 1.328160208644289, "correct_loss_per_token": 4.940085411071777, "incorrect_loss_per_token": 5.322697997093201, "correct_loss_uncond": -5.495234489440918, "incorrect_loss_uncond": -7.394766926765442}, "model_output": [{"sum_logits": -12.046142578125, "num_tokens": 3, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -17.014820098876953, "logits_per_token": -4.015380859375, "logits_per_char": -1.3384602864583333, "num_chars": 9}, {"sum_logits": -9.880170822143555, "num_tokens": 2, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -15.375405311584473, "logits_per_token": -4.940085411071777, "logits_per_char": -1.2350213527679443, "num_chars": 8}, {"sum_logits": -9.157146453857422, "num_tokens": 2, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -18.93746566772461, "logits_per_token": -4.578573226928711, "logits_per_char": -1.5261910756429036, "num_chars": 6}, {"sum_logits": -7.671041965484619, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -13.82812213897705, "logits_per_token": -7.671041965484619, "logits_per_char": -1.534208393096924, "num_chars": 5}, {"sum_logits": -10.051591873168945, "num_tokens": 2, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -18.72458267211914, "logits_per_token": -5.025795936584473, "logits_per_char": -0.913781079378995, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 319, "native_id": "668dc6bce771b10cbf6336f3ec76520a", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.539398670196533, "incorrect_loss_raw": 12.160857915878296, "correct_loss_per_char": 0.7265998522440592, "incorrect_loss_per_char": 1.6448311830178284, "correct_loss_per_token": 3.2696993350982666, "incorrect_loss_per_token": 12.160857915878296, "correct_loss_uncond": -9.132033824920654, "incorrect_loss_uncond": -1.8230931758880615}, "model_output": [{"sum_logits": -11.696611404418945, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.35517406463623, "logits_per_token": -11.696611404418945, "logits_per_char": -0.8997393388014573, "num_chars": 13}, {"sum_logits": -6.539398670196533, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.671432495117188, "logits_per_token": -3.2696993350982666, "logits_per_char": -0.7265998522440592, "num_chars": 9}, {"sum_logits": -12.87863540649414, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.219818115234375, "logits_per_token": -12.87863540649414, "logits_per_char": -1.287863540649414, "num_chars": 10}, {"sum_logits": -11.410731315612793, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.536421775817871, "logits_per_token": -11.410731315612793, "logits_per_char": -2.2821462631225584, "num_chars": 5}, {"sum_logits": -12.657453536987305, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.824390411376953, "logits_per_token": -12.657453536987305, "logits_per_char": -2.1095755894978843, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 320, "native_id": "a339fe08f1f50463ee180b797e99ebcc", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.232436180114746, "incorrect_loss_raw": 11.250533699989319, "correct_loss_per_char": 0.5193696816762289, "incorrect_loss_per_char": 1.8920433183511098, "correct_loss_per_token": 3.116218090057373, "incorrect_loss_per_token": 7.5132904052734375, "correct_loss_uncond": -14.487366676330566, "incorrect_loss_uncond": -2.7228070497512817}, "model_output": [{"sum_logits": -2.603076457977295, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.260344505310059, "logits_per_token": -2.603076457977295, "logits_per_char": -0.5206152915954589, "num_chars": 5}, {"sum_logits": -19.442214965820312, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.606252670288086, "logits_per_token": -9.721107482910156, "logits_per_char": -3.2403691609700522, "num_chars": 6}, {"sum_logits": -12.50111198425293, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.611124038696289, "logits_per_token": -12.50111198425293, "logits_per_char": -2.500222396850586, "num_chars": 5}, {"sum_logits": -6.232436180114746, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.719802856445312, "logits_per_token": -3.116218090057373, "logits_per_char": -0.5193696816762289, "num_chars": 12}, {"sum_logits": -10.455731391906738, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.415641784667969, "logits_per_token": -5.227865695953369, "logits_per_char": -1.3069664239883423, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 321, "native_id": "526cd34f5b2afefbbb7830434785f298", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.276708602905273, "incorrect_loss_raw": 10.325214862823486, "correct_loss_per_char": 1.8553417205810547, "incorrect_loss_per_char": 2.0170490900675455, "correct_loss_per_token": 9.276708602905273, "incorrect_loss_per_token": 10.325214862823486, "correct_loss_uncond": -3.061004638671875, "incorrect_loss_uncond": -3.235860824584961}, "model_output": [{"sum_logits": -9.276708602905273, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.337713241577148, "logits_per_token": -9.276708602905273, "logits_per_char": -1.8553417205810547, "num_chars": 5}, {"sum_logits": -13.739364624023438, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.12802791595459, "logits_per_token": -13.739364624023438, "logits_per_char": -2.2898941040039062, "num_chars": 6}, {"sum_logits": -9.892980575561523, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.10049819946289, "logits_per_token": -9.892980575561523, "logits_per_char": -1.9785961151123046, "num_chars": 5}, {"sum_logits": -7.409069061279297, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.691120147705078, "logits_per_token": -7.409069061279297, "logits_per_char": -1.2348448435465496, "num_chars": 6}, {"sum_logits": -10.259445190429688, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.32465648651123, "logits_per_token": -10.259445190429688, "logits_per_char": -2.564861297607422, "num_chars": 4}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 322, "native_id": "6c1c1c282cebe8917f607f0dbc1c102e", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 3.7818386554718018, "incorrect_loss_raw": 11.24650263786316, "correct_loss_per_char": 0.9454596638679504, "incorrect_loss_per_char": 1.373278723043554, "correct_loss_per_token": 3.7818386554718018, "incorrect_loss_per_token": 8.198266506195068, "correct_loss_uncond": -8.410448789596558, "incorrect_loss_uncond": -3.5906245708465576}, "model_output": [{"sum_logits": -10.403609275817871, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -10.74312973022461, "logits_per_token": -10.403609275817871, "logits_per_char": -1.7339348793029785, "num_chars": 6}, {"sum_logits": -11.958809852600098, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.789621353149414, "logits_per_token": -5.979404926300049, "logits_per_char": -1.3287566502888997, "num_chars": 9}, {"sum_logits": -10.196512222290039, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.941009521484375, "logits_per_token": -10.196512222290039, "logits_per_char": -1.6994187037150066, "num_chars": 6}, {"sum_logits": -12.427079200744629, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.87474822998047, "logits_per_token": -6.2135396003723145, "logits_per_char": -0.7310046588673311, "num_chars": 17}, {"sum_logits": -3.7818386554718018, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.19228744506836, "logits_per_token": -3.7818386554718018, "logits_per_char": -0.9454596638679504, "num_chars": 4}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 323, "native_id": "b5baf77d3855935c87f01f5fb2216667", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.558345317840576, "incorrect_loss_raw": 8.478050410747528, "correct_loss_per_char": 0.3038896878560384, "incorrect_loss_per_char": 1.1947507500648498, "correct_loss_per_token": 2.279172658920288, "incorrect_loss_per_token": 7.100470006465912, "correct_loss_uncond": -11.259567737579346, "incorrect_loss_uncond": -5.545768082141876}, "model_output": [{"sum_logits": -10.82093334197998, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.16828441619873, "logits_per_token": -10.82093334197998, "logits_per_char": -2.1641866683959963, "num_chars": 5}, {"sum_logits": -8.074075698852539, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.484567642211914, "logits_per_token": -8.074075698852539, "logits_per_char": -0.8971195220947266, "num_chars": 9}, {"sum_logits": -3.996549367904663, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.027201652526855, "logits_per_token": -3.996549367904663, "logits_per_char": -0.7993098735809326, "num_chars": 5}, {"sum_logits": -4.558345317840576, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.817913055419922, "logits_per_token": -2.279172658920288, "logits_per_char": -0.3038896878560384, "num_chars": 15}, {"sum_logits": -11.02064323425293, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.415220260620117, "logits_per_token": -5.510321617126465, "logits_per_char": -0.9183869361877441, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 324, "native_id": "83808e92381b2e5f4cdf55d1391645ae", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.3765034675598145, "incorrect_loss_raw": 11.169371843338013, "correct_loss_per_char": 1.2753006935119628, "incorrect_loss_per_char": 1.7942606891904558, "correct_loss_per_token": 6.3765034675598145, "incorrect_loss_per_token": 11.169371843338013, "correct_loss_uncond": -6.464599132537842, "incorrect_loss_uncond": -2.4607036113739014}, "model_output": [{"sum_logits": -11.980070114135742, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -15.672751426696777, "logits_per_token": -11.980070114135742, "logits_per_char": -1.996678352355957, "num_chars": 6}, {"sum_logits": -11.40069580078125, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.719640731811523, "logits_per_token": -11.40069580078125, "logits_per_char": -1.900115966796875, "num_chars": 6}, {"sum_logits": -11.306615829467773, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.238256454467773, "logits_per_token": -11.306615829467773, "logits_per_char": -1.6152308327811105, "num_chars": 7}, {"sum_logits": -9.990105628967285, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -11.889653205871582, "logits_per_token": -9.990105628967285, "logits_per_char": -1.6650176048278809, "num_chars": 6}, {"sum_logits": -6.3765034675598145, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -12.841102600097656, "logits_per_token": -6.3765034675598145, "logits_per_char": -1.2753006935119628, "num_chars": 5}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 325, "native_id": "1a86310d7279097205a3403752c3b914", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.705867767333984, "incorrect_loss_raw": 14.030067682266235, "correct_loss_per_char": 0.856207529703776, "incorrect_loss_per_char": 1.4386967356715883, "correct_loss_per_token": 7.705867767333984, "incorrect_loss_per_token": 6.414831638336182, "correct_loss_uncond": -6.232115745544434, "incorrect_loss_uncond": -1.7554337978363037}, "model_output": [{"sum_logits": -14.404852867126465, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.5665283203125, "logits_per_token": -4.801617622375488, "logits_per_char": -1.0289180619376046, "num_chars": 14}, {"sum_logits": -7.705867767333984, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.937983512878418, "logits_per_token": -7.705867767333984, "logits_per_char": -0.856207529703776, "num_chars": 9}, {"sum_logits": -18.012588500976562, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.700084686279297, "logits_per_token": -9.006294250488281, "logits_per_char": -2.5732269287109375, "num_chars": 7}, {"sum_logits": -12.963387489318848, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.91880989074707, "logits_per_token": -6.481693744659424, "logits_per_char": -0.810211718082428, "num_chars": 16}, {"sum_logits": -10.739441871643066, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.956583023071289, "logits_per_token": -5.369720935821533, "logits_per_char": -1.3424302339553833, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 326, "native_id": "b4130d1790948134f3aeab9d3d79c181", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.7452679872512817, "incorrect_loss_raw": 8.873237252235413, "correct_loss_per_char": 0.2908779978752136, "incorrect_loss_per_char": 1.637116335829099, "correct_loss_per_token": 1.7452679872512817, "incorrect_loss_per_token": 6.860174298286438, "correct_loss_uncond": -10.537228465080261, "incorrect_loss_uncond": -6.624329686164856}, "model_output": [{"sum_logits": -1.7452679872512817, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.282496452331543, "logits_per_token": -1.7452679872512817, "logits_per_char": -0.2908779978752136, "num_chars": 6}, {"sum_logits": -8.442334175109863, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.77853012084961, "logits_per_token": -8.442334175109863, "logits_per_char": -1.4070556958516438, "num_chars": 6}, {"sum_logits": -16.104503631591797, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -22.316680908203125, "logits_per_token": -8.052251815795898, "logits_per_char": -3.2209007263183596, "num_chars": 5}, {"sum_logits": -6.528151035308838, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.5768461227417, "logits_per_token": -6.528151035308838, "logits_per_char": -0.8160188794136047, "num_chars": 8}, {"sum_logits": -4.417960166931152, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.31821060180664, "logits_per_token": -4.417960166931152, "logits_per_char": -1.104490041732788, "num_chars": 4}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 327, "native_id": "a5097b7f56d20217679f28201801476f", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 4.807351589202881, "incorrect_loss_raw": 10.168678760528564, "correct_loss_per_char": 0.4006126324335734, "incorrect_loss_per_char": 1.1161285139265513, "correct_loss_per_token": 2.4036757946014404, "incorrect_loss_per_token": 5.159895896911621, "correct_loss_uncond": -10.858167171478271, "incorrect_loss_uncond": -6.221729040145874}, "model_output": [{"sum_logits": -6.148782730102539, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.96527862548828, "logits_per_token": -3.0743913650512695, "logits_per_char": -0.6148782730102539, "num_chars": 10}, {"sum_logits": -4.238213539123535, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.41093635559082, "logits_per_token": -4.238213539123535, "logits_per_char": -0.6054590770176479, "num_chars": 7}, {"sum_logits": -4.807351589202881, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.665518760681152, "logits_per_token": -2.4036757946014404, "logits_per_char": -0.4006126324335734, "num_chars": 12}, {"sum_logits": -10.901284217834473, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.395794868469238, "logits_per_token": -3.633761405944824, "logits_per_char": -1.0901284217834473, "num_chars": 10}, {"sum_logits": -19.38643455505371, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.789621353149414, "logits_per_token": -9.693217277526855, "logits_per_char": -2.154048283894857, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 328, "native_id": "bcc5dd6292a64d8fa17cd07c360b335d", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.5182671546936035, "incorrect_loss_raw": 6.117061048746109, "correct_loss_per_char": 0.1798762253352574, "incorrect_loss_per_char": 0.6959909724588155, "correct_loss_per_token": 1.2591335773468018, "incorrect_loss_per_token": 4.667399475971857, "correct_loss_uncond": -15.255918025970459, "incorrect_loss_uncond": -10.191887706518173}, "model_output": [{"sum_logits": -11.840133666992188, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.998748779296875, "logits_per_token": -11.840133666992188, "logits_per_char": -1.6914476667131697, "num_chars": 7}, {"sum_logits": -8.697969436645508, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -21.003753662109375, "logits_per_token": -2.8993231455485025, "logits_per_char": -0.5116452609791475, "num_chars": 17}, {"sum_logits": -2.5182671546936035, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.774185180664062, "logits_per_token": -1.2591335773468018, "logits_per_char": -0.1798762253352574, "num_chars": 14}, {"sum_logits": -2.051572561264038, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.214923858642578, "logits_per_token": -2.051572561264038, "logits_per_char": -0.20515725612640381, "num_chars": 10}, {"sum_logits": -1.8785685300827026, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": true, "sum_logits_uncond": -13.0183687210083, "logits_per_token": -1.8785685300827026, "logits_per_char": -0.3757137060165405, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 329, "native_id": "cfc7fccb8449a2a950c9d2a50991420e", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.446557998657227, "incorrect_loss_raw": 12.558307409286499, "correct_loss_per_char": 0.9604684284755162, "incorrect_loss_per_char": 1.034881649758284, "correct_loss_per_token": 6.723278999328613, "incorrect_loss_per_token": 8.444701313972473, "correct_loss_uncond": -3.6094188690185547, "incorrect_loss_uncond": -4.472449541091919}, "model_output": [{"sum_logits": -11.096847534179688, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.975407600402832, "logits_per_token": -11.096847534179688, "logits_per_char": -1.0088043212890625, "num_chars": 11}, {"sum_logits": -13.446557998657227, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.05597686767578, "logits_per_token": -6.723278999328613, "logits_per_char": -0.9604684284755162, "num_chars": 14}, {"sum_logits": -19.19218635559082, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.78225326538086, "logits_per_token": -9.59609317779541, "logits_per_char": -1.3708704539707728, "num_chars": 14}, {"sum_logits": -6.227533340454102, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.685454368591309, "logits_per_token": -6.227533340454102, "logits_per_char": -1.0379222234090169, "num_chars": 6}, {"sum_logits": -13.716662406921387, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -23.679912567138672, "logits_per_token": -6.858331203460693, "logits_per_char": -0.7219296003642836, "num_chars": 19}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 330, "native_id": "2e83c5989a018bec6d5f5ac7d3b72f49", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.8835883140563965, "incorrect_loss_raw": 13.706376075744629, "correct_loss_per_char": 0.45258371646587664, "incorrect_loss_per_char": 1.3470670929976873, "correct_loss_per_token": 2.9417941570281982, "incorrect_loss_per_token": 6.777714808781941, "correct_loss_uncond": -10.907440662384033, "incorrect_loss_uncond": -4.92714786529541}, "model_output": [{"sum_logits": -11.29198932647705, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.931059837341309, "logits_per_token": -11.29198932647705, "logits_per_char": -1.4114986658096313, "num_chars": 8}, {"sum_logits": -5.8835883140563965, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.79102897644043, "logits_per_token": -2.9417941570281982, "logits_per_char": -0.45258371646587664, "num_chars": 13}, {"sum_logits": -7.866848945617676, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -11.102972984313965, "logits_per_token": -7.866848945617676, "logits_per_char": -1.3111414909362793, "num_chars": 6}, {"sum_logits": -23.621206283569336, "num_tokens": 6, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -34.985050201416016, "logits_per_token": -3.9368677139282227, "logits_per_char": -0.9448482513427734, "num_chars": 25}, {"sum_logits": -12.045459747314453, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.515012741088867, "logits_per_token": -4.015153249104817, "logits_per_char": -1.7207799639020647, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 331, "native_id": "34b2d6aecdb5af8efacf0b0aa7e3989f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.6377429962158203, "incorrect_loss_raw": 8.499409914016724, "correct_loss_per_char": 0.21981191635131836, "incorrect_loss_per_char": 0.6497002647036598, "correct_loss_per_token": 1.3188714981079102, "incorrect_loss_per_token": 4.249704957008362, "correct_loss_uncond": -18.626779556274414, "incorrect_loss_uncond": -12.25458312034607}, "model_output": [{"sum_logits": -2.6377429962158203, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.264522552490234, "logits_per_token": -1.3188714981079102, "logits_per_char": -0.21981191635131836, "num_chars": 12}, {"sum_logits": -6.944353103637695, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.341278076171875, "logits_per_token": -3.4721765518188477, "logits_per_char": -0.46295687357584636, "num_chars": 15}, {"sum_logits": -8.31033706665039, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -23.989656448364258, "logits_per_token": -4.155168533325195, "logits_per_char": -0.6925280888875326, "num_chars": 12}, {"sum_logits": -9.9620943069458, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.50229835510254, "logits_per_token": -4.9810471534729, "logits_per_char": -0.7115781647818429, "num_chars": 14}, {"sum_logits": -8.780855178833008, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.1827392578125, "logits_per_token": -4.390427589416504, "logits_per_char": -0.7317379315694174, "num_chars": 12}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 332, "native_id": "2ec7f8fe7948f9997e73f9bff7ba6e05", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.497747421264648, "incorrect_loss_raw": 11.246920347213745, "correct_loss_per_char": 0.590704311024059, "incorrect_loss_per_char": 1.191097558628429, "correct_loss_per_token": 2.1659158070882163, "incorrect_loss_per_token": 5.229245762030284, "correct_loss_uncond": -11.74876594543457, "incorrect_loss_uncond": -6.824604511260986}, "model_output": [{"sum_logits": -6.497747421264648, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.24651336669922, "logits_per_token": -2.1659158070882163, "logits_per_char": -0.590704311024059, "num_chars": 11}, {"sum_logits": -14.570722579956055, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.23171043395996, "logits_per_token": -4.8569075266520185, "logits_per_char": -1.3246111436323686, "num_chars": 11}, {"sum_logits": -8.215007781982422, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.794994354248047, "logits_per_token": -4.107503890991211, "logits_per_char": -0.8215007781982422, "num_chars": 10}, {"sum_logits": -13.665839195251465, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.056001663208008, "logits_per_token": -3.416459798812866, "logits_per_char": -0.9110559463500977, "num_chars": 15}, {"sum_logits": -8.536111831665039, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.20339298248291, "logits_per_token": -8.536111831665039, "logits_per_char": -1.7072223663330077, "num_chars": 5}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 333, "native_id": "651785ed4f7b0bd2e7ca9f70a42acea5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.471330165863037, "incorrect_loss_raw": 8.823211669921875, "correct_loss_per_char": 0.6079255739847819, "incorrect_loss_per_char": 1.1206202178290396, "correct_loss_per_token": 5.471330165863037, "incorrect_loss_per_token": 6.744278907775879, "correct_loss_uncond": -9.845984935760498, "incorrect_loss_uncond": -6.428137302398682}, "model_output": [{"sum_logits": -5.471330165863037, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.317315101623535, "logits_per_token": -5.471330165863037, "logits_per_char": -0.6079255739847819, "num_chars": 9}, {"sum_logits": -6.5354461669921875, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.132861137390137, "logits_per_token": -6.5354461669921875, "logits_per_char": -1.3070892333984374, "num_chars": 5}, {"sum_logits": -7.545060157775879, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.462339401245117, "logits_per_token": -3.7725300788879395, "logits_per_char": -0.6859145597978071, "num_chars": 11}, {"sum_logits": -12.125938415527344, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.554898262023926, "logits_per_token": -12.125938415527344, "logits_per_char": -1.7322769165039062, "num_chars": 7}, {"sum_logits": -9.08640193939209, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.855297088623047, "logits_per_token": -4.543200969696045, "logits_per_char": -0.7572001616160074, "num_chars": 12}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 334, "native_id": "ee46995407eb6357bb5410d49d378629", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.5622053146362305, "incorrect_loss_raw": 7.83202064037323, "correct_loss_per_char": 0.6180228127373589, "incorrect_loss_per_char": 1.1103944593951816, "correct_loss_per_token": 2.7811026573181152, "incorrect_loss_per_token": 7.83202064037323, "correct_loss_uncond": -11.003388404846191, "incorrect_loss_uncond": -6.346428036689758}, "model_output": [{"sum_logits": -5.5622053146362305, "num_tokens": 2, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -16.565593719482422, "logits_per_token": -2.7811026573181152, "logits_per_char": -0.6180228127373589, "num_chars": 9}, {"sum_logits": -9.750492095947266, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -13.023554801940918, "logits_per_token": -9.750492095947266, "logits_per_char": -1.2188115119934082, "num_chars": 8}, {"sum_logits": -10.739978790283203, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -15.932482719421387, "logits_per_token": -10.739978790283203, "logits_per_char": -1.7899964650472004, "num_chars": 6}, {"sum_logits": -6.465781211853027, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -13.709348678588867, "logits_per_token": -6.465781211853027, "logits_per_char": -0.8082226514816284, "num_chars": 8}, {"sum_logits": -4.371830463409424, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -14.048408508300781, "logits_per_token": -4.371830463409424, "logits_per_char": -0.6245472090584892, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 335, "native_id": "303aedda3a5ab8d853cbe4edc4b914c6", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.429535150527954, "incorrect_loss_raw": 9.40666651725769, "correct_loss_per_char": 0.15883723894755045, "incorrect_loss_per_char": 1.01400699350569, "correct_loss_per_token": 1.429535150527954, "incorrect_loss_per_token": 5.862487077713013, "correct_loss_uncond": -12.592967748641968, "incorrect_loss_uncond": -6.036177635192871}, "model_output": [{"sum_logits": -11.224143028259277, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.613142013549805, "logits_per_token": -5.612071514129639, "logits_per_char": -1.1224143028259277, "num_chars": 10}, {"sum_logits": -5.791665077209473, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.699617385864258, "logits_per_token": -2.8958325386047363, "logits_per_char": -0.6435183419121636, "num_chars": 9}, {"sum_logits": -1.429535150527954, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": true, "sum_logits_uncond": -14.022502899169922, "logits_per_token": -1.429535150527954, "logits_per_char": -0.15883723894755045, "num_chars": 9}, {"sum_logits": -11.337627410888672, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.565593719482422, "logits_per_token": -5.668813705444336, "logits_per_char": -1.2597363789876301, "num_chars": 9}, {"sum_logits": -9.27323055267334, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.893023490905762, "logits_per_token": -9.27323055267334, "logits_per_char": -1.0303589502970378, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 336, "native_id": "720b98fbc365736597147c984f6bd301", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 12.574423789978027, "incorrect_loss_raw": 16.21403980255127, "correct_loss_per_char": 1.1431294354525479, "incorrect_loss_per_char": 1.490414031346639, "correct_loss_per_token": 6.287211894989014, "incorrect_loss_per_token": 6.82797090212504, "correct_loss_uncond": -7.239314079284668, "incorrect_loss_uncond": -1.2408626079559326}, "model_output": [{"sum_logits": -12.183521270751953, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.993633270263672, "logits_per_token": -4.061173756917317, "logits_per_char": -1.0152934392293294, "num_chars": 12}, {"sum_logits": -12.574423789978027, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.813737869262695, "logits_per_token": -6.287211894989014, "logits_per_char": -1.1431294354525479, "num_chars": 11}, {"sum_logits": -16.78546714782715, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.357776641845703, "logits_per_token": -8.392733573913574, "logits_per_char": -1.5259515588933772, "num_chars": 11}, {"sum_logits": -17.373516082763672, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.032876014709473, "logits_per_token": -8.686758041381836, "logits_per_char": -1.7373516082763671, "num_chars": 10}, {"sum_logits": -18.513654708862305, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.43532371520996, "logits_per_token": -6.171218236287435, "logits_per_char": -1.6830595189874822, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 337, "native_id": "c611875b43b67b91030b889b267bbcb3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.404539108276367, "incorrect_loss_raw": 12.372339487075806, "correct_loss_per_char": 0.7837115923563639, "incorrect_loss_per_char": 1.1274405962116962, "correct_loss_per_token": 3.1348463694254556, "incorrect_loss_per_token": 5.692141016324361, "correct_loss_uncond": -8.945032119750977, "incorrect_loss_uncond": -5.5772705078125}, "model_output": [{"sum_logits": -10.810402870178223, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.707603454589844, "logits_per_token": -5.405201435089111, "logits_per_char": -0.7721716335841587, "num_chars": 14}, {"sum_logits": -9.404539108276367, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -18.349571228027344, "logits_per_token": -3.1348463694254556, "logits_per_char": -0.7837115923563639, "num_chars": 12}, {"sum_logits": -15.401702880859375, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.340926170349121, "logits_per_token": -7.7008514404296875, "logits_per_char": -1.7113003200954862, "num_chars": 9}, {"sum_logits": -11.420562744140625, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.72458267211914, "logits_per_token": -5.7102813720703125, "logits_per_char": -1.0382329767400569, "num_chars": 11}, {"sum_logits": -11.856689453125, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -21.025327682495117, "logits_per_token": -3.9522298177083335, "logits_per_char": -0.9880574544270834, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 338, "native_id": "0547da29ffab9b441bae8870cd0f9dab", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.775016784667969, "incorrect_loss_raw": 11.336588859558105, "correct_loss_per_char": 0.6982154846191406, "incorrect_loss_per_char": 1.0381972365654433, "correct_loss_per_token": 4.887508392333984, "incorrect_loss_per_token": 7.038704693317413, "correct_loss_uncond": -7.890247344970703, "incorrect_loss_uncond": -2.952104091644287}, "model_output": [{"sum_logits": -9.775016784667969, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.665264129638672, "logits_per_token": -4.887508392333984, "logits_per_char": -0.6982154846191406, "num_chars": 14}, {"sum_logits": -6.667534828186035, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.327664375305176, "logits_per_token": -6.667534828186035, "logits_per_char": -0.8334418535232544, "num_chars": 8}, {"sum_logits": -14.302796363830566, "num_tokens": 4, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.720380783081055, "logits_per_token": -3.5756990909576416, "logits_per_char": -0.8939247727394104, "num_chars": 16}, {"sum_logits": -11.447145462036133, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.793139457702637, "logits_per_token": -11.447145462036133, "logits_per_char": -1.4308931827545166, "num_chars": 8}, {"sum_logits": -12.928878784179688, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.313587188720703, "logits_per_token": -6.464439392089844, "logits_per_char": -0.9945291372445914, "num_chars": 13}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 339, "native_id": "21e312c7fd1a52341ce35b66457eab36", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.182832717895508, "incorrect_loss_raw": 13.707746863365173, "correct_loss_per_char": 1.2728540897369385, "incorrect_loss_per_char": 1.0966991789065876, "correct_loss_per_token": 5.091416358947754, "incorrect_loss_per_token": 7.55532705783844, "correct_loss_uncond": -7.684436798095703, "incorrect_loss_uncond": -3.97610080242157}, "model_output": [{"sum_logits": -10.182832717895508, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.86726951599121, "logits_per_token": -5.091416358947754, "logits_per_char": -1.2728540897369385, "num_chars": 8}, {"sum_logits": -17.63998794555664, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.45016860961914, "logits_per_token": -8.81999397277832, "logits_per_char": -1.1759991963704428, "num_chars": 15}, {"sum_logits": -5.611629009246826, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.3323392868042, "logits_per_token": -5.611629009246826, "logits_per_char": -0.7014536261558533, "num_chars": 8}, {"sum_logits": -19.074169158935547, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.17993927001953, "logits_per_token": -9.537084579467773, "logits_per_char": -1.4672437814565806, "num_chars": 13}, {"sum_logits": -12.50520133972168, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -21.7729434967041, "logits_per_token": -6.25260066986084, "logits_per_char": -1.0421001116434734, "num_chars": 12}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 340, "native_id": "82e26bc22af89c38d54aa2d00dcb8a2b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.5551844835281372, "incorrect_loss_raw": 15.02855134010315, "correct_loss_per_char": 0.15551844835281373, "incorrect_loss_per_char": 1.1826905766649851, "correct_loss_per_token": 1.5551844835281372, "incorrect_loss_per_token": 8.220550537109375, "correct_loss_uncond": -11.73849093914032, "incorrect_loss_uncond": -5.061674118041992}, "model_output": [{"sum_logits": -1.5551844835281372, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": true, "sum_logits_uncond": -13.293675422668457, "logits_per_token": -1.5551844835281372, "logits_per_char": -0.15551844835281373, "num_chars": 10}, {"sum_logits": -12.420914649963379, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.28540325164795, "logits_per_token": -12.420914649963379, "logits_per_char": -1.3801016277737088, "num_chars": 9}, {"sum_logits": -12.796677589416504, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -23.18533706665039, "logits_per_token": -4.265559196472168, "logits_per_char": -0.7997923493385315, "num_chars": 16}, {"sum_logits": -24.934513092041016, "num_tokens": 4, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -30.762733459472656, "logits_per_token": -6.233628273010254, "logits_per_char": -0.8905183247157505, "num_chars": 28}, {"sum_logits": -9.9621000289917, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.12742805480957, "logits_per_token": -9.9621000289917, "logits_per_char": -1.66035000483195, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 341, "native_id": "f75357e48c3026cfa4da3dba9f91bb21", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.328794479370117, "incorrect_loss_raw": 10.43078088760376, "correct_loss_per_char": 0.7571631344881925, "incorrect_loss_per_char": 0.7577461965262572, "correct_loss_per_token": 4.164397239685059, "incorrect_loss_per_token": 4.837500413258871, "correct_loss_uncond": -12.053461074829102, "incorrect_loss_uncond": -10.268460273742676}, "model_output": [{"sum_logits": -13.730770111083984, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -22.237686157226562, "logits_per_token": -6.865385055541992, "logits_per_char": -0.8076923594755285, "num_chars": 17}, {"sum_logits": -8.423538208007812, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.08359146118164, "logits_per_token": -4.211769104003906, "logits_per_char": -0.7019615173339844, "num_chars": 12}, {"sum_logits": -9.069360733032227, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -23.93267822265625, "logits_per_token": -3.0231202443440757, "logits_per_char": -0.5668350458145142, "num_chars": 16}, {"sum_logits": -10.499454498291016, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.54300880432129, "logits_per_token": -5.249727249145508, "logits_per_char": -0.9544958634810015, "num_chars": 11}, {"sum_logits": -8.328794479370117, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.38225555419922, "logits_per_token": -4.164397239685059, "logits_per_char": -0.7571631344881925, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 342, "native_id": "64931f9097155672bfe3e16f03b2c195", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.609138488769531, "incorrect_loss_raw": 8.201722860336304, "correct_loss_per_char": 0.600830771706321, "incorrect_loss_per_char": 1.2876669245106833, "correct_loss_per_token": 6.609138488769531, "incorrect_loss_per_token": 8.201722860336304, "correct_loss_uncond": -7.953001022338867, "incorrect_loss_uncond": -5.275135278701782}, "model_output": [{"sum_logits": -8.417769432067871, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.75567626953125, "logits_per_token": -8.417769432067871, "logits_per_char": -1.0522211790084839, "num_chars": 8}, {"sum_logits": -6.609138488769531, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.562139511108398, "logits_per_token": -6.609138488769531, "logits_per_char": -0.600830771706321, "num_chars": 11}, {"sum_logits": -8.204405784606934, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.001758575439453, "logits_per_token": -8.204405784606934, "logits_per_char": -1.1720579692295618, "num_chars": 7}, {"sum_logits": -9.316640853881836, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -11.889653205871582, "logits_per_token": -9.316640853881836, "logits_per_char": -1.5527734756469727, "num_chars": 6}, {"sum_logits": -6.868075370788574, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.260344505310059, "logits_per_token": -6.868075370788574, "logits_per_char": -1.3736150741577149, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 343, "native_id": "5de3248caa2e5ed83dd0ec45a15eae18", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.753246307373047, "incorrect_loss_raw": 12.532475709915161, "correct_loss_per_char": 1.1593860279430042, "incorrect_loss_per_char": 1.1399445436098479, "correct_loss_per_token": 4.251082102457683, "incorrect_loss_per_token": 6.978826642036438, "correct_loss_uncond": -8.112174987792969, "incorrect_loss_uncond": -6.215312480926514}, "model_output": [{"sum_logits": -23.074697494506836, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -22.461183547973633, "logits_per_token": -11.537348747253418, "logits_per_char": -1.7749767303466797, "num_chars": 13}, {"sum_logits": -12.753246307373047, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.865421295166016, "logits_per_token": -4.251082102457683, "logits_per_char": -1.1593860279430042, "num_chars": 11}, {"sum_logits": -8.737687110900879, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.55306053161621, "logits_per_token": -2.912562370300293, "logits_per_char": -0.6721297777616061, "num_chars": 13}, {"sum_logits": -8.613272666931152, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.043295860290527, "logits_per_token": -8.613272666931152, "logits_per_char": -1.2304675238473075, "num_chars": 7}, {"sum_logits": -9.704245567321777, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.933612823486328, "logits_per_token": -4.852122783660889, "logits_per_char": -0.882204142483798, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 344, "native_id": "0611dfbf5114084723d75f59b4f67412", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.3347105979919434, "incorrect_loss_raw": 12.05256962776184, "correct_loss_per_char": 0.46694211959838866, "incorrect_loss_per_char": 1.1661189453942435, "correct_loss_per_token": 2.3347105979919434, "incorrect_loss_per_token": 9.731452226638794, "correct_loss_uncond": -9.190568447113037, "incorrect_loss_uncond": -3.0114901065826416}, "model_output": [{"sum_logits": -18.568939208984375, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.54778289794922, "logits_per_token": -9.284469604492188, "logits_per_char": -1.1605587005615234, "num_chars": 16}, {"sum_logits": -7.823275566101074, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.562989234924316, "logits_per_token": -7.823275566101074, "logits_per_char": -1.1176107951572962, "num_chars": 7}, {"sum_logits": -13.638067245483398, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.436118125915527, "logits_per_token": -13.638067245483398, "logits_per_char": -1.3638067245483398, "num_chars": 10}, {"sum_logits": -8.179996490478516, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.709348678588867, "logits_per_token": -8.179996490478516, "logits_per_char": -1.0224995613098145, "num_chars": 8}, {"sum_logits": -2.3347105979919434, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.52527904510498, "logits_per_token": -2.3347105979919434, "logits_per_char": -0.46694211959838866, "num_chars": 5}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 345, "native_id": "5b8d76889510384b38b72945e8d28f53", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 4.725733757019043, "incorrect_loss_raw": 8.802884221076965, "correct_loss_per_char": 0.4725733757019043, "incorrect_loss_per_char": 0.7297696798910827, "correct_loss_per_token": 2.3628668785095215, "incorrect_loss_per_token": 5.77014156182607, "correct_loss_uncond": -12.996586799621582, "incorrect_loss_uncond": -6.825612425804138}, "model_output": [{"sum_logits": -12.859420776367188, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.28645133972168, "logits_per_token": -4.2864735921223955, "logits_per_char": -0.9185300554547992, "num_chars": 14}, {"sum_logits": -13.426017761230469, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.530681610107422, "logits_per_token": -13.426017761230469, "logits_per_char": -1.2205470692027698, "num_chars": 11}, {"sum_logits": -5.337035179138184, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.671985626220703, "logits_per_token": -1.7790117263793945, "logits_per_char": -0.3812167985098703, "num_chars": 14}, {"sum_logits": -3.5890631675720215, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.02486801147461, "logits_per_token": -3.5890631675720215, "logits_per_char": -0.3987847963968913, "num_chars": 9}, {"sum_logits": -4.725733757019043, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.722320556640625, "logits_per_token": -2.3628668785095215, "logits_per_char": -0.4725733757019043, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 346, "native_id": "d81f5c49bc060dc799681bf4cacac73a", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.207106590270996, "incorrect_loss_raw": 12.170369386672974, "correct_loss_per_char": 0.6005922158559164, "incorrect_loss_per_char": 1.1046146353085837, "correct_loss_per_token": 3.603553295135498, "incorrect_loss_per_token": 5.78268059094747, "correct_loss_uncond": -8.227850914001465, "incorrect_loss_uncond": -4.214626312255859}, "model_output": [{"sum_logits": -7.260098457336426, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.7159423828125, "logits_per_token": -2.420032819112142, "logits_per_char": -0.4840065638224284, "num_chars": 15}, {"sum_logits": -12.804671287536621, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.940740585327148, "logits_per_token": -6.4023356437683105, "logits_per_char": -1.2804671287536622, "num_chars": 10}, {"sum_logits": -12.461155891418457, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.28518295288086, "logits_per_token": -6.2305779457092285, "logits_per_char": -1.0384296576182048, "num_chars": 12}, {"sum_logits": -7.207106590270996, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.434957504272461, "logits_per_token": -3.603553295135498, "logits_per_char": -0.6005922158559164, "num_chars": 12}, {"sum_logits": -16.15555191040039, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.598116874694824, "logits_per_token": -8.077775955200195, "logits_per_char": -1.615555191040039, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 347, "native_id": "aaf4fa38433c84b3bd0a86551259ce62", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.74338150024414, "incorrect_loss_raw": 15.720999240875244, "correct_loss_per_char": 0.7339613437652588, "incorrect_loss_per_char": 1.294577883145748, "correct_loss_per_token": 3.9144605000813804, "incorrect_loss_per_token": 7.282344897588094, "correct_loss_uncond": -7.256305694580078, "incorrect_loss_uncond": -2.0224435329437256}, "model_output": [{"sum_logits": -13.875713348388672, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.918804168701172, "logits_per_token": -4.625237782796224, "logits_per_char": -0.867232084274292, "num_chars": 16}, {"sum_logits": -15.888689041137695, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.12395477294922, "logits_per_token": -7.944344520568848, "logits_per_char": -1.0592459360758464, "num_chars": 15}, {"sum_logits": -14.646785736083984, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.222378730773926, "logits_per_token": -7.323392868041992, "logits_per_char": -1.830848217010498, "num_chars": 8}, {"sum_logits": -11.74338150024414, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.99968719482422, "logits_per_token": -3.9144605000813804, "logits_per_char": -0.7339613437652588, "num_chars": 16}, {"sum_logits": -18.472808837890625, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.708633422851562, "logits_per_token": -9.236404418945312, "logits_per_char": -1.4209852952223558, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 348, "native_id": "33ea932a876ac0361c9eefeff1d24e92", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.6664628982543945, "incorrect_loss_raw": 10.211639404296875, "correct_loss_per_char": 0.6296069886949327, "incorrect_loss_per_char": 0.9811856495706659, "correct_loss_per_token": 5.6664628982543945, "incorrect_loss_per_token": 6.97212553024292, "correct_loss_uncond": -8.423850059509277, "incorrect_loss_uncond": -6.8139331340789795}, "model_output": [{"sum_logits": -5.6664628982543945, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.090312957763672, "logits_per_token": -5.6664628982543945, "logits_per_char": -0.6296069886949327, "num_chars": 9}, {"sum_logits": -8.101638793945312, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.676141738891602, "logits_per_token": -8.101638793945312, "logits_per_char": -1.6203277587890625, "num_chars": 5}, {"sum_logits": -12.5648193359375, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -22.941017150878906, "logits_per_token": -6.28240966796875, "logits_per_char": -0.6613062808388158, "num_chars": 19}, {"sum_logits": -13.35129165649414, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.34340476989746, "logits_per_token": -6.67564582824707, "logits_per_char": -0.667564582824707, "num_chars": 20}, {"sum_logits": -6.828807830810547, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.14172649383545, "logits_per_token": -6.828807830810547, "logits_per_char": -0.9755439758300781, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 349, "native_id": "aead08289ca9abfcd169f935ea228ee5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.089524269104004, "incorrect_loss_raw": 11.01265001296997, "correct_loss_per_char": 0.826320388100364, "incorrect_loss_per_char": 0.9966510907634274, "correct_loss_per_token": 4.544762134552002, "incorrect_loss_per_token": 6.90955893198649, "correct_loss_uncond": -9.017516136169434, "incorrect_loss_uncond": -7.601357460021973}, "model_output": [{"sum_logits": -5.541175842285156, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.845549583435059, "logits_per_token": -2.770587921142578, "logits_per_char": -0.3957982744489397, "num_chars": 14}, {"sum_logits": -10.223987579345703, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -21.84388542175293, "logits_per_token": -3.407995859781901, "logits_per_char": -0.7864605830265925, "num_chars": 13}, {"sum_logits": -14.633867263793945, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.855534553527832, "logits_per_token": -14.633867263793945, "logits_per_char": -1.8292334079742432, "num_chars": 8}, {"sum_logits": -9.089524269104004, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.107040405273438, "logits_per_token": -4.544762134552002, "logits_per_char": -0.826320388100364, "num_chars": 11}, {"sum_logits": -13.651569366455078, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -22.911060333251953, "logits_per_token": -6.825784683227539, "logits_per_char": -0.9751120976039341, "num_chars": 14}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 350, "native_id": "adbddc80b10bf25f09c6c2bee4e3c59b", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.897655010223389, "incorrect_loss_raw": 12.22720193862915, "correct_loss_per_char": 0.2880973535425523, "incorrect_loss_per_char": 1.4356291322481065, "correct_loss_per_token": 2.4488275051116943, "incorrect_loss_per_token": 8.820422768592834, "correct_loss_uncond": -11.691124439239502, "incorrect_loss_uncond": -3.7754111289978027}, "model_output": [{"sum_logits": -9.869558334350586, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.567606925964355, "logits_per_token": -9.869558334350586, "logits_per_char": -1.2336947917938232, "num_chars": 8}, {"sum_logits": -10.026877403259277, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.714458465576172, "logits_per_token": -5.013438701629639, "logits_per_char": -0.716205528804234, "num_chars": 14}, {"sum_logits": -17.22735595703125, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.034700393676758, "logits_per_token": -8.613677978515625, "logits_per_char": -1.4356129964192708, "num_chars": 12}, {"sum_logits": -11.785016059875488, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.693686485290527, "logits_per_token": -11.785016059875488, "logits_per_char": -2.3570032119750977, "num_chars": 5}, {"sum_logits": -4.897655010223389, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.58877944946289, "logits_per_token": -2.4488275051116943, "logits_per_char": -0.2880973535425523, "num_chars": 17}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 351, "native_id": "1caf93d6a22dc8190e19c14bbe1fafda", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.968371868133545, "incorrect_loss_raw": 10.500015497207642, "correct_loss_per_char": 0.4140309890111287, "incorrect_loss_per_char": 0.6506582176463204, "correct_loss_per_token": 2.4841859340667725, "incorrect_loss_per_token": 5.859942197799683, "correct_loss_uncond": -10.42749834060669, "incorrect_loss_uncond": -7.538619518280029}, "model_output": [{"sum_logits": -10.62465763092041, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.51554012298584, "logits_per_token": -10.62465763092041, "logits_per_char": -0.8853881359100342, "num_chars": 12}, {"sum_logits": -4.968371868133545, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.395870208740234, "logits_per_token": -2.4841859340667725, "logits_per_char": -0.4140309890111287, "num_chars": 12}, {"sum_logits": -5.29534912109375, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.41822052001953, "logits_per_token": -2.647674560546875, "logits_per_char": -0.37823922293526785, "num_chars": 14}, {"sum_logits": -17.235546112060547, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -23.113868713378906, "logits_per_token": -5.745182037353516, "logits_per_char": -0.749371570089589, "num_chars": 23}, {"sum_logits": -8.84450912475586, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.106910705566406, "logits_per_token": -4.42225456237793, "logits_per_char": -0.5896339416503906, "num_chars": 15}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 352, "native_id": "0bf4d64ad0eee7224acb3a4eb85accb2", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.3998799324035645, "incorrect_loss_raw": 12.64586615562439, "correct_loss_per_char": 0.7714114189147949, "incorrect_loss_per_char": 1.3360859889488714, "correct_loss_per_token": 5.3998799324035645, "incorrect_loss_per_token": 7.384043375651042, "correct_loss_uncond": -9.305773258209229, "incorrect_loss_uncond": -3.0806875228881836}, "model_output": [{"sum_logits": -13.204378128051758, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.285294532775879, "logits_per_token": -13.204378128051758, "logits_per_char": -1.8863397325788225, "num_chars": 7}, {"sum_logits": -5.3998799324035645, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.705653190612793, "logits_per_token": -5.3998799324035645, "logits_per_char": -0.7714114189147949, "num_chars": 7}, {"sum_logits": -6.589599609375, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.28221321105957, "logits_per_token": -3.2947998046875, "logits_per_char": -0.6589599609375, "num_chars": 10}, {"sum_logits": -14.14648723602295, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.918708801269531, "logits_per_token": -4.715495745340983, "logits_per_char": -1.2860442941839045, "num_chars": 11}, {"sum_logits": -16.64299964904785, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.419998168945312, "logits_per_token": -8.321499824523926, "logits_per_char": -1.5129999680952593, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 353, "native_id": "b93532cae23e505628dd88568da3337e", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.185652256011963, "incorrect_loss_raw": 5.088032782077789, "correct_loss_per_char": 1.1976087093353271, "incorrect_loss_per_char": 0.6383989487375532, "correct_loss_per_token": 7.185652256011963, "incorrect_loss_per_token": 4.7075028121471405, "correct_loss_uncond": -8.487099170684814, "incorrect_loss_uncond": -10.09699958562851}, "model_output": [{"sum_logits": -6.79794454574585, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.001758575439453, "logits_per_token": -6.79794454574585, "logits_per_char": -0.9711349351065499, "num_chars": 7}, {"sum_logits": -7.185652256011963, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.672751426696777, "logits_per_token": -7.185652256011963, "logits_per_char": -1.1976087093353271, "num_chars": 6}, {"sum_logits": -3.0442397594451904, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.974644660949707, "logits_per_token": -1.5221198797225952, "logits_per_char": -0.4348913942064558, "num_chars": 7}, {"sum_logits": -6.646955490112305, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.455964088439941, "logits_per_token": -6.646955490112305, "logits_per_char": -0.6646955490112305, "num_chars": 10}, {"sum_logits": -3.8629913330078125, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.307762145996094, "logits_per_token": -3.8629913330078125, "logits_per_char": -0.48287391662597656, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 354, "native_id": "2d3c9d3dff1a7a8253180cb3de1ceeea", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.704486846923828, "incorrect_loss_raw": 9.74145793914795, "correct_loss_per_char": 0.529212406703404, "incorrect_loss_per_char": 1.066230927980863, "correct_loss_per_token": 3.704486846923828, "incorrect_loss_per_token": 5.621495246887207, "correct_loss_uncond": -8.90404987335205, "incorrect_loss_uncond": -4.958251714706421}, "model_output": [{"sum_logits": -17.442340850830078, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -19.763538360595703, "logits_per_token": -5.814113616943359, "logits_per_char": -1.3417185269869292, "num_chars": 13}, {"sum_logits": -3.704486846923828, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.608536720275879, "logits_per_token": -3.704486846923828, "logits_per_char": -0.529212406703404, "num_chars": 7}, {"sum_logits": -9.7032470703125, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.918428421020508, "logits_per_token": -4.85162353515625, "logits_per_char": -0.97032470703125, "num_chars": 10}, {"sum_logits": -7.1954450607299805, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.520729064941406, "logits_per_token": -7.1954450607299805, "logits_per_char": -1.0279207229614258, "num_chars": 7}, {"sum_logits": -4.624798774719238, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -11.596142768859863, "logits_per_token": -4.624798774719238, "logits_per_char": -0.9249597549438476, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 355, "native_id": "70701f5d1d62e58d5c74e2e303bb4065", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.873725414276123, "incorrect_loss_raw": 9.249950408935547, "correct_loss_per_char": 0.23421567678451538, "incorrect_loss_per_char": 1.3633745127254062, "correct_loss_per_token": 1.873725414276123, "incorrect_loss_per_token": 7.151802659034729, "correct_loss_uncond": -10.292303562164307, "incorrect_loss_uncond": -4.65900731086731}, "model_output": [{"sum_logits": -1.873725414276123, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": true, "sum_logits_uncond": -12.16602897644043, "logits_per_token": -1.873725414276123, "logits_per_char": -0.23421567678451538, "num_chars": 8}, {"sum_logits": -8.043842315673828, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.2900390625, "logits_per_token": -4.021921157836914, "logits_per_char": -0.893760257297092, "num_chars": 9}, {"sum_logits": -8.741339683532715, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.28518295288086, "logits_per_token": -4.370669841766357, "logits_per_char": -0.7284449736277262, "num_chars": 12}, {"sum_logits": -13.865686416625977, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.957635879516602, "logits_per_token": -13.865686416625977, "logits_per_char": -2.7731372833251955, "num_chars": 5}, {"sum_logits": -6.348933219909668, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -11.102972984313965, "logits_per_token": -6.348933219909668, "logits_per_char": -1.0581555366516113, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 356, "native_id": "eacd87f297193033669a93160ae3776f", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.778487205505371, "incorrect_loss_raw": 7.206222057342529, "correct_loss_per_char": 0.4861554503440857, "incorrect_loss_per_char": 0.7997658462751479, "correct_loss_per_token": 3.8892436027526855, "incorrect_loss_per_token": 5.640021284421286, "correct_loss_uncond": -10.769295692443848, "incorrect_loss_uncond": -9.304765224456787}, "model_output": [{"sum_logits": -8.169512748718262, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.095705032348633, "logits_per_token": -4.084756374359131, "logits_per_char": -0.6807927290598551, "num_chars": 12}, {"sum_logits": -7.778487205505371, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.54778289794922, "logits_per_token": -3.8892436027526855, "logits_per_char": -0.4861554503440857, "num_chars": 16}, {"sum_logits": -10.169822692871094, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.363737106323242, "logits_per_token": -10.169822692871094, "logits_per_char": -1.452831813267299, "num_chars": 7}, {"sum_logits": -3.2700700759887695, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.82883071899414, "logits_per_token": -1.090023358662923, "logits_per_char": -0.16350350379943848, "num_chars": 20}, {"sum_logits": -7.215482711791992, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.75567626953125, "logits_per_token": -7.215482711791992, "logits_per_char": -0.901935338973999, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 357, "native_id": "8e1b0792e441a5d54ae47a4b24f48977", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 21.08892059326172, "incorrect_loss_raw": 11.587241172790527, "correct_loss_per_char": 2.108892059326172, "incorrect_loss_per_char": 1.3623072334843824, "correct_loss_per_token": 7.029640197753906, "incorrect_loss_per_token": 8.411388635635376, "correct_loss_uncond": -3.7940502166748047, "incorrect_loss_uncond": -3.42110276222229}, "model_output": [{"sum_logits": -12.14189338684082, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.02483367919922, "logits_per_token": -6.07094669342041, "logits_per_char": -1.214189338684082, "num_chars": 10}, {"sum_logits": -21.08892059326172, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -24.882970809936523, "logits_per_token": -7.029640197753906, "logits_per_char": -2.108892059326172, "num_chars": 10}, {"sum_logits": -13.26492691040039, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.956235885620117, "logits_per_token": -6.632463455200195, "logits_per_char": -1.0203789931077223, "num_chars": 13}, {"sum_logits": -9.987251281738281, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -11.018102645874023, "logits_per_token": -9.987251281738281, "logits_per_char": -1.9974502563476562, "num_chars": 5}, {"sum_logits": -10.954893112182617, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.03420352935791, "logits_per_token": -10.954893112182617, "logits_per_char": -1.2172103457980685, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 358, "native_id": "b4cde6a56fb19afc84876ebf2fb9e71a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.586959838867188, "incorrect_loss_raw": 13.424866795539856, "correct_loss_per_char": 1.0451507568359375, "incorrect_loss_per_char": 1.6924726043815737, "correct_loss_per_token": 4.5289866129557295, "incorrect_loss_per_token": 8.24504142999649, "correct_loss_uncond": -9.354053497314453, "incorrect_loss_uncond": -7.756265759468079}, "model_output": [{"sum_logits": -6.417088031768799, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.116493225097656, "logits_per_token": -3.2085440158843994, "logits_per_char": -0.8021360039710999, "num_chars": 8}, {"sum_logits": -12.2608642578125, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.465144157409668, "logits_per_token": -12.2608642578125, "logits_per_char": -1.7515520368303572, "num_chars": 7}, {"sum_logits": -13.586959838867188, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -22.94101333618164, "logits_per_token": -4.5289866129557295, "logits_per_char": -1.0451507568359375, "num_chars": 13}, {"sum_logits": -19.874244689941406, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -27.264591217041016, "logits_per_token": -9.937122344970703, "logits_per_char": -2.839177812848772, "num_chars": 7}, {"sum_logits": -15.147270202636719, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -24.8783016204834, "logits_per_token": -7.573635101318359, "logits_per_char": -1.3770245638760654, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 359, "native_id": "095c5bc5fbaf12b384e9f7df47fdec16", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.053689956665039, "incorrect_loss_raw": 16.94876503944397, "correct_loss_per_char": 0.7567112445831299, "incorrect_loss_per_char": 1.5829426004105256, "correct_loss_per_token": 6.053689956665039, "incorrect_loss_per_token": 9.398956298828125, "correct_loss_uncond": -7.655658721923828, "incorrect_loss_uncond": -2.1268186569213867}, "model_output": [{"sum_logits": -7.396590232849121, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.181896209716797, "logits_per_token": -7.396590232849121, "logits_per_char": -1.0566557475498743, "num_chars": 7}, {"sum_logits": -25.162826538085938, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -26.162046432495117, "logits_per_token": -12.581413269042969, "logits_per_char": -1.4801662669462317, "num_chars": 17}, {"sum_logits": -6.053689956665039, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.709348678588867, "logits_per_token": -6.053689956665039, "logits_per_char": -0.7567112445831299, "num_chars": 8}, {"sum_logits": -24.424564361572266, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -22.05228042602539, "logits_per_token": -12.212282180786133, "logits_per_char": -2.7138404846191406, "num_chars": 9}, {"sum_logits": -10.811079025268555, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.906111717224121, "logits_per_token": -5.405539512634277, "logits_per_char": -1.0811079025268555, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 360, "native_id": "494c501dbbfd36c602aae9e5b8e0cfff", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.8549056053161621, "incorrect_loss_raw": 10.082003355026245, "correct_loss_per_char": 0.1424842675526937, "incorrect_loss_per_char": 1.1872344255447387, "correct_loss_per_token": 0.8549056053161621, "incorrect_loss_per_token": 7.821419358253479, "correct_loss_uncond": -11.03474760055542, "incorrect_loss_uncond": -4.787617921829224}, "model_output": [{"sum_logits": -10.158884048461914, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.348342895507812, "logits_per_token": -10.158884048461914, "logits_per_char": -1.4512691497802734, "num_chars": 7}, {"sum_logits": -0.8549056053161621, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": true, "sum_logits_uncond": -11.889653205871582, "logits_per_token": -0.8549056053161621, "logits_per_char": -0.1424842675526937, "num_chars": 6}, {"sum_logits": -12.084457397460938, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.157535552978516, "logits_per_token": -12.084457397460938, "logits_per_char": -1.0070381164550781, "num_chars": 12}, {"sum_logits": -13.263039588928223, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -17.333370208740234, "logits_per_token": -6.631519794464111, "logits_per_char": -1.3263039588928223, "num_chars": 10}, {"sum_logits": -4.821632385253906, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.639236450195312, "logits_per_token": -2.410816192626953, "logits_per_char": -0.9643264770507812, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 361, "native_id": "5a7f6fd97b2c9ad05f773bc8b2ecf441", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 8.427323341369629, "incorrect_loss_raw": 13.991711139678955, "correct_loss_per_char": 1.2039033344813757, "incorrect_loss_per_char": 1.7191912242344447, "correct_loss_per_token": 8.427323341369629, "incorrect_loss_per_token": 10.729423522949219, "correct_loss_uncond": -6.107758522033691, "incorrect_loss_uncond": -0.5736048221588135}, "model_output": [{"sum_logits": -15.942071914672852, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.104859352111816, "logits_per_token": -7.971035957336426, "logits_per_char": -1.594207191467285, "num_chars": 10}, {"sum_logits": -10.156229019165039, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.918428421020508, "logits_per_token": -5.0781145095825195, "logits_per_char": -1.015622901916504, "num_chars": 10}, {"sum_logits": -15.85247802734375, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.218299865722656, "logits_per_token": -15.85247802734375, "logits_per_char": -2.2646397181919644, "num_chars": 7}, {"sum_logits": -8.427323341369629, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.53508186340332, "logits_per_token": -8.427323341369629, "logits_per_char": -1.2039033344813757, "num_chars": 7}, {"sum_logits": -14.01606559753418, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.019676208496094, "logits_per_token": -14.01606559753418, "logits_per_char": -2.0022950853620256, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 362, "native_id": "5279a2ea333ba8a5bf3a7637a7279da1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.553349494934082, "incorrect_loss_raw": 10.587387681007385, "correct_loss_per_char": 1.1383373737335205, "incorrect_loss_per_char": 1.266071454836772, "correct_loss_per_token": 4.553349494934082, "incorrect_loss_per_token": 7.726104199886322, "correct_loss_uncond": -5.321951866149902, "incorrect_loss_uncond": -6.039731860160828}, "model_output": [{"sum_logits": -9.658778190612793, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.672751426696777, "logits_per_token": -9.658778190612793, "logits_per_char": -1.609796365102132, "num_chars": 6}, {"sum_logits": -18.13177490234375, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -20.606101989746094, "logits_per_token": -9.065887451171875, "logits_per_char": -1.5109812418619792, "num_chars": 12}, {"sum_logits": -4.758492946624756, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.027515411376953, "logits_per_token": -2.379246473312378, "logits_per_char": -1.189623236656189, "num_chars": 4}, {"sum_logits": -4.553349494934082, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -9.875301361083984, "logits_per_token": -4.553349494934082, "logits_per_char": -1.1383373737335205, "num_chars": 4}, {"sum_logits": -9.800504684448242, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.202109336853027, "logits_per_token": -9.800504684448242, "logits_per_char": -0.7538849757267878, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 363, "native_id": "42c46e28baf0fc617a07419286178c0a", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.07867431640625, "incorrect_loss_raw": 15.606742858886719, "correct_loss_per_char": 1.1348342895507812, "incorrect_loss_per_char": 1.5099193063335141, "correct_loss_per_token": 4.539337158203125, "incorrect_loss_per_token": 8.412487268447876, "correct_loss_uncond": -6.600590705871582, "incorrect_loss_uncond": -3.4222538471221924}, "model_output": [{"sum_logits": -19.244787216186523, "num_tokens": 4, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -24.31184196472168, "logits_per_token": -4.811196804046631, "logits_per_char": -0.8367298789646315, "num_chars": 23}, {"sum_logits": -4.89754581451416, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.750934600830078, "logits_per_token": -2.44877290725708, "logits_per_char": -0.40812881787618, "num_chars": 12}, {"sum_logits": -23.789318084716797, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -22.52896499633789, "logits_per_token": -11.894659042358398, "logits_per_char": -2.3789318084716795, "num_chars": 10}, {"sum_logits": -14.495320320129395, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.524245262145996, "logits_per_token": -14.495320320129395, "logits_per_char": -2.415886720021566, "num_chars": 6}, {"sum_logits": -9.07867431640625, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.679265022277832, "logits_per_token": -4.539337158203125, "logits_per_char": -1.1348342895507812, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 364, "native_id": "c76304b4962f94ab9f20f09cf4a1a7c1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.192099571228027, "incorrect_loss_raw": 10.889075517654419, "correct_loss_per_char": 0.8653499285380045, "incorrect_loss_per_char": 1.4625275100980486, "correct_loss_per_token": 5.192099571228027, "incorrect_loss_per_token": 7.938335657119751, "correct_loss_uncond": -8.28935718536377, "incorrect_loss_uncond": -4.751616716384888}, "model_output": [{"sum_logits": -9.78394889831543, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.02480411529541, "logits_per_token": -9.78394889831543, "logits_per_char": -1.6306581497192383, "num_chars": 6}, {"sum_logits": -5.192099571228027, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.481456756591797, "logits_per_token": -5.192099571228027, "logits_per_char": -0.8653499285380045, "num_chars": 6}, {"sum_logits": -17.704439163208008, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.546873092651367, "logits_per_token": -5.901479721069336, "logits_per_char": -2.5292055947440013, "num_chars": 7}, {"sum_logits": -7.5109405517578125, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.6738920211792, "logits_per_token": -7.5109405517578125, "logits_per_char": -0.8345489501953125, "num_chars": 9}, {"sum_logits": -8.556973457336426, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.31719970703125, "logits_per_token": -8.556973457336426, "logits_per_char": -0.8556973457336425, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 365, "native_id": "8b23cd355ffc8b6e7aa5459ffb21b4e0", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.691348552703857, "incorrect_loss_raw": 9.468975722789764, "correct_loss_per_char": 1.1382697105407715, "incorrect_loss_per_char": 1.0714709935766278, "correct_loss_per_token": 5.691348552703857, "incorrect_loss_per_token": 6.077230155467987, "correct_loss_uncond": -5.409149646759033, "incorrect_loss_uncond": -7.526202023029327}, "model_output": [{"sum_logits": -10.061100006103516, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.882173538208008, "logits_per_token": -5.030550003051758, "logits_per_char": -0.8384250005086263, "num_chars": 12}, {"sum_logits": -5.691348552703857, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.10049819946289, "logits_per_token": -5.691348552703857, "logits_per_char": -1.1382697105407715, "num_chars": 5}, {"sum_logits": -6.999966144561768, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.388150215148926, "logits_per_token": -6.999966144561768, "logits_per_char": -1.3999932289123536, "num_chars": 5}, {"sum_logits": -3.7419722080230713, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.953621864318848, "logits_per_token": -3.7419722080230713, "logits_per_char": -0.340179291638461, "num_chars": 11}, {"sum_logits": -17.072864532470703, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -22.756765365600586, "logits_per_token": -8.536432266235352, "logits_per_char": -1.7072864532470704, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 366, "native_id": "c35f7de9e9005fcf654cb0b23f17acd6", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 4.393377304077148, "incorrect_loss_raw": 10.385514259338379, "correct_loss_per_char": 1.098344326019287, "incorrect_loss_per_char": 1.0190250830044822, "correct_loss_per_token": 4.393377304077148, "incorrect_loss_per_token": 7.692155520121257, "correct_loss_uncond": -9.376496315002441, "incorrect_loss_uncond": -4.686528444290161}, "model_output": [{"sum_logits": -5.695163726806641, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.859586715698242, "logits_per_token": -5.695163726806641, "logits_per_char": -0.569516372680664, "num_chars": 10}, {"sum_logits": -16.160152435302734, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.524097442626953, "logits_per_token": -5.386717478434245, "logits_per_char": -1.010009527206421, "num_chars": 16}, {"sum_logits": -4.393377304077148, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.76987361907959, "logits_per_token": -4.393377304077148, "logits_per_char": -1.098344326019287, "num_chars": 4}, {"sum_logits": -9.73850154876709, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.673858642578125, "logits_per_token": -9.73850154876709, "logits_per_char": -1.3912145069667272, "num_chars": 7}, {"sum_logits": -9.94823932647705, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.23062801361084, "logits_per_token": -9.94823932647705, "logits_per_char": -1.1053599251641169, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 367, "native_id": "d910859b9d1acae40456dbeaa8334bc0", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.895948886871338, "incorrect_loss_raw": 10.961682319641113, "correct_loss_per_char": 0.34971063477652414, "incorrect_loss_per_char": 1.065235609774823, "correct_loss_per_token": 2.447974443435669, "incorrect_loss_per_token": 6.988085905710856, "correct_loss_uncond": -13.15696096420288, "incorrect_loss_uncond": -4.3692145347595215}, "model_output": [{"sum_logits": -14.36507797241211, "num_tokens": 3, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -19.128847122192383, "logits_per_token": -4.78835932413737, "logits_per_char": -1.1050059978778546, "num_chars": 13}, {"sum_logits": -8.91929817199707, "num_tokens": 1, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -14.04694652557373, "logits_per_token": -8.91929817199707, "logits_per_char": -0.68609985938439, "num_chars": 13}, {"sum_logits": -4.895948886871338, "num_tokens": 2, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -18.05290985107422, "logits_per_token": -2.447974443435669, "logits_per_char": -0.34971063477652414, "num_chars": 14}, {"sum_logits": -7.927019119262695, "num_tokens": 1, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -11.685454368591309, "logits_per_token": -7.927019119262695, "logits_per_char": -1.3211698532104492, "num_chars": 6}, {"sum_logits": -12.635334014892578, "num_tokens": 2, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -16.462339401245117, "logits_per_token": -6.317667007446289, "logits_per_char": -1.148666728626598, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 368, "native_id": "6ca8439d062de4d43d7d471c508b78db", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.560907363891602, "incorrect_loss_raw": 9.540693044662476, "correct_loss_per_char": 0.8123774895301232, "incorrect_loss_per_char": 1.124902595414056, "correct_loss_per_token": 5.280453681945801, "incorrect_loss_per_token": 6.5688087940216064, "correct_loss_uncond": -8.514928817749023, "incorrect_loss_uncond": -7.249425172805786}, "model_output": [{"sum_logits": -10.560907363891602, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.075836181640625, "logits_per_token": -5.280453681945801, "logits_per_char": -0.8123774895301232, "num_chars": 13}, {"sum_logits": -6.287485122680664, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.421607971191406, "logits_per_token": -6.287485122680664, "logits_per_char": -1.047914187113444, "num_chars": 6}, {"sum_logits": -8.100213050842285, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.58857536315918, "logits_per_token": -8.100213050842285, "logits_per_char": -0.8100213050842285, "num_chars": 10}, {"sum_logits": -11.861579895019531, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.88397789001465, "logits_per_token": -5.930789947509766, "logits_per_char": -1.3179533216688368, "num_chars": 9}, {"sum_logits": -11.913494110107422, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -20.266311645507812, "logits_per_token": -5.956747055053711, "logits_per_char": -1.3237215677897136, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 369, "native_id": "ddd8c62ec94b4f94eeefdd05b9208a71", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.7406258583068848, "incorrect_loss_raw": 11.176188588142395, "correct_loss_per_char": 0.30451398425632054, "incorrect_loss_per_char": 1.1439951022466024, "correct_loss_per_token": 1.3703129291534424, "incorrect_loss_per_token": 7.769812226295471, "correct_loss_uncond": -17.617005825042725, "incorrect_loss_uncond": -6.639100909233093}, "model_output": [{"sum_logits": -14.764026641845703, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.638458251953125, "logits_per_token": -7.382013320922852, "logits_per_char": -0.9842684427897136, "num_chars": 15}, {"sum_logits": -12.833516120910645, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.444046974182129, "logits_per_token": -12.833516120910645, "logits_per_char": -1.6041895151138306, "num_chars": 8}, {"sum_logits": -2.7406258583068848, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.35763168334961, "logits_per_token": -1.3703129291534424, "logits_per_char": -0.30451398425632054, "num_chars": 9}, {"sum_logits": -12.486984252929688, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.693796157836914, "logits_per_token": -6.243492126464844, "logits_per_char": -0.8324656168619792, "num_chars": 15}, {"sum_logits": -4.620227336883545, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.484856605529785, "logits_per_token": -4.620227336883545, "logits_per_char": -1.1550568342208862, "num_chars": 4}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 370, "native_id": "72b638200414a526b598de0e01a044df", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.432612895965576, "incorrect_loss_raw": 11.255218863487244, "correct_loss_per_char": 0.6432612895965576, "incorrect_loss_per_char": 1.6752607249594353, "correct_loss_per_token": 6.432612895965576, "incorrect_loss_per_token": 9.590131878852844, "correct_loss_uncond": -9.782310962677002, "incorrect_loss_uncond": -3.712927460670471}, "model_output": [{"sum_logits": -7.287264347076416, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.450029373168945, "logits_per_token": -7.287264347076416, "logits_per_char": -1.0410377638680595, "num_chars": 7}, {"sum_logits": -13.320695877075195, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -20.85131072998047, "logits_per_token": -6.660347938537598, "logits_per_char": -1.2109723524613814, "num_chars": 11}, {"sum_logits": -6.432612895965576, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -16.214923858642578, "logits_per_token": -6.432612895965576, "logits_per_char": -0.6432612895965576, "num_chars": 10}, {"sum_logits": -11.406407356262207, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -12.090980529785156, "logits_per_token": -11.406407356262207, "logits_per_char": -2.2812814712524414, "num_chars": 5}, {"sum_logits": -13.006507873535156, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -12.480264663696289, "logits_per_token": -13.006507873535156, "logits_per_char": -2.1677513122558594, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 371, "native_id": "c770870c88f35f9d110217049c5a7334", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.529749870300293, "incorrect_loss_raw": 5.134465128183365, "correct_loss_per_char": 0.3921944300333659, "incorrect_loss_per_char": 0.6781394325069383, "correct_loss_per_token": 3.529749870300293, "incorrect_loss_per_token": 5.134465128183365, "correct_loss_uncond": -10.920628547668457, "incorrect_loss_uncond": -8.863648742437363}, "model_output": [{"sum_logits": -4.973837375640869, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.087692260742188, "logits_per_token": -4.973837375640869, "logits_per_char": -0.38260287504929763, "num_chars": 13}, {"sum_logits": -3.529749870300293, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.45037841796875, "logits_per_token": -3.529749870300293, "logits_per_char": -0.3921944300333659, "num_chars": 9}, {"sum_logits": -7.819500923156738, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.23508071899414, "logits_per_token": -7.819500923156738, "logits_per_char": -0.8688334359063042, "num_chars": 9}, {"sum_logits": -6.574081897735596, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.591198921203613, "logits_per_token": -6.574081897735596, "logits_per_char": -1.314816379547119, "num_chars": 5}, {"sum_logits": -1.1704403162002563, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": true, "sum_logits_uncond": -14.078483581542969, "logits_per_token": -1.1704403162002563, "logits_per_char": -0.14630503952503204, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 372, "native_id": "1d8d9e3504c8c58a3b923ddc155c19b0", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.369647979736328, "incorrect_loss_raw": 9.547701120376587, "correct_loss_per_char": 0.4369647979736328, "incorrect_loss_per_char": 0.9425617218017577, "correct_loss_per_token": 1.4565493265787761, "incorrect_loss_per_token": 5.406570037206014, "correct_loss_uncond": -13.748186111450195, "incorrect_loss_uncond": -6.582605838775635}, "model_output": [{"sum_logits": -10.726913452148438, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.974586486816406, "logits_per_token": -5.363456726074219, "logits_per_char": -1.1918792724609375, "num_chars": 9}, {"sum_logits": -4.369647979736328, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.117834091186523, "logits_per_token": -1.4565493265787761, "logits_per_char": -0.4369647979736328, "num_chars": 10}, {"sum_logits": -16.80160140991211, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.704547882080078, "logits_per_token": -5.600533803304036, "logits_per_char": -1.1201067606608073, "num_chars": 15}, {"sum_logits": -4.924116134643555, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.055810928344727, "logits_per_token": -4.924116134643555, "logits_per_char": -0.8206860224405924, "num_chars": 6}, {"sum_logits": -5.738173484802246, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.786282539367676, "logits_per_token": -5.738173484802246, "logits_per_char": -0.637574831644694, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 373, "native_id": "95acebea992a26c3a7c3bfb45845fa83", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.471806526184082, "incorrect_loss_raw": 8.884460210800171, "correct_loss_per_char": 0.5786344210306803, "incorrect_loss_per_char": 0.6712464073321203, "correct_loss_per_token": 3.471806526184082, "incorrect_loss_per_token": 4.4422301054000854, "correct_loss_uncond": -9.892091751098633, "incorrect_loss_uncond": -10.069855451583862}, "model_output": [{"sum_logits": -6.80094051361084, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.982807159423828, "logits_per_token": -3.40047025680542, "logits_per_char": -0.5231492702777569, "num_chars": 13}, {"sum_logits": -11.60982894897461, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -21.19986343383789, "logits_per_token": -5.804914474487305, "logits_per_char": -0.7256143093109131, "num_chars": 16}, {"sum_logits": -3.471806526184082, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.363898277282715, "logits_per_token": -3.471806526184082, "logits_per_char": -0.5786344210306803, "num_chars": 6}, {"sum_logits": -8.490983963012695, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.616514205932617, "logits_per_token": -4.245491981506348, "logits_per_char": -0.7719076330011542, "num_chars": 11}, {"sum_logits": -8.636087417602539, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.018077850341797, "logits_per_token": -4.3180437088012695, "logits_per_char": -0.6643144167386569, "num_chars": 13}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 374, "native_id": "c2c2a387fd9a6a26cff636008de21f71", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.082625389099121, "incorrect_loss_raw": 9.191188216209412, "correct_loss_per_char": 0.6055083592732747, "incorrect_loss_per_char": 1.1168139775144568, "correct_loss_per_token": 3.0275417963663735, "incorrect_loss_per_token": 6.66582574446996, "correct_loss_uncond": -11.991551399230957, "incorrect_loss_uncond": -7.261638760566711}, "model_output": [{"sum_logits": -9.735635757446289, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.202109336853027, "logits_per_token": -9.735635757446289, "logits_per_char": -0.7488950582650992, "num_chars": 13}, {"sum_logits": -9.082625389099121, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -21.074176788330078, "logits_per_token": -3.0275417963663735, "logits_per_char": -0.6055083592732747, "num_chars": 15}, {"sum_logits": -7.687227725982666, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.027515411376953, "logits_per_token": -3.843613862991333, "logits_per_char": -1.9218069314956665, "num_chars": 4}, {"sum_logits": -9.386754035949707, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -22.004837036132812, "logits_per_token": -3.128918011983236, "logits_per_char": -0.5521620021146887, "num_chars": 17}, {"sum_logits": -9.955135345458984, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.5768461227417, "logits_per_token": -9.955135345458984, "logits_per_char": -1.244391918182373, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 375, "native_id": "57e96118fee6e2bbac5f59790fc833c0", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.082594871520996, "incorrect_loss_raw": 10.924620270729065, "correct_loss_per_char": 0.5676621794700623, "incorrect_loss_per_char": 1.4786373724540074, "correct_loss_per_token": 3.027531623840332, "incorrect_loss_per_token": 9.949093997478485, "correct_loss_uncond": -8.734898567199707, "incorrect_loss_uncond": -3.5282784700393677}, "model_output": [{"sum_logits": -9.082594871520996, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.817493438720703, "logits_per_token": -3.027531623840332, "logits_per_char": -0.5676621794700623, "num_chars": 16}, {"sum_logits": -14.440065383911133, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.081451416015625, "logits_per_token": -14.440065383911133, "logits_per_char": -1.8050081729888916, "num_chars": 8}, {"sum_logits": -10.853744506835938, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.49835205078125, "logits_per_token": -10.853744506835938, "logits_per_char": -1.8089574178059895, "num_chars": 6}, {"sum_logits": -7.804210186004639, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.906187057495117, "logits_per_token": -3.9021050930023193, "logits_per_char": -0.9755262732505798, "num_chars": 8}, {"sum_logits": -10.60046100616455, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.325604438781738, "logits_per_token": -10.60046100616455, "logits_per_char": -1.3250576257705688, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 376, "native_id": "b9b82aa4c236cd342ff95455b8516a42", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.989025115966797, "incorrect_loss_raw": 11.715818881988525, "correct_loss_per_char": 0.2717295559969815, "incorrect_loss_per_char": 1.2169727073775398, "correct_loss_per_token": 1.4945125579833984, "incorrect_loss_per_token": 8.273497660954794, "correct_loss_uncond": -13.973072052001953, "incorrect_loss_uncond": -2.3287651538848877}, "model_output": [{"sum_logits": -2.989025115966797, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.96209716796875, "logits_per_token": -1.4945125579833984, "logits_per_char": -0.2717295559969815, "num_chars": 11}, {"sum_logits": -11.98078441619873, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.73450756072998, "logits_per_token": -11.98078441619873, "logits_per_char": -1.198078441619873, "num_chars": 10}, {"sum_logits": -10.966593742370605, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.664363861083984, "logits_per_token": -3.6555312474568686, "logits_per_char": -1.2185104158189561, "num_chars": 9}, {"sum_logits": -12.916444778442383, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.28518295288086, "logits_per_token": -6.458222389221191, "logits_per_char": -1.076370398203532, "num_chars": 12}, {"sum_logits": -10.999452590942383, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.494281768798828, "logits_per_token": -10.999452590942383, "logits_per_char": -1.3749315738677979, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 377, "native_id": "41fac392c6a5827c1b6682d5d3798e59", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.597326278686523, "incorrect_loss_raw": 11.924392223358154, "correct_loss_per_char": 0.8246657848358154, "incorrect_loss_per_char": 2.0563004766191755, "correct_loss_per_token": 6.597326278686523, "incorrect_loss_per_token": 9.987979888916016, "correct_loss_uncond": -8.037508964538574, "incorrect_loss_uncond": -3.955631732940674}, "model_output": [{"sum_logits": -8.070384979248047, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.945343017578125, "logits_per_token": -8.070384979248047, "logits_per_char": -1.6140769958496093, "num_chars": 5}, {"sum_logits": -6.597326278686523, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.634835243225098, "logits_per_token": -6.597326278686523, "logits_per_char": -0.8246657848358154, "num_chars": 8}, {"sum_logits": -15.49129867553711, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -22.621089935302734, "logits_per_token": -7.745649337768555, "logits_per_char": -1.1065213339669364, "num_chars": 14}, {"sum_logits": -10.58735466003418, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.803377151489258, "logits_per_token": -10.58735466003418, "logits_per_char": -2.117470932006836, "num_chars": 5}, {"sum_logits": -13.548530578613281, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.150285720825195, "logits_per_token": -13.548530578613281, "logits_per_char": -3.3871326446533203, "num_chars": 4}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 378, "native_id": "5c224410a40c9269b1e542cfcb430d35", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.77439022064209, "incorrect_loss_raw": 10.22467303276062, "correct_loss_per_char": 0.8249128886631557, "incorrect_loss_per_char": 1.7138116578261058, "correct_loss_per_token": 5.77439022064209, "incorrect_loss_per_token": 10.22467303276062, "correct_loss_uncond": -7.695815086364746, "incorrect_loss_uncond": -2.833221197128296}, "model_output": [{"sum_logits": -10.583852767944336, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.889653205871582, "logits_per_token": -10.583852767944336, "logits_per_char": -1.763975461324056, "num_chars": 6}, {"sum_logits": -5.77439022064209, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.470205307006836, "logits_per_token": -5.77439022064209, "logits_per_char": -0.8249128886631557, "num_chars": 7}, {"sum_logits": -10.41749382019043, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.693686485290527, "logits_per_token": -10.41749382019043, "logits_per_char": -2.0834987640380858, "num_chars": 5}, {"sum_logits": -7.402844429016113, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.5768461227417, "logits_per_token": -7.402844429016113, "logits_per_char": -0.9253555536270142, "num_chars": 8}, {"sum_logits": -12.494501113891602, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.071391105651855, "logits_per_token": -12.494501113891602, "logits_per_char": -2.082416852315267, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 379, "native_id": "0b90c6710a65eb55fea4cc92895bf601", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.529066801071167, "incorrect_loss_raw": 14.600652694702148, "correct_loss_per_char": 0.5058133602142334, "incorrect_loss_per_char": 1.5282493100021823, "correct_loss_per_token": 2.529066801071167, "incorrect_loss_per_token": 9.252683401107788, "correct_loss_uncond": -8.884279489517212, "incorrect_loss_uncond": -2.4541099071502686}, "model_output": [{"sum_logits": -13.927484512329102, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.935901641845703, "logits_per_token": -6.963742256164551, "logits_per_char": -1.266134955666282, "num_chars": 11}, {"sum_logits": -21.642202377319336, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -23.20166778564453, "logits_per_token": -7.214067459106445, "logits_per_char": -1.202344576517741, "num_chars": 18}, {"sum_logits": -9.220670700073242, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.320228576660156, "logits_per_token": -9.220670700073242, "logits_per_char": -0.9220670700073242, "num_chars": 10}, {"sum_logits": -13.612253189086914, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.761252403259277, "logits_per_token": -13.612253189086914, "logits_per_char": -2.7224506378173827, "num_chars": 5}, {"sum_logits": -2.529066801071167, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -11.413346290588379, "logits_per_token": -2.529066801071167, "logits_per_char": -0.5058133602142334, "num_chars": 5}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 380, "native_id": "70af2b5df22ec96901350dfa3c9ee74f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.589227199554443, "incorrect_loss_raw": 12.730791091918945, "correct_loss_per_char": 0.5990206545049493, "incorrect_loss_per_char": 2.360692262649536, "correct_loss_per_token": 6.589227199554443, "incorrect_loss_per_token": 12.730791091918945, "correct_loss_uncond": -8.336058139801025, "incorrect_loss_uncond": -0.6804473400115967}, "model_output": [{"sum_logits": -6.589227199554443, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -14.925285339355469, "logits_per_token": -6.589227199554443, "logits_per_char": -0.5990206545049493, "num_chars": 11}, {"sum_logits": -14.640377044677734, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -13.622650146484375, "logits_per_token": -14.640377044677734, "logits_per_char": -2.4400628407796225, "num_chars": 6}, {"sum_logits": -14.440633773803711, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -13.075480461120605, "logits_per_token": -14.440633773803711, "logits_per_char": -2.4067722956339517, "num_chars": 6}, {"sum_logits": -11.466899871826172, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -13.76987361907959, "logits_per_token": -11.466899871826172, "logits_per_char": -2.866724967956543, "num_chars": 4}, {"sum_logits": -10.375253677368164, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -13.176949501037598, "logits_per_token": -10.375253677368164, "logits_per_char": -1.7292089462280273, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 381, "native_id": "f9243ef9f0037657c337d3c6a9832f05", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.359478950500488, "incorrect_loss_raw": 10.955573916435242, "correct_loss_per_char": 0.544934868812561, "incorrect_loss_per_char": 1.461386167813861, "correct_loss_per_token": 4.359478950500488, "incorrect_loss_per_token": 10.955573916435242, "correct_loss_uncond": -8.782172203063965, "incorrect_loss_uncond": -4.072409510612488}, "model_output": [{"sum_logits": -7.769637584686279, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.31732177734375, "logits_per_token": -7.769637584686279, "logits_per_char": -1.1099482263837541, "num_chars": 7}, {"sum_logits": -10.885307312011719, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -13.969682693481445, "logits_per_token": -10.885307312011719, "logits_per_char": -1.2094785902235243, "num_chars": 9}, {"sum_logits": -13.135419845581055, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.24472427368164, "logits_per_token": -13.135419845581055, "logits_per_char": -2.189236640930176, "num_chars": 6}, {"sum_logits": -4.359478950500488, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -13.141651153564453, "logits_per_token": -4.359478950500488, "logits_per_char": -0.544934868812561, "num_chars": 8}, {"sum_logits": -12.031930923461914, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.580204963684082, "logits_per_token": -12.031930923461914, "logits_per_char": -1.3368812137179904, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 382, "native_id": "27f2074270ea8a5e8f5ec2a017ec4a50", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.226720809936523, "incorrect_loss_raw": 14.071932792663574, "correct_loss_per_char": 1.1363023122151692, "incorrect_loss_per_char": 1.330895853376055, "correct_loss_per_token": 5.113360404968262, "incorrect_loss_per_token": 5.741292874018351, "correct_loss_uncond": -5.051904678344727, "incorrect_loss_uncond": -5.743300676345825}, "model_output": [{"sum_logits": -10.608291625976562, "num_tokens": 3, "num_tokens_all": 169, "is_greedy": false, "sum_logits_uncond": -14.9756441116333, "logits_per_token": -3.536097208658854, "logits_per_char": -1.3260364532470703, "num_chars": 8}, {"sum_logits": -14.418651580810547, "num_tokens": 2, "num_tokens_all": 168, "is_greedy": false, "sum_logits_uncond": -21.759857177734375, "logits_per_token": -7.209325790405273, "logits_per_char": -1.4418651580810546, "num_chars": 10}, {"sum_logits": -10.226720809936523, "num_tokens": 2, "num_tokens_all": 168, "is_greedy": false, "sum_logits_uncond": -15.27862548828125, "logits_per_token": -5.113360404968262, "logits_per_char": -1.1363023122151692, "num_chars": 9}, {"sum_logits": -20.4638729095459, "num_tokens": 3, "num_tokens_all": 169, "is_greedy": false, "sum_logits_uncond": -23.833255767822266, "logits_per_token": -6.821290969848633, "logits_per_char": -1.574144069965069, "num_chars": 13}, {"sum_logits": -10.796915054321289, "num_tokens": 2, "num_tokens_all": 168, "is_greedy": false, "sum_logits_uncond": -18.692176818847656, "logits_per_token": -5.3984575271606445, "logits_per_char": -0.9815377322110262, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 383, "native_id": "63b3652d54c8c0e571f6bb50de318bf0", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.154946327209473, "incorrect_loss_raw": 11.4303879737854, "correct_loss_per_char": 0.42957886060078937, "incorrect_loss_per_char": 1.4938119029241894, "correct_loss_per_token": 2.5774731636047363, "incorrect_loss_per_token": 10.130688905715942, "correct_loss_uncond": -10.475872993469238, "incorrect_loss_uncond": -3.232802152633667}, "model_output": [{"sum_logits": -16.4748592376709, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.218299865722656, "logits_per_token": -16.4748592376709, "logits_per_char": -2.3535513196672713, "num_chars": 7}, {"sum_logits": -5.154946327209473, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.630819320678711, "logits_per_token": -2.5774731636047363, "logits_per_char": -0.42957886060078937, "num_chars": 12}, {"sum_logits": -7.434965133666992, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.027201652526855, "logits_per_token": -7.434965133666992, "logits_per_char": -1.4869930267333984, "num_chars": 5}, {"sum_logits": -10.397592544555664, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.922691345214844, "logits_per_token": -5.198796272277832, "logits_per_char": -0.8664660453796387, "num_chars": 12}, {"sum_logits": -11.414134979248047, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.484567642211914, "logits_per_token": -11.414134979248047, "logits_per_char": -1.2682372199164496, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 384, "native_id": "0843c51212a3c2eee660fab5648c9e19", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.5460402965545654, "incorrect_loss_raw": 18.798551559448242, "correct_loss_per_char": 0.38651007413864136, "incorrect_loss_per_char": 1.5713513702541202, "correct_loss_per_token": 1.5460402965545654, "incorrect_loss_per_token": 7.776621580123901, "correct_loss_uncond": -11.362776041030884, "incorrect_loss_uncond": -0.030097007751464844}, "model_output": [{"sum_logits": -20.178672790527344, "num_tokens": 3, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -21.46866226196289, "logits_per_token": -6.726224263509114, "logits_per_char": -0.9172123995694247, "num_chars": 22}, {"sum_logits": -17.240467071533203, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -15.27862548828125, "logits_per_token": -8.620233535766602, "logits_per_char": -1.9156074523925781, "num_chars": 9}, {"sum_logits": -1.5460402965545654, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": true, "sum_logits_uncond": -12.90881633758545, "logits_per_token": -1.5460402965545654, "logits_per_char": -0.38651007413864136, "num_chars": 4}, {"sum_logits": -19.010038375854492, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -17.02395248413086, "logits_per_token": -9.505019187927246, "logits_per_char": -2.1122264862060547, "num_chars": 9}, {"sum_logits": -18.76502799987793, "num_tokens": 3, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -21.543354034423828, "logits_per_token": -6.2550093332926435, "logits_per_char": -1.3403591428484236, "num_chars": 14}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 385, "native_id": "1b3d286458a7e7f069222de0376d06da", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.137511253356934, "incorrect_loss_raw": 14.94168472290039, "correct_loss_per_char": 1.1263901392618816, "incorrect_loss_per_char": 1.516497291218151, "correct_loss_per_token": 5.068755626678467, "incorrect_loss_per_token": 6.7191855907440186, "correct_loss_uncond": -8.067158699035645, "incorrect_loss_uncond": -4.623960494995117}, "model_output": [{"sum_logits": -17.96257972717285, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -23.898086547851562, "logits_per_token": -8.981289863586426, "logits_per_char": -1.7962579727172852, "num_chars": 10}, {"sum_logits": -15.115084648132324, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.539966583251953, "logits_per_token": -7.557542324066162, "logits_per_char": -1.6794538497924805, "num_chars": 9}, {"sum_logits": -10.137511253356934, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.204669952392578, "logits_per_token": -5.068755626678467, "logits_per_char": -1.1263901392618816, "num_chars": 9}, {"sum_logits": -8.649312019348145, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.352895736694336, "logits_per_token": -4.324656009674072, "logits_per_char": -0.7863010926680132, "num_chars": 11}, {"sum_logits": -18.039762496948242, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.47163200378418, "logits_per_token": -6.013254165649414, "logits_per_char": -1.8039762496948242, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 386, "native_id": "86e2aabfb9d401567f04d87a648ff776", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.897714138031006, "incorrect_loss_raw": 10.045738101005554, "correct_loss_per_char": 0.41395916257585796, "incorrect_loss_per_char": 1.1161230752865474, "correct_loss_per_token": 2.897714138031006, "incorrect_loss_per_token": 5.022869050502777, "correct_loss_uncond": -11.412721157073975, "incorrect_loss_uncond": -7.454605937004089}, "model_output": [{"sum_logits": -11.70626449584961, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -20.355520248413086, "logits_per_token": -5.853132247924805, "logits_per_char": -0.9755220413208008, "num_chars": 12}, {"sum_logits": -14.171459197998047, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -19.194522857666016, "logits_per_token": -7.085729598999023, "logits_per_char": -1.5746065775553386, "num_chars": 9}, {"sum_logits": -2.897714138031006, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.31043529510498, "logits_per_token": -2.897714138031006, "logits_per_char": -0.41395916257585796, "num_chars": 7}, {"sum_logits": -7.067765235900879, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.753787994384766, "logits_per_token": -3.5338826179504395, "logits_per_char": -1.0096807479858398, "num_chars": 7}, {"sum_logits": -7.237463474273682, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -15.697545051574707, "logits_per_token": -3.618731737136841, "logits_per_char": -0.9046829342842102, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 387, "native_id": "092c24369367b3c7457198f3ce160fe3", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 3.9281458854675293, "incorrect_loss_raw": 10.614912509918213, "correct_loss_per_char": 0.6546909809112549, "incorrect_loss_per_char": 0.9104195250405205, "correct_loss_per_token": 3.9281458854675293, "incorrect_loss_per_token": 4.614406108856201, "correct_loss_uncond": -11.683340549468994, "incorrect_loss_uncond": -7.827786445617676}, "model_output": [{"sum_logits": -6.487105369567871, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.11066436767578, "logits_per_token": -3.2435526847839355, "logits_per_char": -0.7207894855075412, "num_chars": 9}, {"sum_logits": -10.851797103881836, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.884258270263672, "logits_per_token": -5.425898551940918, "logits_per_char": -1.2057552337646484, "num_chars": 9}, {"sum_logits": -3.9281458854675293, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.611486434936523, "logits_per_token": -3.9281458854675293, "logits_per_char": -0.6546909809112549, "num_chars": 6}, {"sum_logits": -16.633203506469727, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -22.00168800354004, "logits_per_token": -5.544401168823242, "logits_per_char": -1.1088802337646484, "num_chars": 15}, {"sum_logits": -8.487544059753418, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.774185180664062, "logits_per_token": -4.243772029876709, "logits_per_char": -0.6062531471252441, "num_chars": 14}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 388, "native_id": "cab9eea2a91b1bd5c0a01b11f594f154", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.921854972839355, "incorrect_loss_raw": 10.596786856651306, "correct_loss_per_char": 0.9928959066217596, "incorrect_loss_per_char": 1.2713172520049894, "correct_loss_per_token": 5.460927486419678, "incorrect_loss_per_token": 5.841456214586894, "correct_loss_uncond": -2.915670394897461, "incorrect_loss_uncond": -7.387032151222229}, "model_output": [{"sum_logits": -10.921854972839355, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.837525367736816, "logits_per_token": -5.460927486419678, "logits_per_char": -0.9928959066217596, "num_chars": 11}, {"sum_logits": -7.430287837982178, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.895898818969727, "logits_per_token": -7.430287837982178, "logits_per_char": -0.8255875375535753, "num_chars": 9}, {"sum_logits": -11.419939041137695, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.676166534423828, "logits_per_token": -5.709969520568848, "logits_per_char": -1.0381762764670632, "num_chars": 11}, {"sum_logits": -14.279563903808594, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.593149185180664, "logits_per_token": -7.139781951904297, "logits_per_char": -2.379927317301432, "num_chars": 6}, {"sum_logits": -9.257356643676758, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.770061492919922, "logits_per_token": -3.0857855478922525, "logits_per_char": -0.8415778766978871, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 389, "native_id": "6e77de03bee86d6c20780e14f00944d0", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 1.5013325214385986, "incorrect_loss_raw": 12.10820722579956, "correct_loss_per_char": 0.18766656517982483, "incorrect_loss_per_char": 1.573901935418447, "correct_loss_per_token": 1.5013325214385986, "incorrect_loss_per_token": 9.914513051509857, "correct_loss_uncond": -12.497018575668335, "incorrect_loss_uncond": -4.520090818405151}, "model_output": [{"sum_logits": -11.699702262878418, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -27.9509334564209, "logits_per_token": -2.9249255657196045, "logits_per_char": -0.5849851131439209, "num_chars": 20}, {"sum_logits": -8.11166763305664, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.926148414611816, "logits_per_token": -8.11166763305664, "logits_per_char": -1.3519446055094402, "num_chars": 6}, {"sum_logits": -1.5013325214385986, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": true, "sum_logits_uncond": -13.998351097106934, "logits_per_token": -1.5013325214385986, "logits_per_char": -0.18766656517982483, "num_chars": 8}, {"sum_logits": -17.285736083984375, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.564719200134277, "logits_per_token": -17.285736083984375, "logits_per_char": -2.469390869140625, "num_chars": 7}, {"sum_logits": -11.335722923278809, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.071391105651855, "logits_per_token": -11.335722923278809, "logits_per_char": -1.8892871538798015, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 390, "native_id": "7f25dbab26165b3c8800c2733ca759d6", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 14.210134506225586, "incorrect_loss_raw": 11.790817975997925, "correct_loss_per_char": 1.0150096075875419, "incorrect_loss_per_char": 1.3096239244399341, "correct_loss_per_token": 4.736711502075195, "incorrect_loss_per_token": 5.7312689026196795, "correct_loss_uncond": -6.029140472412109, "incorrect_loss_uncond": -4.134858846664429}, "model_output": [{"sum_logits": -10.441755294799805, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.22107219696045, "logits_per_token": -5.220877647399902, "logits_per_char": -1.3052194118499756, "num_chars": 8}, {"sum_logits": -14.210134506225586, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.239274978637695, "logits_per_token": -4.736711502075195, "logits_per_char": -1.0150096075875419, "num_chars": 14}, {"sum_logits": -9.87932014465332, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.892474174499512, "logits_per_token": -9.87932014465332, "logits_per_char": -1.4113314492361886, "num_chars": 7}, {"sum_logits": -13.371944427490234, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.061859130859375, "logits_per_token": -4.457314809163411, "logits_per_char": -1.4857716030544705, "num_chars": 9}, {"sum_logits": -13.47025203704834, "num_tokens": 4, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.527301788330078, "logits_per_token": -3.367563009262085, "logits_per_char": -1.036173233619103, "num_chars": 13}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 391, "native_id": "9024493a3edbaf555fda5b477e835bf5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.530797004699707, "incorrect_loss_raw": 13.415114641189575, "correct_loss_per_char": 1.3923107782999675, "incorrect_loss_per_char": 1.5012839926613701, "correct_loss_per_token": 12.530797004699707, "incorrect_loss_per_token": 7.841303745905559, "correct_loss_uncond": -1.4448270797729492, "incorrect_loss_uncond": -1.354616403579712}, "model_output": [{"sum_logits": -4.907559394836426, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.641080856323242, "logits_per_token": -4.907559394836426, "logits_per_char": -0.8179265658060709, "num_chars": 6}, {"sum_logits": -11.055793762207031, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.463623046875, "logits_per_token": -11.055793762207031, "logits_per_char": -1.2284215291341145, "num_chars": 9}, {"sum_logits": -17.01696014404297, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.27862548828125, "logits_per_token": -8.508480072021484, "logits_per_char": -1.8907733493381076, "num_chars": 9}, {"sum_logits": -20.680145263671875, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.695594787597656, "logits_per_token": -6.893381754557292, "logits_per_char": -2.0680145263671874, "num_chars": 10}, {"sum_logits": -12.530797004699707, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.975624084472656, "logits_per_token": -12.530797004699707, "logits_per_char": -1.3923107782999675, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 392, "native_id": "fc59ab1a9e6d2b51126dd828d30e9167", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.005895137786865, "incorrect_loss_raw": 10.645859718322754, "correct_loss_per_char": 0.5004210812704903, "incorrect_loss_per_char": 1.5913781139585705, "correct_loss_per_token": 3.5029475688934326, "incorrect_loss_per_token": 8.647867679595947, "correct_loss_uncond": -9.850080966949463, "incorrect_loss_uncond": -4.415532827377319}, "model_output": [{"sum_logits": -11.578069686889648, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.715117454528809, "logits_per_token": -11.578069686889648, "logits_per_char": -2.31561393737793, "num_chars": 5}, {"sum_logits": -15.983936309814453, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -17.280855178833008, "logits_per_token": -7.991968154907227, "logits_per_char": -1.775992923312717, "num_chars": 9}, {"sum_logits": -9.509435653686523, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.672751426696777, "logits_per_token": -9.509435653686523, "logits_per_char": -1.5849059422810872, "num_chars": 6}, {"sum_logits": -7.005895137786865, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -16.855976104736328, "logits_per_token": -3.5029475688934326, "logits_per_char": -0.5004210812704903, "num_chars": 14}, {"sum_logits": -5.511997222900391, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.5768461227417, "logits_per_token": -5.511997222900391, "logits_per_char": -0.6889996528625488, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 393, "native_id": "5a50ea4bb2d13dc4f620ebd45025d445", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.4518059492111206, "incorrect_loss_raw": 9.31299078464508, "correct_loss_per_char": 0.14518059492111207, "incorrect_loss_per_char": 1.1085552197127115, "correct_loss_per_token": 1.4518059492111206, "incorrect_loss_per_token": 7.217661023139954, "correct_loss_uncond": -13.433241963386536, "incorrect_loss_uncond": -6.812226176261902}, "model_output": [{"sum_logits": -9.465572357177734, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.595938682556152, "logits_per_token": -9.465572357177734, "logits_per_char": -1.5775953928629558, "num_chars": 6}, {"sum_logits": -4.579113960266113, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.304811477661133, "logits_per_token": -4.579113960266113, "logits_per_char": -0.6541591371808734, "num_chars": 7}, {"sum_logits": -16.762638092041016, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -21.846853256225586, "logits_per_token": -8.381319046020508, "logits_per_char": -1.3968865076700847, "num_chars": 12}, {"sum_logits": -6.444638729095459, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.753264427185059, "logits_per_token": -6.444638729095459, "logits_per_char": -0.8055798411369324, "num_chars": 8}, {"sum_logits": -1.4518059492111206, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": true, "sum_logits_uncond": -14.885047912597656, "logits_per_token": -1.4518059492111206, "logits_per_char": -0.14518059492111207, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 394, "native_id": "8becd2ee4e86258566a9c2b0e6d9544e", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.317963600158691, "incorrect_loss_raw": 12.663164973258972, "correct_loss_per_char": 0.3851559789557206, "incorrect_loss_per_char": 1.1278753625604259, "correct_loss_per_token": 2.439321200052897, "incorrect_loss_per_token": 6.331582486629486, "correct_loss_uncond": -12.688675880432129, "incorrect_loss_uncond": -7.162770390510559}, "model_output": [{"sum_logits": -10.262975692749023, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.822582244873047, "logits_per_token": -5.131487846374512, "logits_per_char": -0.7330696923392159, "num_chars": 14}, {"sum_logits": -7.317963600158691, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -20.00663948059082, "logits_per_token": -2.439321200052897, "logits_per_char": -0.3851559789557206, "num_chars": 19}, {"sum_logits": -7.39694356918335, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -21.34524917602539, "logits_per_token": -3.698471784591675, "logits_per_char": -0.5689956591679499, "num_chars": 13}, {"sum_logits": -16.431262969970703, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.392536163330078, "logits_per_token": -8.215631484985352, "logits_per_char": -1.3692719141642253, "num_chars": 12}, {"sum_logits": -16.561477661132812, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.74337387084961, "logits_per_token": -8.280738830566406, "logits_per_char": -1.8401641845703125, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 395, "native_id": "2a21820a135e1a49883525c055c74a0b", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.2740581035614014, "incorrect_loss_raw": 11.136125326156616, "correct_loss_per_char": 0.32740581035614014, "incorrect_loss_per_char": 0.9450060392426989, "correct_loss_per_token": 1.6370290517807007, "incorrect_loss_per_token": 6.93894366423289, "correct_loss_uncond": -16.542189836502075, "incorrect_loss_uncond": -4.844730377197266}, "model_output": [{"sum_logits": -10.152504920959473, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.263617515563965, "logits_per_token": -10.152504920959473, "logits_per_char": -1.1280561023288302, "num_chars": 9}, {"sum_logits": -15.782222747802734, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.931365966796875, "logits_per_token": -5.260740915934245, "logits_per_char": -0.9283660439883962, "num_chars": 17}, {"sum_logits": -6.075284004211426, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.474529266357422, "logits_per_token": -6.075284004211426, "logits_per_char": -0.7594105005264282, "num_chars": 8}, {"sum_logits": -12.534489631652832, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.253910064697266, "logits_per_token": -6.267244815826416, "logits_per_char": -0.9641915101271409, "num_chars": 13}, {"sum_logits": -3.2740581035614014, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.816247940063477, "logits_per_token": -1.6370290517807007, "logits_per_char": -0.32740581035614014, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 396, "native_id": "e5adfec0b5ba691ec752f9b5e0fb8084", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 4.838694095611572, "incorrect_loss_raw": 10.563524961471558, "correct_loss_per_char": 0.8064490159352621, "incorrect_loss_per_char": 1.1076505541801454, "correct_loss_per_token": 4.838694095611572, "incorrect_loss_per_token": 7.587146480878195, "correct_loss_uncond": -10.834057331085205, "incorrect_loss_uncond": -5.776949882507324}, "model_output": [{"sum_logits": -9.21455192565918, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.339004516601562, "logits_per_token": -3.07151730855306, "logits_per_char": -0.460727596282959, "num_chars": 20}, {"sum_logits": -11.524958610534668, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -18.204669952392578, "logits_per_token": -5.762479305267334, "logits_per_char": -1.2805509567260742, "num_chars": 9}, {"sum_logits": -8.890569686889648, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.5768461227417, "logits_per_token": -8.890569686889648, "logits_per_char": -1.111321210861206, "num_chars": 8}, {"sum_logits": -4.838694095611572, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -15.672751426696777, "logits_per_token": -4.838694095611572, "logits_per_char": -0.8064490159352621, "num_chars": 6}, {"sum_logits": -12.624019622802734, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.241378784179688, "logits_per_token": -12.624019622802734, "logits_per_char": -1.5780024528503418, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 397, "native_id": "406e15b76269d20b5448a91648094291", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.797685623168945, "incorrect_loss_raw": 8.153872549533844, "correct_loss_per_char": 0.7996142705281576, "incorrect_loss_per_char": 1.0035940241848063, "correct_loss_per_token": 4.797685623168945, "incorrect_loss_per_token": 7.689146548509598, "correct_loss_uncond": -8.797514915466309, "incorrect_loss_uncond": -5.5322646498680115}, "model_output": [{"sum_logits": -14.017236709594727, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.572244644165039, "logits_per_token": -14.017236709594727, "logits_per_char": -1.7521545886993408, "num_chars": 8}, {"sum_logits": -3.7178080081939697, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.920121192932129, "logits_per_token": -1.8589040040969849, "logits_per_char": -0.3379825461994518, "num_chars": 11}, {"sum_logits": -8.531968116760254, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.470205307006836, "logits_per_token": -8.531968116760254, "logits_per_char": -1.2188525881086076, "num_chars": 7}, {"sum_logits": -6.348477363586426, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.781977653503418, "logits_per_token": -6.348477363586426, "logits_per_char": -0.7053863737318251, "num_chars": 9}, {"sum_logits": -4.797685623168945, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.595200538635254, "logits_per_token": -4.797685623168945, "logits_per_char": -0.7996142705281576, "num_chars": 6}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 398, "native_id": "9c596382ea15768f95b5ef9ceec191dc", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.021278381347656, "incorrect_loss_raw": 19.59282088279724, "correct_loss_per_char": 1.860182625906808, "incorrect_loss_per_char": 1.3256553301104792, "correct_loss_per_token": 13.021278381347656, "incorrect_loss_per_token": 6.7613139152526855, "correct_loss_uncond": -2.245865821838379, "incorrect_loss_uncond": -0.48729968070983887}, "model_output": [{"sum_logits": -13.797673225402832, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.626487731933594, "logits_per_token": -6.898836612701416, "logits_per_char": -1.533074802822537, "num_chars": 9}, {"sum_logits": -16.012065887451172, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.799480438232422, "logits_per_token": -8.006032943725586, "logits_per_char": -1.0007541179656982, "num_chars": 16}, {"sum_logits": -13.021278381347656, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.267144203186035, "logits_per_token": -13.021278381347656, "logits_per_char": -1.860182625906808, "num_chars": 7}, {"sum_logits": -20.956680297851562, "num_tokens": 4, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.585721969604492, "logits_per_token": -5.239170074462891, "logits_per_char": -1.7463900248209636, "num_chars": 12}, {"sum_logits": -27.6048641204834, "num_tokens": 4, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -26.308792114257812, "logits_per_token": -6.90121603012085, "logits_per_char": -1.0224023748327185, "num_chars": 27}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 399, "native_id": "7a3d0c94438a5c8a09364aaebb848a2c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.6634392738342285, "incorrect_loss_raw": 8.172447919845581, "correct_loss_per_char": 0.6105732123057047, "incorrect_loss_per_char": 1.0721960167090099, "correct_loss_per_token": 3.6634392738342285, "incorrect_loss_per_token": 5.712430357933044, "correct_loss_uncond": -10.292991161346436, "incorrect_loss_uncond": -9.17310881614685}, "model_output": [{"sum_logits": -3.6634392738342285, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.956430435180664, "logits_per_token": -3.6634392738342285, "logits_per_char": -0.6105732123057047, "num_chars": 6}, {"sum_logits": -10.007061004638672, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -24.129915237426758, "logits_per_token": -5.003530502319336, "logits_per_char": -0.9097328186035156, "num_chars": 11}, {"sum_logits": -7.877236366271973, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.891498565673828, "logits_per_token": -7.877236366271973, "logits_per_char": -1.125319480895996, "num_chars": 7}, {"sum_logits": -5.132414817810059, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.59318733215332, "logits_per_token": -5.132414817810059, "logits_per_char": -0.6415518522262573, "num_chars": 8}, {"sum_logits": -9.673079490661621, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.76762580871582, "logits_per_token": -4.8365397453308105, "logits_per_char": -1.6121799151102703, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 400, "native_id": "1ef68db97654f30cd3701b942fadc934", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 11.381475448608398, "incorrect_loss_raw": 9.564626693725586, "correct_loss_per_char": 0.669498555800494, "incorrect_loss_per_char": 1.5634144306182862, "correct_loss_per_token": 3.793825149536133, "incorrect_loss_per_token": 9.564626693725586, "correct_loss_uncond": -10.623361587524414, "incorrect_loss_uncond": -4.1706154346466064}, "model_output": [{"sum_logits": -13.041049003601074, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.389348983764648, "logits_per_token": -13.041049003601074, "logits_per_char": -2.1735081672668457, "num_chars": 6}, {"sum_logits": -11.381475448608398, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -22.004837036132812, "logits_per_token": -3.793825149536133, "logits_per_char": -0.669498555800494, "num_chars": 17}, {"sum_logits": -6.302742958068848, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.455964088439941, "logits_per_token": -6.302742958068848, "logits_per_char": -0.6302742958068848, "num_chars": 10}, {"sum_logits": -8.922683715820312, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.509245872497559, "logits_per_token": -8.922683715820312, "logits_per_char": -1.7845367431640624, "num_chars": 5}, {"sum_logits": -9.99203109741211, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.586409568786621, "logits_per_token": -9.99203109741211, "logits_per_char": -1.6653385162353516, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 401, "native_id": "abb090bbc572be1016bcd5f261f28e76", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.2004499435424805, "incorrect_loss_raw": 11.119667053222656, "correct_loss_per_char": 0.6000374952952067, "incorrect_loss_per_char": 1.1721955185844786, "correct_loss_per_token": 7.2004499435424805, "incorrect_loss_per_token": 8.065227746963501, "correct_loss_uncond": -11.511570930480957, "incorrect_loss_uncond": -5.481818914413452}, "model_output": [{"sum_logits": -5.730075836181641, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -11.685454368591309, "logits_per_token": -5.730075836181641, "logits_per_char": -0.9550126393636068, "num_chars": 6}, {"sum_logits": -14.313077926635742, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.184246063232422, "logits_per_token": -14.313077926635742, "logits_per_char": -2.0447254180908203, "num_chars": 7}, {"sum_logits": -7.2004499435424805, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.712020874023438, "logits_per_token": -7.2004499435424805, "logits_per_char": -0.6000374952952067, "num_chars": 12}, {"sum_logits": -12.602041244506836, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.087373733520508, "logits_per_token": -6.301020622253418, "logits_per_char": -0.9001458031790597, "num_chars": 14}, {"sum_logits": -11.833473205566406, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -21.448869705200195, "logits_per_token": -5.916736602783203, "logits_per_char": -0.788898213704427, "num_chars": 15}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 402, "native_id": "91f2532a832a35cba1b08a3c767be6da", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.1124358177185059, "incorrect_loss_raw": 11.422283172607422, "correct_loss_per_char": 0.15891940253121511, "incorrect_loss_per_char": 2.0987006130672636, "correct_loss_per_token": 1.1124358177185059, "incorrect_loss_per_token": 11.422283172607422, "correct_loss_uncond": -13.102122783660889, "incorrect_loss_uncond": -2.2991490364074707}, "model_output": [{"sum_logits": -16.416404724121094, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.465144157409668, "logits_per_token": -16.416404724121094, "logits_per_char": -2.345200674874442, "num_chars": 7}, {"sum_logits": -9.531579971313477, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.9362211227417, "logits_per_token": -9.531579971313477, "logits_per_char": -2.382894992828369, "num_chars": 4}, {"sum_logits": -11.295463562011719, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.45659065246582, "logits_per_token": -11.295463562011719, "logits_per_char": -2.2590927124023437, "num_chars": 5}, {"sum_logits": -8.445684432983398, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.027772903442383, "logits_per_token": -8.445684432983398, "logits_per_char": -1.4076140721638997, "num_chars": 6}, {"sum_logits": -1.1124358177185059, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": true, "sum_logits_uncond": -14.214558601379395, "logits_per_token": -1.1124358177185059, "logits_per_char": -0.15891940253121511, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 403, "native_id": "f8544c9679d27b747dfad3b8d7aac87a", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.445288181304932, "incorrect_loss_raw": 11.790184020996094, "correct_loss_per_char": 0.4939209090338813, "incorrect_loss_per_char": 1.686781189839045, "correct_loss_per_token": 4.445288181304932, "incorrect_loss_per_token": 6.857525706291199, "correct_loss_uncond": -8.44773530960083, "incorrect_loss_uncond": -2.692042827606201}, "model_output": [{"sum_logits": -15.264930725097656, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.27862548828125, "logits_per_token": -7.632465362548828, "logits_per_char": -1.6961034138997395, "num_chars": 9}, {"sum_logits": -12.518284797668457, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.95015811920166, "logits_per_token": -6.2591423988342285, "logits_per_char": -1.5647855997085571, "num_chars": 8}, {"sum_logits": -7.699469566345215, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.313766479492188, "logits_per_token": -7.699469566345215, "logits_per_char": -1.539893913269043, "num_chars": 5}, {"sum_logits": -11.678050994873047, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.386357307434082, "logits_per_token": -5.839025497436523, "logits_per_char": -1.946341832478841, "num_chars": 6}, {"sum_logits": -4.445288181304932, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.893023490905762, "logits_per_token": -4.445288181304932, "logits_per_char": -0.4939209090338813, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 404, "native_id": "a7f423c1636ba9e36d18e381928c5dcc", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.362521171569824, "incorrect_loss_raw": 13.407367944717407, "correct_loss_per_char": 0.920315146446228, "incorrect_loss_per_char": 1.4372280509698958, "correct_loss_per_token": 7.362521171569824, "incorrect_loss_per_token": 8.001938104629517, "correct_loss_uncond": -7.373273849487305, "incorrect_loss_uncond": -4.107482671737671}, "model_output": [{"sum_logits": -10.386033058166504, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -13.995492935180664, "logits_per_token": -10.386033058166504, "logits_per_char": -1.298254132270813, "num_chars": 8}, {"sum_logits": -18.95559310913086, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -20.711145401000977, "logits_per_token": -9.47779655456543, "logits_per_char": -2.106177012125651, "num_chars": 9}, {"sum_logits": -7.362521171569824, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -14.735795021057129, "logits_per_token": -7.362521171569824, "logits_per_char": -0.920315146446228, "num_chars": 8}, {"sum_logits": -15.362800598144531, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -18.789621353149414, "logits_per_token": -7.681400299072266, "logits_per_char": -1.7069778442382812, "num_chars": 9}, {"sum_logits": -8.925045013427734, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -16.563142776489258, "logits_per_token": -4.462522506713867, "logits_per_char": -0.6375032152448382, "num_chars": 14}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 405, "native_id": "e1d354cbfcd620e5dacf83c17746c4b3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 11.115243911743164, "incorrect_loss_raw": 10.11167311668396, "correct_loss_per_char": 1.235027101304796, "incorrect_loss_per_char": 1.1456742513747442, "correct_loss_per_token": 5.557621955871582, "incorrect_loss_per_token": 7.078343152999878, "correct_loss_uncond": -8.283008575439453, "incorrect_loss_uncond": -6.594040393829346}, "model_output": [{"sum_logits": -8.965473175048828, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.549616813659668, "logits_per_token": -4.482736587524414, "logits_per_char": -0.9961636861165365, "num_chars": 9}, {"sum_logits": -7.895525932312012, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.071504592895508, "logits_per_token": -7.895525932312012, "logits_per_char": -1.1279322760445731, "num_chars": 7}, {"sum_logits": -15.301166534423828, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.569496154785156, "logits_per_token": -7.650583267211914, "logits_per_char": -1.2750972112019856, "num_chars": 12}, {"sum_logits": -11.115243911743164, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.398252487182617, "logits_per_token": -5.557621955871582, "logits_per_char": -1.235027101304796, "num_chars": 9}, {"sum_logits": -8.284526824951172, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.63223648071289, "logits_per_token": -8.284526824951172, "logits_per_char": -1.1835038321358817, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 406, "native_id": "53e1e50d204f6ad5a0f69429eadae82e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.467161178588867, "incorrect_loss_raw": 13.668839693069458, "correct_loss_per_char": 0.3852401309543186, "incorrect_loss_per_char": 1.3554289905619232, "correct_loss_per_token": 1.7335805892944336, "incorrect_loss_per_token": 5.7188801050186155, "correct_loss_uncond": -8.110675811767578, "incorrect_loss_uncond": -8.212162733078003}, "model_output": [{"sum_logits": -3.467161178588867, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -11.577836990356445, "logits_per_token": -1.7335805892944336, "logits_per_char": -0.3852401309543186, "num_chars": 9}, {"sum_logits": -13.18972396850586, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.09952163696289, "logits_per_token": -6.59486198425293, "logits_per_char": -2.19828732808431, "num_chars": 6}, {"sum_logits": -14.873863220214844, "num_tokens": 5, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -24.905792236328125, "logits_per_token": -2.974772644042969, "logits_per_char": -0.9915908813476563, "num_chars": 15}, {"sum_logits": -13.400115966796875, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -22.3677978515625, "logits_per_token": -6.7000579833984375, "logits_per_char": -1.0307781512920673, "num_chars": 13}, {"sum_logits": -13.211655616760254, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -23.150897979736328, "logits_per_token": -6.605827808380127, "logits_per_char": -1.2010596015236594, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 407, "native_id": "48205cc84aab5e455b22e17c3cc7277d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.485626220703125, "incorrect_loss_raw": 12.624675989151001, "correct_loss_per_char": 0.7489733014787946, "incorrect_loss_per_char": 1.4656400998433432, "correct_loss_per_token": 5.2428131103515625, "incorrect_loss_per_token": 8.699119806289673, "correct_loss_uncond": -12.169490814208984, "incorrect_loss_uncond": -4.741323947906494}, "model_output": [{"sum_logits": -6.502169609069824, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.260344505310059, "logits_per_token": -6.502169609069824, "logits_per_char": -1.3004339218139649, "num_chars": 5}, {"sum_logits": -10.485626220703125, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -22.65511703491211, "logits_per_token": -5.2428131103515625, "logits_per_char": -0.7489733014787946, "num_chars": 14}, {"sum_logits": -12.592084884643555, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.528736114501953, "logits_per_token": -12.592084884643555, "logits_per_char": -2.0986808141072593, "num_chars": 6}, {"sum_logits": -12.901710510253906, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -22.911060333251953, "logits_per_token": -6.450855255126953, "logits_per_char": -0.9215507507324219, "num_chars": 14}, {"sum_logits": -18.50273895263672, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.763858795166016, "logits_per_token": -9.25136947631836, "logits_per_char": -1.5418949127197266, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 408, "native_id": "0f7419d25337e0a75503a015ae777905", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.480510711669922, "incorrect_loss_raw": 9.730495810508728, "correct_loss_per_char": 0.6800464283336293, "incorrect_loss_per_char": 1.4476345224986, "correct_loss_per_token": 3.740255355834961, "incorrect_loss_per_token": 9.730495810508728, "correct_loss_uncond": -8.69473648071289, "incorrect_loss_uncond": -3.578558564186096}, "model_output": [{"sum_logits": -7.480510711669922, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.175247192382812, "logits_per_token": -3.740255355834961, "logits_per_char": -0.6800464283336293, "num_chars": 11}, {"sum_logits": -7.777123928070068, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.523824691772461, "logits_per_token": -7.777123928070068, "logits_per_char": -0.8641248808966743, "num_chars": 9}, {"sum_logits": -7.548151016235352, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.699138641357422, "logits_per_token": -7.548151016235352, "logits_per_char": -0.7548151016235352, "num_chars": 10}, {"sum_logits": -14.01119613647461, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.82812213897705, "logits_per_token": -14.01119613647461, "logits_per_char": -2.8022392272949217, "num_chars": 5}, {"sum_logits": -9.585512161254883, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.185132026672363, "logits_per_token": -9.585512161254883, "logits_per_char": -1.3693588801792689, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 409, "native_id": "5cac4da628f0a58db980649079bd5784", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.624512195587158, "incorrect_loss_raw": 12.203013896942139, "correct_loss_per_char": 0.5780640244483948, "incorrect_loss_per_char": 0.9969118235603212, "correct_loss_per_token": 4.624512195587158, "incorrect_loss_per_token": 4.401921534538269, "correct_loss_uncond": -9.95598840713501, "incorrect_loss_uncond": -4.8597800731658936}, "model_output": [{"sum_logits": -15.573103904724121, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.27862548828125, "logits_per_token": -7.7865519523620605, "logits_per_char": -1.73034487830268, "num_chars": 9}, {"sum_logits": -8.688183784484863, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.43128490447998, "logits_per_token": -4.344091892242432, "logits_per_char": -0.6205845560346331, "num_chars": 14}, {"sum_logits": -11.337774276733398, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.26136589050293, "logits_per_token": -2.8344435691833496, "logits_per_char": -0.7558516184488933, "num_chars": 15}, {"sum_logits": -4.624512195587158, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.580500602722168, "logits_per_token": -4.624512195587158, "logits_per_char": -0.5780640244483948, "num_chars": 8}, {"sum_logits": -13.212993621826172, "num_tokens": 5, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.27989959716797, "logits_per_token": -2.6425987243652345, "logits_per_char": -0.8808662414550781, "num_chars": 15}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 410, "native_id": "78d1218aeff70a70904767349e3c4c53", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.295408248901367, "incorrect_loss_raw": 9.562215685844421, "correct_loss_per_char": 0.9295408248901367, "incorrect_loss_per_char": 0.8761360775340687, "correct_loss_per_token": 4.647704124450684, "incorrect_loss_per_token": 4.781107842922211, "correct_loss_uncond": -8.706884384155273, "incorrect_loss_uncond": -10.33670723438263}, "model_output": [{"sum_logits": -10.925104141235352, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.803943634033203, "logits_per_token": -5.462552070617676, "logits_per_char": -0.9104253451029459, "num_chars": 12}, {"sum_logits": -9.206489562988281, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.359928131103516, "logits_per_token": -4.603244781494141, "logits_per_char": -0.8369535966352983, "num_chars": 11}, {"sum_logits": -9.295408248901367, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.00229263305664, "logits_per_token": -4.647704124450684, "logits_per_char": -0.9295408248901367, "num_chars": 10}, {"sum_logits": -5.756951808929443, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.195960998535156, "logits_per_token": -2.8784759044647217, "logits_per_char": -0.38379678726196287, "num_chars": 15}, {"sum_logits": -12.36031723022461, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.235858917236328, "logits_per_token": -6.180158615112305, "logits_per_char": -1.3733685811360676, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 411, "native_id": "cce13a32fedb997c017d3fac87c34912", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.478408813476562, "incorrect_loss_raw": 7.734313011169434, "correct_loss_per_char": 0.8478408813476562, "incorrect_loss_per_char": 1.1474399950597194, "correct_loss_per_token": 8.478408813476562, "incorrect_loss_per_token": 7.734313011169434, "correct_loss_uncond": -5.364529609680176, "incorrect_loss_uncond": -6.399672746658325}, "model_output": [{"sum_logits": -9.811023712158203, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.33129596710205, "logits_per_token": -9.811023712158203, "logits_per_char": -1.4015748160226005, "num_chars": 7}, {"sum_logits": -8.478408813476562, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.842938423156738, "logits_per_token": -8.478408813476562, "logits_per_char": -0.8478408813476562, "num_chars": 10}, {"sum_logits": -6.22337007522583, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.330537796020508, "logits_per_token": -6.22337007522583, "logits_per_char": -0.565760915929621, "num_chars": 11}, {"sum_logits": -6.267579555511475, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.705824851989746, "logits_per_token": -6.267579555511475, "logits_per_char": -0.8953685079302106, "num_chars": 7}, {"sum_logits": -8.635278701782227, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.16828441619873, "logits_per_token": -8.635278701782227, "logits_per_char": -1.7270557403564453, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 412, "native_id": "6714487b839f648e348ac972ed114af3", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.601296424865723, "incorrect_loss_raw": 14.810458660125732, "correct_loss_per_char": 1.3251620531082153, "incorrect_loss_per_char": 1.0757820258647095, "correct_loss_per_token": 10.601296424865723, "incorrect_loss_per_token": 6.419334888458252, "correct_loss_uncond": -4.7688751220703125, "incorrect_loss_uncond": -8.717830657958984}, "model_output": [{"sum_logits": -13.85339641571045, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -23.854751586914062, "logits_per_token": -6.926698207855225, "logits_per_char": -1.3853396415710448, "num_chars": 10}, {"sum_logits": -10.601296424865723, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.370171546936035, "logits_per_token": -10.601296424865723, "logits_per_char": -1.3251620531082153, "num_chars": 8}, {"sum_logits": -9.343151092529297, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.047258377075195, "logits_per_token": -4.671575546264648, "logits_per_char": -0.7187039301945612, "num_chars": 13}, {"sum_logits": -12.383820533752441, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -24.171825408935547, "logits_per_token": -6.191910266876221, "logits_per_char": -0.8845586095537458, "num_chars": 14}, {"sum_logits": -23.661466598510742, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -29.039321899414062, "logits_per_token": -7.887155532836914, "logits_per_char": -1.3145259221394856, "num_chars": 18}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 413, "native_id": "3e536d9253bfac45de83e8ee291ca143", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.859099864959717, "incorrect_loss_raw": 7.287867188453674, "correct_loss_per_char": 1.5718199729919433, "incorrect_loss_per_char": 1.2734767964908054, "correct_loss_per_token": 3.9295499324798584, "incorrect_loss_per_token": 7.287867188453674, "correct_loss_uncond": -8.649326801300049, "incorrect_loss_uncond": -6.767907977104187}, "model_output": [{"sum_logits": -4.958021640777588, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.455964088439941, "logits_per_token": -4.958021640777588, "logits_per_char": -0.4958021640777588, "num_chars": 10}, {"sum_logits": -7.859099864959717, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.508426666259766, "logits_per_token": -3.9295499324798584, "logits_per_char": -1.5718199729919433, "num_chars": 5}, {"sum_logits": -4.466717720031738, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.691120147705078, "logits_per_token": -4.466717720031738, "logits_per_char": -0.744452953338623, "num_chars": 6}, {"sum_logits": -9.66511344909668, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.413416862487793, "logits_per_token": -9.66511344909668, "logits_per_char": -2.41627836227417, "num_chars": 4}, {"sum_logits": -10.061615943908691, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.662599563598633, "logits_per_token": -10.061615943908691, "logits_per_char": -1.4373737062726701, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 414, "native_id": "9f830faa0f8e3d7fb3a658c15a5fbe63", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.928152084350586, "incorrect_loss_raw": 10.149861812591553, "correct_loss_per_char": 0.41067934036254883, "incorrect_loss_per_char": 1.1340537190437319, "correct_loss_per_token": 4.928152084350586, "incorrect_loss_per_token": 8.839512586593628, "correct_loss_uncond": -9.701177597045898, "incorrect_loss_uncond": -5.057802200317383}, "model_output": [{"sum_logits": -10.83862590789795, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.929276466369629, "logits_per_token": -10.83862590789795, "logits_per_char": -1.083862590789795, "num_chars": 10}, {"sum_logits": -8.813241958618164, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.24791145324707, "logits_per_token": -8.813241958618164, "logits_per_char": -0.9792491065131294, "num_chars": 9}, {"sum_logits": -10.4647855758667, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.747281074523926, "logits_per_token": -10.4647855758667, "logits_per_char": -1.1627539528740778, "num_chars": 9}, {"sum_logits": -4.928152084350586, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.629329681396484, "logits_per_token": -4.928152084350586, "logits_per_char": -0.41067934036254883, "num_chars": 12}, {"sum_logits": -10.482793807983398, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.906187057495117, "logits_per_token": -5.241396903991699, "logits_per_char": -1.3103492259979248, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 415, "native_id": "bbcef409e0acb71b515acc144d5b402c_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 4.360511779785156, "incorrect_loss_raw": 7.9916980266571045, "correct_loss_per_char": 0.2907007853190104, "incorrect_loss_per_char": 0.9500152275159761, "correct_loss_per_token": 2.180255889892578, "incorrect_loss_per_token": 6.365229099988937, "correct_loss_uncond": -14.980766296386719, "incorrect_loss_uncond": -7.918327569961548}, "model_output": [{"sum_logits": -3.419172763824463, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.75247573852539, "logits_per_token": -1.7095863819122314, "logits_per_char": -0.24422662598746164, "num_chars": 14}, {"sum_logits": -11.502120971679688, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.998748779296875, "logits_per_token": -11.502120971679688, "logits_per_char": -1.643160138811384, "num_chars": 7}, {"sum_logits": -6.395052433013916, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.45389175415039, "logits_per_token": -1.598763108253479, "logits_per_char": -0.5813684030012651, "num_chars": 11}, {"sum_logits": -4.360511779785156, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.341278076171875, "logits_per_token": -2.180255889892578, "logits_per_char": -0.2907007853190104, "num_chars": 15}, {"sum_logits": -10.650445938110352, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.434986114501953, "logits_per_token": -10.650445938110352, "logits_per_char": -1.331305742263794, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 416, "native_id": "cbb0c9a69ca0922371a48177087ef407", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.2683684825897217, "incorrect_loss_raw": 14.204972386360168, "correct_loss_per_char": 0.5670921206474304, "incorrect_loss_per_char": 1.3021113253556766, "correct_loss_per_token": 2.2683684825897217, "incorrect_loss_per_token": 6.9569209814071655, "correct_loss_uncond": -8.904228448867798, "incorrect_loss_uncond": -2.497048258781433}, "model_output": [{"sum_logits": -6.229116916656494, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.126147270202637, "logits_per_token": -6.229116916656494, "logits_per_char": -1.5572792291641235, "num_chars": 4}, {"sum_logits": -22.18091583251953, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.774368286132812, "logits_per_token": -7.393638610839844, "logits_per_char": -1.3863072395324707, "num_chars": 16}, {"sum_logits": -2.2683684825897217, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -11.17259693145752, "logits_per_token": -2.2683684825897217, "logits_per_char": -0.5670921206474304, "num_chars": 4}, {"sum_logits": -16.010160446166992, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -17.242048263549805, "logits_per_token": -8.005080223083496, "logits_per_char": -1.2315508035513072, "num_chars": 13}, {"sum_logits": -12.399696350097656, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.665518760681152, "logits_per_token": -6.199848175048828, "logits_per_char": -1.0333080291748047, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 417, "native_id": "b92f786638796fc028947ac0e9a44fef", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 4.91151762008667, "incorrect_loss_raw": 6.599266588687897, "correct_loss_per_char": 0.35082268714904785, "incorrect_loss_per_char": 0.7198266693305619, "correct_loss_per_token": 1.2278794050216675, "incorrect_loss_per_token": 4.407688170671463, "correct_loss_uncond": -12.820780277252197, "incorrect_loss_uncond": -8.890372693538666}, "model_output": [{"sum_logits": -3.5405161380767822, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.72293472290039, "logits_per_token": -1.7702580690383911, "logits_per_char": -0.35405161380767824, "num_chars": 10}, {"sum_logits": -8.864439010620117, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.470205307006836, "logits_per_token": -8.864439010620117, "logits_per_char": -1.266348430088588, "num_chars": 7}, {"sum_logits": -5.340768814086914, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.262495040893555, "logits_per_token": -2.670384407043457, "logits_per_char": -0.5934187571207682, "num_chars": 9}, {"sum_logits": -8.651342391967773, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.50292205810547, "logits_per_token": -4.325671195983887, "logits_per_char": -0.6654878763052133, "num_chars": 13}, {"sum_logits": -4.91151762008667, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.732297897338867, "logits_per_token": -1.2278794050216675, "logits_per_char": -0.35082268714904785, "num_chars": 14}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 418, "native_id": "5abeb4a2126597d4ef7b5a32e9e22abf", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.008224010467529, "incorrect_loss_raw": 6.967995285987854, "correct_loss_per_char": 0.5010280013084412, "incorrect_loss_per_char": 0.9576994081338247, "correct_loss_per_token": 4.008224010467529, "incorrect_loss_per_token": 6.5856388211250305, "correct_loss_uncond": -9.56862211227417, "incorrect_loss_uncond": -6.965752005577087}, "model_output": [{"sum_logits": -3.058851718902588, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -16.48346710205078, "logits_per_token": -1.529425859451294, "logits_per_char": -0.254904309908549, "num_chars": 12}, {"sum_logits": -5.517542839050293, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.470205307006836, "logits_per_token": -5.517542839050293, "logits_per_char": -0.7882204055786133, "num_chars": 7}, {"sum_logits": -9.017390251159668, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -11.889653205871582, "logits_per_token": -9.017390251159668, "logits_per_char": -1.502898375193278, "num_chars": 6}, {"sum_logits": -10.278196334838867, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.891663551330566, "logits_per_token": -10.278196334838867, "logits_per_char": -1.2847745418548584, "num_chars": 8}, {"sum_logits": -4.008224010467529, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.5768461227417, "logits_per_token": -4.008224010467529, "logits_per_char": -0.5010280013084412, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 419, "native_id": "8d4b0312f02be445e09a9462873d02bb", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.757050514221191, "incorrect_loss_raw": 11.061494827270508, "correct_loss_per_char": 0.7196313142776489, "incorrect_loss_per_char": 1.505042048863002, "correct_loss_per_token": 5.757050514221191, "incorrect_loss_per_token": 7.162320454915364, "correct_loss_uncond": -8.871910095214844, "incorrect_loss_uncond": -4.968076229095459}, "model_output": [{"sum_logits": -18.32642364501953, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -22.269519805908203, "logits_per_token": -9.163211822509766, "logits_per_char": -2.2908029556274414, "num_chars": 8}, {"sum_logits": -7.70267391204834, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -11.10049819946289, "logits_per_token": -7.70267391204834, "logits_per_char": -1.540534782409668, "num_chars": 5}, {"sum_logits": -8.56665325164795, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.837890625, "logits_per_token": -8.56665325164795, "logits_per_char": -1.2238076073782784, "num_chars": 7}, {"sum_logits": -5.757050514221191, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.628960609436035, "logits_per_token": -5.757050514221191, "logits_per_char": -0.7196313142776489, "num_chars": 8}, {"sum_logits": -9.650228500366211, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.910375595092773, "logits_per_token": -3.216742833455404, "logits_per_char": -0.9650228500366211, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 420, "native_id": "f7140f00ddd8d1c5d93b05ea32ad1fff", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.480906963348389, "incorrect_loss_raw": 13.320197463035583, "correct_loss_per_char": 1.4961813926696776, "incorrect_loss_per_char": 1.3843026720342182, "correct_loss_per_token": 7.480906963348389, "incorrect_loss_per_token": 7.645254731178284, "correct_loss_uncond": -4.7738213539123535, "incorrect_loss_uncond": -4.0247708559036255}, "model_output": [{"sum_logits": -22.095918655395508, "num_tokens": 4, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -28.91449737548828, "logits_per_token": -5.523979663848877, "logits_per_char": -0.9206632773081461, "num_chars": 24}, {"sum_logits": -12.255663871765137, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.22107219696045, "logits_per_token": -6.127831935882568, "logits_per_char": -1.531957983970642, "num_chars": 8}, {"sum_logits": -12.271910667419434, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.737959861755371, "logits_per_token": -12.271910667419434, "logits_per_char": -1.7531300953456335, "num_chars": 7}, {"sum_logits": -7.480906963348389, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.254728317260742, "logits_per_token": -7.480906963348389, "logits_per_char": -1.4961813926696776, "num_chars": 5}, {"sum_logits": -6.657296657562256, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.506343841552734, "logits_per_token": -6.657296657562256, "logits_per_char": -1.3314593315124512, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 421, "native_id": "8b3b598a647dfd2d63fcedce5f461040", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.1641969680786133, "incorrect_loss_raw": 12.257923603057861, "correct_loss_per_char": 0.14427979787190756, "incorrect_loss_per_char": 1.4348926783626914, "correct_loss_per_token": 1.0820984840393066, "incorrect_loss_per_token": 6.128961801528931, "correct_loss_uncond": -18.211813926696777, "incorrect_loss_uncond": -6.832686901092529}, "model_output": [{"sum_logits": -2.1641969680786133, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.37601089477539, "logits_per_token": -1.0820984840393066, "logits_per_char": -0.14427979787190756, "num_chars": 15}, {"sum_logits": -10.453950881958008, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.095869064331055, "logits_per_token": -5.226975440979004, "logits_per_char": -1.306743860244751, "num_chars": 8}, {"sum_logits": -15.075726509094238, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -25.68439483642578, "logits_per_token": -7.537863254547119, "logits_per_char": -1.159671269930326, "num_chars": 13}, {"sum_logits": -11.589250564575195, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.674301147460938, "logits_per_token": -5.794625282287598, "logits_per_char": -1.2876945071750217, "num_chars": 9}, {"sum_logits": -11.912766456604004, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.907876968383789, "logits_per_token": -5.956383228302002, "logits_per_char": -1.9854610761006672, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 422, "native_id": "7a900bc3a373806b6c56f0e19534005f", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.322104454040527, "incorrect_loss_raw": 12.41539978981018, "correct_loss_per_char": 0.9152630567550659, "incorrect_loss_per_char": 1.0253536107493382, "correct_loss_per_token": 7.322104454040527, "incorrect_loss_per_token": 7.101843476295471, "correct_loss_uncond": -8.048067092895508, "incorrect_loss_uncond": -6.365354061126709}, "model_output": [{"sum_logits": -17.531532287597656, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -22.50275230407715, "logits_per_token": -8.765766143798828, "logits_per_char": -0.8765766143798828, "num_chars": 20}, {"sum_logits": -11.293960571289062, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.844112396240234, "logits_per_token": -5.646980285644531, "logits_per_char": -0.6643506218405331, "num_chars": 17}, {"sum_logits": -7.153148651123047, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -11.102972984313965, "logits_per_token": -7.153148651123047, "logits_per_char": -1.192191441853841, "num_chars": 6}, {"sum_logits": -13.682957649230957, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -22.67317771911621, "logits_per_token": -6.8414788246154785, "logits_per_char": -1.3682957649230958, "num_chars": 10}, {"sum_logits": -7.322104454040527, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.370171546936035, "logits_per_token": -7.322104454040527, "logits_per_char": -0.9152630567550659, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 423, "native_id": "3d79c10ddf26a5ed7dc0bb168fb0b3ed", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.220569133758545, "incorrect_loss_raw": 10.604241847991943, "correct_loss_per_char": 0.30709230198579673, "incorrect_loss_per_char": 1.2274374810476152, "correct_loss_per_token": 1.7401897112528484, "incorrect_loss_per_token": 7.229022026062012, "correct_loss_uncond": -15.75819444656372, "incorrect_loss_uncond": -5.344921588897705}, "model_output": [{"sum_logits": -12.920181274414062, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -17.843746185302734, "logits_per_token": -6.460090637207031, "logits_per_char": -1.435575697157118, "num_chars": 9}, {"sum_logits": -5.220569133758545, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -20.978763580322266, "logits_per_token": -1.7401897112528484, "logits_per_char": -0.30709230198579673, "num_chars": 17}, {"sum_logits": -6.197736740112305, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -12.562989234924316, "logits_per_token": -6.197736740112305, "logits_per_char": -0.8853909628731864, "num_chars": 7}, {"sum_logits": -9.217472076416016, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -12.893023490905762, "logits_per_token": -9.217472076416016, "logits_per_char": -1.0241635640462239, "num_chars": 9}, {"sum_logits": -14.08157730102539, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -20.49689483642578, "logits_per_token": -7.040788650512695, "logits_per_char": -1.5646197001139324, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 424, "native_id": "b7091d2bfcea421d787ce9e7982f104a", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.338838577270508, "incorrect_loss_raw": 10.40961480140686, "correct_loss_per_char": 0.38134561266217915, "incorrect_loss_per_char": 0.9250857786698774, "correct_loss_per_token": 1.7796128590901692, "incorrect_loss_per_token": 5.777289430300394, "correct_loss_uncond": -11.333147048950195, "incorrect_loss_uncond": -8.964983701705933}, "model_output": [{"sum_logits": -7.715993881225586, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.530681610107422, "logits_per_token": -7.715993881225586, "logits_per_char": -0.701453989202326, "num_chars": 11}, {"sum_logits": -9.40841293334961, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -24.631166458129883, "logits_per_token": -3.1361376444498696, "logits_per_char": -0.6720294952392578, "num_chars": 14}, {"sum_logits": -5.338838577270508, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -16.671985626220703, "logits_per_token": -1.7796128590901692, "logits_per_char": -0.38134561266217915, "num_chars": 14}, {"sum_logits": -13.700016975402832, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -19.614225387573242, "logits_per_token": -6.850008487701416, "logits_per_char": -1.2454560886729846, "num_chars": 11}, {"sum_logits": -10.814035415649414, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -17.722320556640625, "logits_per_token": -5.407017707824707, "logits_per_char": -1.0814035415649415, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 425, "native_id": "d060ab71d0efff3cab5960089a6bb3a2", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.525835990905762, "incorrect_loss_raw": 11.635808944702148, "correct_loss_per_char": 0.6841669082641602, "incorrect_loss_per_char": 1.1799568145994157, "correct_loss_per_token": 3.762917995452881, "incorrect_loss_per_token": 8.056398868560791, "correct_loss_uncond": -7.758258819580078, "incorrect_loss_uncond": -2.916138172149658}, "model_output": [{"sum_logits": -9.286809921264648, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.153390884399414, "logits_per_token": -4.643404960632324, "logits_per_char": -1.0318677690294054, "num_chars": 9}, {"sum_logits": -7.525835990905762, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.28409481048584, "logits_per_token": -3.762917995452881, "logits_per_char": -0.6841669082641602, "num_chars": 11}, {"sum_logits": -11.262822151184082, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.99741268157959, "logits_per_token": -11.262822151184082, "logits_per_char": -1.1262822151184082, "num_chars": 10}, {"sum_logits": -6.645133018493652, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.576016426086426, "logits_per_token": -6.645133018493652, "logits_per_char": -0.9493047169276646, "num_chars": 7}, {"sum_logits": -19.34847068786621, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.480968475341797, "logits_per_token": -9.674235343933105, "logits_per_char": -1.6123725573221843, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 426, "native_id": "b399f6008d90dbd92bcce5abed4c1fd1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 2.7953367233276367, "incorrect_loss_raw": 5.314357042312622, "correct_loss_per_char": 0.5590673446655273, "incorrect_loss_per_char": 0.449741967802956, "correct_loss_per_token": 2.7953367233276367, "incorrect_loss_per_token": 3.5342676639556885, "correct_loss_uncond": -11.592813491821289, "incorrect_loss_uncond": -10.054520606994629}, "model_output": [{"sum_logits": -2.7953367233276367, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.388150215148926, "logits_per_token": -2.7953367233276367, "logits_per_char": -0.5590673446655273, "num_chars": 5}, {"sum_logits": -7.120357513427734, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.393335342407227, "logits_per_token": -3.560178756713867, "logits_per_char": -0.5085969652448382, "num_chars": 14}, {"sum_logits": -7.120357513427734, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.393335342407227, "logits_per_token": -3.560178756713867, "logits_per_char": -0.5085969652448382, "num_chars": 14}, {"sum_logits": -1.688981533050537, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": true, "sum_logits_uncond": -13.715117454528809, "logits_per_token": -1.688981533050537, "logits_per_char": -0.3377963066101074, "num_chars": 5}, {"sum_logits": -5.327731609344482, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.973722457885742, "logits_per_token": -5.327731609344482, "logits_per_char": -0.4439776341120402, "num_chars": 12}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 427, "native_id": "80c19c62338edae0e8a1f5c6fec0d29a", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.110390663146973, "incorrect_loss_raw": 10.415221214294434, "correct_loss_per_char": 1.0122656292385526, "incorrect_loss_per_char": 1.788523508821215, "correct_loss_per_token": 4.555195331573486, "incorrect_loss_per_token": 8.192568063735962, "correct_loss_uncond": -6.439226150512695, "incorrect_loss_uncond": -4.202253580093384}, "model_output": [{"sum_logits": -17.781225204467773, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.13357162475586, "logits_per_token": -8.890612602233887, "logits_per_char": -3.556245040893555, "num_chars": 5}, {"sum_logits": -9.110390663146973, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.549616813659668, "logits_per_token": -4.555195331573486, "logits_per_char": -1.0122656292385526, "num_chars": 9}, {"sum_logits": -6.639242172241211, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.800455093383789, "logits_per_token": -6.639242172241211, "logits_per_char": -1.3278484344482422, "num_chars": 5}, {"sum_logits": -10.803308486938477, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.092910766601562, "logits_per_token": -10.803308486938477, "logits_per_char": -1.3504135608673096, "num_chars": 8}, {"sum_logits": -6.437108993530273, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.442961692810059, "logits_per_token": -6.437108993530273, "logits_per_char": -0.9195869990757534, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 428, "native_id": "1a4e83b433620cb2d7d806882f8d57e4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.459715843200684, "incorrect_loss_raw": 14.150146484375, "correct_loss_per_char": 0.6824644804000854, "incorrect_loss_per_char": 1.3668658561176723, "correct_loss_per_token": 5.459715843200684, "incorrect_loss_per_token": 9.28242313861847, "correct_loss_uncond": -8.835173606872559, "incorrect_loss_uncond": -4.141891002655029}, "model_output": [{"sum_logits": -5.459715843200684, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.294889450073242, "logits_per_token": -5.459715843200684, "logits_per_char": -0.6824644804000854, "num_chars": 8}, {"sum_logits": -12.299689292907715, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -21.30907440185547, "logits_per_token": -6.149844646453857, "logits_per_char": -0.6149844646453857, "num_chars": 20}, {"sum_logits": -14.728086471557617, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.27340316772461, "logits_per_token": -14.728086471557617, "logits_per_char": -2.454681078592936, "num_chars": 6}, {"sum_logits": -19.9815731048584, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -22.41942024230957, "logits_per_token": -6.660524368286133, "logits_per_char": -1.3321048736572265, "num_chars": 15}, {"sum_logits": -9.59123706817627, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.166252136230469, "logits_per_token": -9.59123706817627, "logits_per_char": -1.065693007575141, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 429, "native_id": "b9e04a53c0ee7325b901de4d12d56884", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.173724889755249, "incorrect_loss_raw": 8.434230029582977, "correct_loss_per_char": 0.6347449779510498, "incorrect_loss_per_char": 1.2807047139514576, "correct_loss_per_token": 3.173724889755249, "incorrect_loss_per_token": 7.980930298566818, "correct_loss_uncond": -8.917255640029907, "incorrect_loss_uncond": -6.862818777561188}, "model_output": [{"sum_logits": -8.562634468078613, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -11.641695976257324, "logits_per_token": -8.562634468078613, "logits_per_char": -1.7125268936157227, "num_chars": 5}, {"sum_logits": -7.660428047180176, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.214923858642578, "logits_per_token": -7.660428047180176, "logits_per_char": -0.7660428047180176, "num_chars": 10}, {"sum_logits": -3.173724889755249, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.090980529785156, "logits_per_token": -3.173724889755249, "logits_per_char": -0.6347449779510498, "num_chars": 5}, {"sum_logits": -3.6263978481292725, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.85131072998047, "logits_per_token": -1.8131989240646362, "logits_per_char": -0.32967253164811566, "num_chars": 11}, {"sum_logits": -13.887459754943848, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.480264663696289, "logits_per_token": -13.887459754943848, "logits_per_char": -2.3145766258239746, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 430, "native_id": "7490aa460f66000555a8a94008179cbb", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 3.566823959350586, "incorrect_loss_raw": 8.06472635269165, "correct_loss_per_char": 0.324256723577326, "incorrect_loss_per_char": 0.7652343420755296, "correct_loss_per_token": 3.566823959350586, "incorrect_loss_per_token": 5.689566254615784, "correct_loss_uncond": -11.358461380004883, "incorrect_loss_uncond": -7.62458872795105}, "model_output": [{"sum_logits": -4.266156196594238, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.171658515930176, "logits_per_token": -4.266156196594238, "logits_per_char": -0.3047254426138742, "num_chars": 14}, {"sum_logits": -6.703417778015137, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.817913055419922, "logits_per_token": -3.3517088890075684, "logits_per_char": -0.4468945185343424, "num_chars": 15}, {"sum_logits": -12.297863006591797, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.341487884521484, "logits_per_token": -6.148931503295898, "logits_per_char": -1.0248219172159831, "num_chars": 12}, {"sum_logits": -8.99146842956543, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.426200866699219, "logits_per_token": -8.99146842956543, "logits_per_char": -1.2844954899379186, "num_chars": 7}, {"sum_logits": -3.566823959350586, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.925285339355469, "logits_per_token": -3.566823959350586, "logits_per_char": -0.324256723577326, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 431, "native_id": "ad8ee2965a33ff4b0e3d2ac732676594", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 13.8447904586792, "incorrect_loss_raw": 12.449833154678345, "correct_loss_per_char": 0.9229860305786133, "incorrect_loss_per_char": 0.9109405988331236, "correct_loss_per_token": 6.9223952293396, "incorrect_loss_per_token": 6.224916577339172, "correct_loss_uncond": -7.673333168029785, "incorrect_loss_uncond": -4.926642179489136}, "model_output": [{"sum_logits": -13.242259979248047, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -19.098804473876953, "logits_per_token": -6.621129989624023, "logits_per_char": -0.6969610515393709, "num_chars": 19}, {"sum_logits": -9.077319145202637, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -12.262495040893555, "logits_per_token": -4.538659572601318, "logits_per_char": -1.0085910161336262, "num_chars": 9}, {"sum_logits": -12.851083755493164, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -19.418678283691406, "logits_per_token": -6.425541877746582, "logits_per_char": -1.1682803414084695, "num_chars": 11}, {"sum_logits": -14.628669738769531, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -18.725923538208008, "logits_per_token": -7.314334869384766, "logits_per_char": -0.7699299862510279, "num_chars": 19}, {"sum_logits": -13.8447904586792, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -21.518123626708984, "logits_per_token": -6.9223952293396, "logits_per_char": -0.9229860305786133, "num_chars": 15}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 432, "native_id": "64d2310eff6b661baeb41b4ccc392e35", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 15.456945419311523, "incorrect_loss_raw": 15.013692378997803, "correct_loss_per_char": 1.4051768563010476, "incorrect_loss_per_char": 1.0748879875455584, "correct_loss_per_token": 7.728472709655762, "incorrect_loss_per_token": 7.018792649110158, "correct_loss_uncond": -4.157279968261719, "incorrect_loss_uncond": -3.626030206680298}, "model_output": [{"sum_logits": -13.94534969329834, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.868622779846191, "logits_per_token": -13.94534969329834, "logits_per_char": -1.394534969329834, "num_chars": 10}, {"sum_logits": -14.823978424072266, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -23.238170623779297, "logits_per_token": -4.941326141357422, "logits_per_char": -0.7411989212036133, "num_chars": 20}, {"sum_logits": -16.40561294555664, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.28645133972168, "logits_per_token": -5.46853764851888, "logits_per_char": -1.1718294961111886, "num_chars": 14}, {"sum_logits": -15.456945419311523, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.614225387573242, "logits_per_token": -7.728472709655762, "logits_per_char": -1.4051768563010476, "num_chars": 11}, {"sum_logits": -14.879828453063965, "num_tokens": 4, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.165645599365234, "logits_per_token": -3.719957113265991, "logits_per_char": -0.9919885635375977, "num_chars": 15}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 433, "native_id": "6b1f5ebd9d0dbc7e34a598456a6091a8", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.143941402435303, "incorrect_loss_raw": 13.003219842910767, "correct_loss_per_char": 0.7937712669372559, "incorrect_loss_per_char": 1.6372030287507982, "correct_loss_per_token": 2.3813138008117676, "incorrect_loss_per_token": 10.489816188812256, "correct_loss_uncond": -13.80654001235962, "incorrect_loss_uncond": -2.3312954902648926}, "model_output": [{"sum_logits": -14.990317344665527, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.871305465698242, "logits_per_token": -14.990317344665527, "logits_per_char": -1.873789668083191, "num_chars": 8}, {"sum_logits": -8.887327194213867, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.614473342895508, "logits_per_token": -8.887327194213867, "logits_per_char": -1.2696181706019811, "num_chars": 7}, {"sum_logits": -7.143941402435303, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.950481414794922, "logits_per_token": -2.3813138008117676, "logits_per_char": -0.7937712669372559, "num_chars": 9}, {"sum_logits": -20.107229232788086, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.978534698486328, "logits_per_token": -10.053614616394043, "logits_per_char": -2.5134036540985107, "num_chars": 8}, {"sum_logits": -8.028005599975586, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.873747825622559, "logits_per_token": -8.028005599975586, "logits_per_char": -0.8920006222195096, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 434, "native_id": "080ef6941410139d6869e78122bc741e", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.015509605407715, "incorrect_loss_raw": 14.457189559936523, "correct_loss_per_char": 0.8346258004506429, "incorrect_loss_per_char": 1.6833553828111216, "correct_loss_per_token": 3.3385032018025718, "incorrect_loss_per_token": 7.242621342341105, "correct_loss_uncond": -9.461676597595215, "incorrect_loss_uncond": -3.9983222484588623}, "model_output": [{"sum_logits": -11.982717514038086, "num_tokens": 6, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -21.281784057617188, "logits_per_token": -1.9971195856730144, "logits_per_char": -0.7048657361198875, "num_chars": 17}, {"sum_logits": -10.679577827453613, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.52644157409668, "logits_per_token": -3.559859275817871, "logits_per_char": -0.7628269876752581, "num_chars": 14}, {"sum_logits": -10.015509605407715, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.47718620300293, "logits_per_token": -3.3385032018025718, "logits_per_char": -0.8346258004506429, "num_chars": 12}, {"sum_logits": -23.50591278076172, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -22.52896499633789, "logits_per_token": -11.75295639038086, "logits_per_char": -2.350591278076172, "num_chars": 10}, {"sum_logits": -11.660550117492676, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.484856605529785, "logits_per_token": -11.660550117492676, "logits_per_char": -2.915137529373169, "num_chars": 4}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 435, "native_id": "6c70d98cfb8e97fda8caefcee761a229", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.86319637298584, "incorrect_loss_raw": 9.231835842132568, "correct_loss_per_char": 0.786319637298584, "incorrect_loss_per_char": 0.9383235589607613, "correct_loss_per_token": 7.86319637298584, "incorrect_loss_per_token": 9.231835842132568, "correct_loss_uncond": -5.241243362426758, "incorrect_loss_uncond": -4.260763168334961}, "model_output": [{"sum_logits": -7.646427154541016, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.622650146484375, "logits_per_token": -7.646427154541016, "logits_per_char": -1.274404525756836, "num_chars": 6}, {"sum_logits": -11.608232498168945, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.533917427062988, "logits_per_token": -11.608232498168945, "logits_per_char": -1.055293863469904, "num_chars": 11}, {"sum_logits": -2.780207633972168, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.458654403686523, "logits_per_token": -2.780207633972168, "logits_per_char": -0.2780207633972168, "num_chars": 10}, {"sum_logits": -7.86319637298584, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.104439735412598, "logits_per_token": -7.86319637298584, "logits_per_char": -0.786319637298584, "num_chars": 10}, {"sum_logits": -14.892476081848145, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.35517406463623, "logits_per_token": -14.892476081848145, "logits_per_char": -1.145575083219088, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 436, "native_id": "75ac594b4fdbfba006e61315d1b2c815", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.426031112670898, "incorrect_loss_raw": 10.688896417617798, "correct_loss_per_char": 0.7141269445419312, "incorrect_loss_per_char": 1.180872192953387, "correct_loss_per_token": 5.713015556335449, "incorrect_loss_per_token": 6.541176199913025, "correct_loss_uncond": -9.963590621948242, "incorrect_loss_uncond": -6.311587333679199}, "model_output": [{"sum_logits": -11.426031112670898, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.38962173461914, "logits_per_token": -5.713015556335449, "logits_per_char": -0.7141269445419312, "num_chars": 16}, {"sum_logits": -9.953731536865234, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.642633438110352, "logits_per_token": -4.976865768432617, "logits_per_char": -1.1059701707628038, "num_chars": 9}, {"sum_logits": -9.573823928833008, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.132861137390137, "logits_per_token": -9.573823928833008, "logits_per_char": -1.9147647857666015, "num_chars": 5}, {"sum_logits": -15.036300659179688, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.726823806762695, "logits_per_token": -7.518150329589844, "logits_per_char": -1.1566385122445912, "num_chars": 13}, {"sum_logits": -8.191729545593262, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -22.499616622924805, "logits_per_token": -4.095864772796631, "logits_per_char": -0.5461153030395508, "num_chars": 15}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 437, "native_id": "5a8e7d2f97f76adb23fbd59a009d16f0", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.699682235717773, "incorrect_loss_raw": 8.957894325256348, "correct_loss_per_char": 1.9499470392862956, "incorrect_loss_per_char": 0.935027747709527, "correct_loss_per_token": 11.699682235717773, "incorrect_loss_per_token": 6.689043998718262, "correct_loss_uncond": -3.194171905517578, "incorrect_loss_uncond": -6.610040187835693}, "model_output": [{"sum_logits": -11.699682235717773, "num_tokens": 1, "num_tokens_all": 168, "is_greedy": false, "sum_logits_uncond": -14.893854141235352, "logits_per_token": -11.699682235717773, "logits_per_char": -1.9499470392862956, "num_chars": 6}, {"sum_logits": -8.562376976013184, "num_tokens": 1, "num_tokens_all": 168, "is_greedy": false, "sum_logits_uncond": -14.197783470153809, "logits_per_token": -8.562376976013184, "logits_per_char": -1.070297122001648, "num_chars": 8}, {"sum_logits": -6.073928356170654, "num_tokens": 1, "num_tokens_all": 168, "is_greedy": false, "sum_logits_uncond": -14.495186805725098, "logits_per_token": -6.073928356170654, "logits_per_char": -0.6748809284634061, "num_chars": 9}, {"sum_logits": -7.582170009613037, "num_tokens": 1, "num_tokens_all": 168, "is_greedy": false, "sum_logits_uncond": -14.434986114501953, "logits_per_token": -7.582170009613037, "logits_per_char": -0.9477712512016296, "num_chars": 8}, {"sum_logits": -13.613101959228516, "num_tokens": 3, "num_tokens_all": 170, "is_greedy": false, "sum_logits_uncond": -19.143781661987305, "logits_per_token": -4.537700653076172, "logits_per_char": -1.0471616891714244, "num_chars": 13}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 438, "native_id": "178cb8153123716aa94f286b615149d4", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.66195821762085, "incorrect_loss_raw": 7.695849299430847, "correct_loss_per_char": 0.9323916435241699, "incorrect_loss_per_char": 1.023744913081189, "correct_loss_per_token": 4.66195821762085, "incorrect_loss_per_token": 6.8190319538116455, "correct_loss_uncond": -7.592770099639893, "incorrect_loss_uncond": -8.590426564216614}, "model_output": [{"sum_logits": -13.687654495239258, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.733631134033203, "logits_per_token": -13.687654495239258, "logits_per_char": -2.281275749206543, "num_chars": 6}, {"sum_logits": -3.2543983459472656, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.388150215148926, "logits_per_token": -3.2543983459472656, "logits_per_char": -0.6508796691894532, "num_chars": 5}, {"sum_logits": -7.014538764953613, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.22709083557129, "logits_per_token": -3.5072693824768066, "logits_per_char": -0.6376853422685103, "num_chars": 11}, {"sum_logits": -4.66195821762085, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.254728317260742, "logits_per_token": -4.66195821762085, "logits_per_char": -0.9323916435241699, "num_chars": 5}, {"sum_logits": -6.826805591583252, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.796231269836426, "logits_per_token": -6.826805591583252, "logits_per_char": -0.5251388916602502, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 439, "native_id": "cc917ca0e03c91a5141920f5a902a36c", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.288090705871582, "incorrect_loss_raw": 11.746605634689331, "correct_loss_per_char": 0.9288090705871582, "incorrect_loss_per_char": 1.4674021584647043, "correct_loss_per_token": 3.0960302352905273, "incorrect_loss_per_token": 6.163719654083252, "correct_loss_uncond": -7.788243293762207, "incorrect_loss_uncond": -6.3077168464660645}, "model_output": [{"sum_logits": -9.288090705871582, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.07633399963379, "logits_per_token": -3.0960302352905273, "logits_per_char": -0.9288090705871582, "num_chars": 10}, {"sum_logits": -8.352749824523926, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.026251792907715, "logits_per_token": -8.352749824523926, "logits_per_char": -1.1932499749319894, "num_chars": 7}, {"sum_logits": -8.418322563171387, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.043804168701172, "logits_per_token": -4.209161281585693, "logits_per_char": -1.2026175090244837, "num_chars": 7}, {"sum_logits": -18.088245391845703, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -23.864871978759766, "logits_per_token": -6.029415130615234, "logits_per_char": -2.261030673980713, "num_chars": 8}, {"sum_logits": -12.127104759216309, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.28236198425293, "logits_per_token": -6.063552379608154, "logits_per_char": -1.212710475921631, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 440, "native_id": "a7d51b753c2113d8b2dbd0ebb5375855_1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.7342245578765869, "incorrect_loss_raw": 11.588841199874878, "correct_loss_per_char": 0.06118537982304891, "incorrect_loss_per_char": 0.7538017302751542, "correct_loss_per_token": 0.36711227893829346, "incorrect_loss_per_token": 4.80890409151713, "correct_loss_uncond": -16.56212019920349, "incorrect_loss_uncond": -9.837048292160034}, "model_output": [{"sum_logits": -13.36332893371582, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -26.330224990844727, "logits_per_token": -4.454442977905273, "logits_per_char": -0.6681664466857911, "num_chars": 20}, {"sum_logits": -11.276416778564453, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -18.00812530517578, "logits_per_token": -5.638208389282227, "logits_per_char": -0.7517611185709635, "num_chars": 15}, {"sum_logits": -10.289067268371582, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -23.26673126220703, "logits_per_token": -3.429689089457194, "logits_per_char": -0.6430667042732239, "num_chars": 16}, {"sum_logits": -0.7342245578765869, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": true, "sum_logits_uncond": -17.296344757080078, "logits_per_token": -0.36711227893829346, "logits_per_char": -0.06118537982304891, "num_chars": 12}, {"sum_logits": -11.426551818847656, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -18.09847640991211, "logits_per_token": -5.713275909423828, "logits_per_char": -0.9522126515706381, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 441, "native_id": "e71da9e95b321763c86e879a47bbd327", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 9.017051696777344, "incorrect_loss_raw": 10.685794353485107, "correct_loss_per_char": 0.7514209747314453, "incorrect_loss_per_char": 1.2059393196997017, "correct_loss_per_token": 9.017051696777344, "incorrect_loss_per_token": 7.96929669380188, "correct_loss_uncond": -4.04850959777832, "incorrect_loss_uncond": -2.3277199268341064}, "model_output": [{"sum_logits": -9.853673934936523, "num_tokens": 1, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -14.045679092407227, "logits_per_token": -9.853673934936523, "logits_per_char": -1.2317092418670654, "num_chars": 8}, {"sum_logits": -11.157522201538086, "num_tokens": 1, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -11.958901405334473, "logits_per_token": -11.157522201538086, "logits_per_char": -1.2397246890597873, "num_chars": 9}, {"sum_logits": -10.682446479797363, "num_tokens": 2, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -13.630334854125977, "logits_per_token": -5.341223239898682, "logits_per_char": -0.9711314981633966, "num_chars": 11}, {"sum_logits": -11.049534797668457, "num_tokens": 2, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -12.41914176940918, "logits_per_token": -5.5247673988342285, "logits_per_char": -1.3811918497085571, "num_chars": 8}, {"sum_logits": -9.017051696777344, "num_tokens": 1, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -13.065561294555664, "logits_per_token": -9.017051696777344, "logits_per_char": -0.7514209747314453, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 442, "native_id": "ec86900559a0faf2aef066e511a4cfa6", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.198556423187256, "incorrect_loss_raw": 8.331467986106873, "correct_loss_per_char": 0.322965878706712, "incorrect_loss_per_char": 1.0113340049982071, "correct_loss_per_token": 2.099278211593628, "incorrect_loss_per_token": 5.494975209236145, "correct_loss_uncond": -11.366642475128174, "incorrect_loss_uncond": -6.5745896100997925}, "model_output": [{"sum_logits": -11.70050048828125, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.878514289855957, "logits_per_token": -5.850250244140625, "logits_per_char": -1.9500834147135417, "num_chars": 6}, {"sum_logits": -4.198556423187256, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.56519889831543, "logits_per_token": -2.099278211593628, "logits_per_char": -0.322965878706712, "num_chars": 13}, {"sum_logits": -10.99144172668457, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.810306549072266, "logits_per_token": -5.495720863342285, "logits_per_char": -0.9159534772237142, "num_chars": 12}, {"sum_logits": -5.997683525085449, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.08862590789795, "logits_per_token": -5.997683525085449, "logits_per_char": -0.5997683525085449, "num_chars": 10}, {"sum_logits": -4.636246204376221, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.846783638000488, "logits_per_token": -4.636246204376221, "logits_per_char": -0.5795307755470276, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 443, "native_id": "d312741df1b14bcbe358f4f30aff3994", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 4, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.713613033294678, "incorrect_loss_raw": 11.878398418426514, "correct_loss_per_char": 0.7713613033294677, "incorrect_loss_per_char": 1.4688576173578571, "correct_loss_per_token": 7.713613033294678, "incorrect_loss_per_token": 9.05418735742569, "correct_loss_uncond": -6.741425037384033, "incorrect_loss_uncond": -4.3284008502960205}, "model_output": [{"sum_logits": -8.6245698928833, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.337454795837402, "logits_per_token": -8.6245698928833, "logits_per_char": -0.9582855436537001, "num_chars": 9}, {"sum_logits": -13.424249649047852, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.265377044677734, "logits_per_token": -13.424249649047852, "logits_per_char": -1.6780312061309814, "num_chars": 8}, {"sum_logits": -10.402315139770508, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.582307815551758, "logits_per_token": -10.402315139770508, "logits_per_char": -2.0804630279541017, "num_chars": 5}, {"sum_logits": -15.062458992004395, "num_tokens": 4, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -22.642057418823242, "logits_per_token": -3.7656147480010986, "logits_per_char": -1.1586506916926458, "num_chars": 13}, {"sum_logits": -7.713613033294678, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.455038070678711, "logits_per_token": -7.713613033294678, "logits_per_char": -0.7713613033294677, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 444, "native_id": "0df3f58645b4bc306093845fb297a50e", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 4.488748073577881, "incorrect_loss_raw": 8.486257791519165, "correct_loss_per_char": 0.44887480735778806, "incorrect_loss_per_char": 0.8470067060910739, "correct_loss_per_token": 2.2443740367889404, "incorrect_loss_per_token": 5.535523613293966, "correct_loss_uncond": -11.33143949508667, "incorrect_loss_uncond": -7.254856586456299}, "model_output": [{"sum_logits": -12.545451164245605, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.926565170288086, "logits_per_token": -6.272725582122803, "logits_per_char": -1.3939390182495117, "num_chars": 9}, {"sum_logits": -4.825374603271484, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -11.941009521484375, "logits_per_token": -4.825374603271484, "logits_per_char": -0.8042291005452474, "num_chars": 6}, {"sum_logits": -8.295316696166992, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -20.28975486755371, "logits_per_token": -2.7651055653889975, "logits_per_char": -0.5530211130777994, "num_chars": 15}, {"sum_logits": -8.278888702392578, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.807127952575684, "logits_per_token": -8.278888702392578, "logits_per_char": -0.6368375924917368, "num_chars": 13}, {"sum_logits": -4.488748073577881, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.82018756866455, "logits_per_token": -2.2443740367889404, "logits_per_char": -0.44887480735778806, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 445, "native_id": "27d9b4df2ca50112d282331df4923e96", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.4732916355133057, "incorrect_loss_raw": 7.234346985816956, "correct_loss_per_char": 0.12277430295944214, "incorrect_loss_per_char": 0.9338708855889061, "correct_loss_per_token": 0.7366458177566528, "incorrect_loss_per_token": 4.44186669588089, "correct_loss_uncond": -14.660764455795288, "incorrect_loss_uncond": -8.427348256111145}, "model_output": [{"sum_logits": -4.983300685882568, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -17.135272979736328, "logits_per_token": -2.491650342941284, "logits_per_char": -0.45302733508023346, "num_chars": 11}, {"sum_logits": -9.29809856414795, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -15.788320541381836, "logits_per_token": -4.649049282073975, "logits_per_char": -0.6198732376098632, "num_chars": 15}, {"sum_logits": -8.058443069458008, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -15.335037231445312, "logits_per_token": -4.029221534729004, "logits_per_char": -1.343073844909668, "num_chars": 6}, {"sum_logits": -6.597545623779297, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -14.388150215148926, "logits_per_token": -6.597545623779297, "logits_per_char": -1.3195091247558595, "num_chars": 5}, {"sum_logits": -1.4732916355133057, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": true, "sum_logits_uncond": -16.134056091308594, "logits_per_token": -0.7366458177566528, "logits_per_char": -0.12277430295944214, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 446, "native_id": "ab755203f41a2e241f0ee8a53c54f287", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.816579818725586, "incorrect_loss_raw": 13.634899616241455, "correct_loss_per_char": 0.6781984475942758, "incorrect_loss_per_char": 1.3476242986790865, "correct_loss_per_token": 4.408289909362793, "incorrect_loss_per_token": 7.630466381708781, "correct_loss_uncond": -8.742128372192383, "incorrect_loss_uncond": -3.633469581604004}, "model_output": [{"sum_logits": -11.322389602661133, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.418514251708984, "logits_per_token": -11.322389602661133, "logits_per_char": -1.6174842289515905, "num_chars": 7}, {"sum_logits": -10.816097259521484, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.05072021484375, "logits_per_token": -5.408048629760742, "logits_per_char": -0.8320074815016526, "num_chars": 13}, {"sum_logits": -17.946340560913086, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.606101989746094, "logits_per_token": -8.973170280456543, "logits_per_char": -1.4955283800760906, "num_chars": 12}, {"sum_logits": -8.816579818725586, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.55870819091797, "logits_per_token": -4.408289909362793, "logits_per_char": -0.6781984475942758, "num_chars": 13}, {"sum_logits": -14.454771041870117, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.998140335083008, "logits_per_token": -4.818257013956706, "logits_per_char": -1.4454771041870118, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 447, "native_id": "f13efb91090dd28fd2b3c1f4dde680fd", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.988335609436035, "incorrect_loss_raw": 10.434796571731567, "correct_loss_per_char": 0.35225503584917855, "incorrect_loss_per_char": 1.0113819334242078, "correct_loss_per_token": 2.9941678047180176, "incorrect_loss_per_token": 9.03329086303711, "correct_loss_uncond": -15.415743827819824, "incorrect_loss_uncond": -3.0010416507720947}, "model_output": [{"sum_logits": -5.988335609436035, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.40407943725586, "logits_per_token": -2.9941678047180176, "logits_per_char": -0.35225503584917855, "num_chars": 17}, {"sum_logits": -13.251346588134766, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.104439735412598, "logits_per_token": -13.251346588134766, "logits_per_char": -1.3251346588134765, "num_chars": 10}, {"sum_logits": -11.212045669555664, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.05838394165039, "logits_per_token": -5.606022834777832, "logits_per_char": -0.8008604049682617, "num_chars": 14}, {"sum_logits": -11.724477767944336, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -11.986747741699219, "logits_per_token": -11.724477767944336, "logits_per_char": -1.3027197519938152, "num_chars": 9}, {"sum_logits": -5.551316261291504, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.593781471252441, "logits_per_token": -5.551316261291504, "logits_per_char": -0.6168129179212782, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 448, "native_id": "e98031901c815e55040d9fe28c4d9387", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 10.351791381835938, "incorrect_loss_raw": 9.59431380033493, "correct_loss_per_char": 0.5750995212131076, "incorrect_loss_per_char": 1.0813949475685756, "correct_loss_per_token": 5.175895690917969, "incorrect_loss_per_token": 5.8601881464322405, "correct_loss_uncond": -8.095462799072266, "incorrect_loss_uncond": -6.88185054063797}, "model_output": [{"sum_logits": -9.68629264831543, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.586409568786621, "logits_per_token": -9.68629264831543, "logits_per_char": -1.6143821080525715, "num_chars": 6}, {"sum_logits": -13.301774978637695, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.392850875854492, "logits_per_token": -4.4339249928792315, "logits_per_char": -0.8867849985758464, "num_chars": 15}, {"sum_logits": -3.251882314682007, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.9362211227417, "logits_per_token": -3.251882314682007, "logits_per_char": -0.8129705786705017, "num_chars": 4}, {"sum_logits": -10.351791381835938, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.447254180908203, "logits_per_token": -5.175895690917969, "logits_per_char": -0.5750995212131076, "num_chars": 18}, {"sum_logits": -12.13730525970459, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.98917579650879, "logits_per_token": -6.068652629852295, "logits_per_char": -1.0114421049753826, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 449, "native_id": "fb64149cf01c5b496d986f56852273e9", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.490468978881836, "incorrect_loss_raw": 10.87293291091919, "correct_loss_per_char": 0.862769907171076, "incorrect_loss_per_char": 0.9835229533231276, "correct_loss_per_token": 4.745234489440918, "incorrect_loss_per_token": 7.426890254020691, "correct_loss_uncond": -7.671237945556641, "incorrect_loss_uncond": -4.880031585693359}, "model_output": [{"sum_logits": -12.586043357849121, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.614110946655273, "logits_per_token": -6.2930216789245605, "logits_per_char": -1.0488369464874268, "num_chars": 12}, {"sum_logits": -14.982297897338867, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.279067993164062, "logits_per_token": -7.491148948669434, "logits_per_char": -0.8323498831854926, "num_chars": 18}, {"sum_logits": -11.652487754821777, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.768898963928223, "logits_per_token": -11.652487754821777, "logits_per_char": -1.6646411078316825, "num_chars": 7}, {"sum_logits": -9.490468978881836, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.161706924438477, "logits_per_token": -4.745234489440918, "logits_per_char": -0.862769907171076, "num_chars": 11}, {"sum_logits": -4.270902633666992, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.349780082702637, "logits_per_token": -4.270902633666992, "logits_per_char": -0.3882638757879084, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 450, "native_id": "2ac72eaf30a633c410b1bd658bbef0ba", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.759151458740234, "incorrect_loss_raw": 7.1132091879844666, "correct_loss_per_char": 0.5235592235218395, "incorrect_loss_per_char": 0.9724998200933138, "correct_loss_per_token": 2.879575729370117, "incorrect_loss_per_token": 5.367429703474045, "correct_loss_uncond": -14.892818450927734, "incorrect_loss_uncond": -8.362687885761261}, "model_output": [{"sum_logits": -3.862443685531616, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.171823501586914, "logits_per_token": -1.931221842765808, "logits_per_char": -0.482805460691452, "num_chars": 8}, {"sum_logits": -4.523329734802246, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.040182113647461, "logits_per_token": -4.523329734802246, "logits_per_char": -0.9046659469604492, "num_chars": 5}, {"sum_logits": -5.759151458740234, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.65196990966797, "logits_per_token": -2.879575729370117, "logits_per_char": -0.5235592235218395, "num_chars": 11}, {"sum_logits": -9.963271141052246, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.118742942810059, "logits_per_token": -9.963271141052246, "logits_per_char": -1.6605451901753743, "num_chars": 6}, {"sum_logits": -10.103792190551758, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.572839736938477, "logits_per_token": -5.051896095275879, "logits_per_char": -0.8419826825459799, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 451, "native_id": "22fc45d9e6d0baea4a5b0526504225b8", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.409213066101074, "incorrect_loss_raw": 12.06812047958374, "correct_loss_per_char": 0.9015355110168457, "incorrect_loss_per_char": 1.0919131571768337, "correct_loss_per_token": 5.409213066101074, "incorrect_loss_per_token": 8.157020092010498, "correct_loss_uncond": -7.071051597595215, "incorrect_loss_uncond": -5.258478879928589}, "model_output": [{"sum_logits": -15.422178268432617, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.77122688293457, "logits_per_token": -7.711089134216309, "logits_per_char": -0.7711089134216309, "num_chars": 20}, {"sum_logits": -5.409213066101074, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.480264663696289, "logits_per_token": -5.409213066101074, "logits_per_char": -0.9015355110168457, "num_chars": 6}, {"sum_logits": -8.412007331848145, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.048408508300781, "logits_per_token": -8.412007331848145, "logits_per_char": -1.2017153331211634, "num_chars": 7}, {"sum_logits": -8.571671485900879, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.03420352935791, "logits_per_token": -8.571671485900879, "logits_per_char": -0.9524079428778754, "num_chars": 9}, {"sum_logits": -15.86662483215332, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -21.452558517456055, "logits_per_token": -7.93331241607666, "logits_per_char": -1.4424204392866655, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 452, "native_id": "4ef3d70648ee3cea028bc5ed0fdfda28", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.5961146354675293, "incorrect_loss_raw": 10.75672197341919, "correct_loss_per_char": 0.2996762196222941, "incorrect_loss_per_char": 1.5200649499893188, "correct_loss_per_token": 1.7980573177337646, "incorrect_loss_per_token": 6.788209915161133, "correct_loss_uncond": -13.683460712432861, "incorrect_loss_uncond": -4.310194492340088}, "model_output": [{"sum_logits": -10.203533172607422, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.541193962097168, "logits_per_token": -10.203533172607422, "logits_per_char": -2.0407066345214844, "num_chars": 5}, {"sum_logits": -4.712444305419922, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.31043529510498, "logits_per_token": -4.712444305419922, "logits_per_char": -0.6732063293457031, "num_chars": 7}, {"sum_logits": -3.5961146354675293, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": true, "sum_logits_uncond": -17.27957534790039, "logits_per_token": -1.7980573177337646, "logits_per_char": -0.2996762196222941, "num_chars": 12}, {"sum_logits": -10.911558151245117, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.289777755737305, "logits_per_token": -3.637186050415039, "logits_per_char": -0.9092965126037598, "num_chars": 12}, {"sum_logits": -17.199352264404297, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -17.126258850097656, "logits_per_token": -8.599676132202148, "logits_per_char": -2.457050323486328, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 453, "native_id": "059155c50d1b04da7373e309868e67d2", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.81436824798584, "incorrect_loss_raw": 5.8312007188797, "correct_loss_per_char": 1.081436824798584, "incorrect_loss_per_char": 0.8730402124779566, "correct_loss_per_token": 5.40718412399292, "incorrect_loss_per_token": 4.706321716308594, "correct_loss_uncond": -6.874175071716309, "incorrect_loss_uncond": -7.795828938484192}, "model_output": [{"sum_logits": -4.026224613189697, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.5768461227417, "logits_per_token": -4.026224613189697, "logits_per_char": -0.5032780766487122, "num_chars": 8}, {"sum_logits": -6.287768363952637, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.063604354858398, "logits_per_token": -6.287768363952637, "logits_per_char": -1.2575536727905274, "num_chars": 5}, {"sum_logits": -10.81436824798584, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.68854331970215, "logits_per_token": -5.40718412399292, "logits_per_char": -1.081436824798584, "num_chars": 10}, {"sum_logits": -4.011777877807617, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.893023490905762, "logits_per_token": -4.011777877807617, "logits_per_char": -0.4457530975341797, "num_chars": 9}, {"sum_logits": -8.999032020568848, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.974644660949707, "logits_per_token": -4.499516010284424, "logits_per_char": -1.285576002938407, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 454, "native_id": "33d023a6806390eb8195380331e17404_1", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.9242463111877441, "incorrect_loss_raw": 8.218066930770874, "correct_loss_per_char": 0.21380514568752712, "incorrect_loss_per_char": 1.0980201619012016, "correct_loss_per_token": 1.9242463111877441, "incorrect_loss_per_token": 5.88692843914032, "correct_loss_uncond": -10.968777179718018, "incorrect_loss_uncond": -8.919515371322632}, "model_output": [{"sum_logits": -8.972667694091797, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.932482719421387, "logits_per_token": -8.972667694091797, "logits_per_char": -1.495444615681966, "num_chars": 6}, {"sum_logits": -7.85335636138916, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -18.04192352294922, "logits_per_token": -3.92667818069458, "logits_per_char": -1.1219080516270228, "num_chars": 7}, {"sum_logits": -1.9242463111877441, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.893023490905762, "logits_per_token": -1.9242463111877441, "logits_per_char": -0.21380514568752712, "num_chars": 9}, {"sum_logits": -5.250492095947266, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.363898277282715, "logits_per_token": -5.250492095947266, "logits_per_char": -0.8750820159912109, "num_chars": 6}, {"sum_logits": -10.795751571655273, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -21.212024688720703, "logits_per_token": -5.397875785827637, "logits_per_char": -0.8996459643046061, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 455, "native_id": "63f7ad481a63fc8c6dffe00519d4a167", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.94823932647705, "incorrect_loss_raw": 13.556822299957275, "correct_loss_per_char": 0.5213447298322406, "incorrect_loss_per_char": 1.4809987054930793, "correct_loss_per_token": 3.6494131088256836, "incorrect_loss_per_token": 9.566516757011414, "correct_loss_uncond": -13.580185890197754, "incorrect_loss_uncond": -2.008150577545166}, "model_output": [{"sum_logits": -10.94823932647705, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -24.528425216674805, "logits_per_token": -3.6494131088256836, "logits_per_char": -0.5213447298322406, "num_chars": 21}, {"sum_logits": -10.18320369720459, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.641867637634277, "logits_per_token": -10.18320369720459, "logits_per_char": -1.018320369720459, "num_chars": 10}, {"sum_logits": -14.082205772399902, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.72606658935547, "logits_per_token": -7.041102886199951, "logits_per_char": -1.4082205772399903, "num_chars": 10}, {"sum_logits": -12.121641159057617, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.102335929870605, "logits_per_token": -12.121641159057617, "logits_per_char": -1.5152051448822021, "num_chars": 8}, {"sum_logits": -17.840238571166992, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.789621353149414, "logits_per_token": -8.920119285583496, "logits_per_char": -1.9822487301296658, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 456, "native_id": "a2daf73d33541af0846673afd8e49abe", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.484295845031738, "incorrect_loss_raw": 11.084521055221558, "correct_loss_per_char": 0.5403579870859782, "incorrect_loss_per_char": 1.3360533132181538, "correct_loss_per_token": 6.484295845031738, "incorrect_loss_per_token": 9.663342595100403, "correct_loss_uncond": -7.393662452697754, "incorrect_loss_uncond": -4.375579595565796}, "model_output": [{"sum_logits": -6.484295845031738, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.877958297729492, "logits_per_token": -6.484295845031738, "logits_per_char": -0.5403579870859782, "num_chars": 12}, {"sum_logits": -9.314180374145508, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.119851112365723, "logits_per_token": -9.314180374145508, "logits_per_char": -0.9314180374145508, "num_chars": 10}, {"sum_logits": -11.369427680969238, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.92629623413086, "logits_per_token": -5.684713840484619, "logits_per_char": -1.0335843346335671, "num_chars": 11}, {"sum_logits": -12.83531379699707, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.392402648925781, "logits_per_token": -12.83531379699707, "logits_per_char": -1.8336162567138672, "num_chars": 7}, {"sum_logits": -10.819162368774414, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.40185260772705, "logits_per_token": -10.819162368774414, "logits_per_char": -1.5455946241106306, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 457, "native_id": "7d70208061ae3185bcfc9e912ee9e141", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.2146527767181396, "incorrect_loss_raw": 16.148528575897217, "correct_loss_per_char": 0.15818948405129568, "incorrect_loss_per_char": 1.1195768066087193, "correct_loss_per_token": 0.5536631941795349, "incorrect_loss_per_token": 8.251974662144978, "correct_loss_uncond": -14.562988519668579, "incorrect_loss_uncond": -2.605433702468872}, "model_output": [{"sum_logits": -2.2146527767181396, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.77764129638672, "logits_per_token": -0.5536631941795349, "logits_per_char": -0.15818948405129568, "num_chars": 14}, {"sum_logits": -19.37437629699707, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.934391021728516, "logits_per_token": -9.687188148498535, "logits_per_char": -0.9687188148498536, "num_chars": 20}, {"sum_logits": -12.371196746826172, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.339823722839355, "logits_per_token": -12.371196746826172, "logits_per_char": -1.3745774163140192, "num_chars": 9}, {"sum_logits": -13.370170593261719, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.821319580078125, "logits_per_token": -4.456723531087239, "logits_per_char": -0.6366747901553199, "num_chars": 21}, {"sum_logits": -19.478370666503906, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.92031478881836, "logits_per_token": -6.492790222167969, "logits_per_char": -1.498336205115685, "num_chars": 13}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 458, "native_id": "9003c4748b08d5a734747e499599ff20", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.171562194824219, "incorrect_loss_raw": 9.647847294807434, "correct_loss_per_char": 0.5959374564034599, "incorrect_loss_per_char": 1.3681975867067064, "correct_loss_per_token": 4.171562194824219, "incorrect_loss_per_token": 8.217150926589966, "correct_loss_uncond": -10.558158874511719, "incorrect_loss_uncond": -4.015558838844299}, "model_output": [{"sum_logits": -6.358669757843018, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.55066967010498, "logits_per_token": -6.358669757843018, "logits_per_char": -1.2717339515686035, "num_chars": 5}, {"sum_logits": -11.445570945739746, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.719467163085938, "logits_per_token": -5.722785472869873, "logits_per_char": -1.4306963682174683, "num_chars": 8}, {"sum_logits": -4.171562194824219, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.729721069335938, "logits_per_token": -4.171562194824219, "logits_per_char": -0.5959374564034599, "num_chars": 7}, {"sum_logits": -9.630122184753418, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -11.684775352478027, "logits_per_token": -9.630122184753418, "logits_per_char": -1.3757317406790597, "num_chars": 7}, {"sum_logits": -11.157026290893555, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.698712348937988, "logits_per_token": -11.157026290893555, "logits_per_char": -1.3946282863616943, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 459, "native_id": "28aac6d39cdd270d2a6a28e1985484cb", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.8978259563446045, "incorrect_loss_raw": 8.239446580410004, "correct_loss_per_char": 0.23722824454307556, "incorrect_loss_per_char": 1.4676863910296025, "correct_loss_per_token": 1.8978259563446045, "incorrect_loss_per_token": 8.239446580410004, "correct_loss_uncond": -13.511959314346313, "incorrect_loss_uncond": -6.149803698062897}, "model_output": [{"sum_logits": -11.388225555419922, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.916854858398438, "logits_per_token": -11.388225555419922, "logits_per_char": -1.898037592569987, "num_chars": 6}, {"sum_logits": -11.834531784057617, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.9362211227417, "logits_per_token": -11.834531784057617, "logits_per_char": -2.9586329460144043, "num_chars": 4}, {"sum_logits": -1.8978259563446045, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.409785270690918, "logits_per_token": -1.8978259563446045, "logits_per_char": -0.23722824454307556, "num_chars": 8}, {"sum_logits": -2.9553825855255127, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.907693862915039, "logits_per_token": -2.9553825855255127, "logits_per_char": -0.49256376425425213, "num_chars": 6}, {"sum_logits": -6.779646396636963, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.796231269836426, "logits_per_token": -6.779646396636963, "logits_per_char": -0.5215112612797663, "num_chars": 13}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 460, "native_id": "8bdbb8caefcc607a9ec7579aa0c87cba", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.31583309173584, "incorrect_loss_raw": 12.369155168533325, "correct_loss_per_char": 0.4891666524550494, "incorrect_loss_per_char": 1.3578477440458356, "correct_loss_per_token": 2.7719443639119468, "incorrect_loss_per_token": 5.912373638153076, "correct_loss_uncond": -10.965144157409668, "incorrect_loss_uncond": -2.0700318813323975}, "model_output": [{"sum_logits": -8.31583309173584, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.280977249145508, "logits_per_token": -2.7719443639119468, "logits_per_char": -0.4891666524550494, "num_chars": 17}, {"sum_logits": -11.189971923828125, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -10.885873794555664, "logits_per_token": -11.189971923828125, "logits_per_char": -1.0172701748934658, "num_chars": 11}, {"sum_logits": -13.852130889892578, "num_tokens": 5, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -19.064041137695312, "logits_per_token": -2.7704261779785155, "logits_per_char": -0.9234753926595052, "num_chars": 15}, {"sum_logits": -15.168974876403809, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.82971477508545, "logits_per_token": -5.0563249588012695, "logits_per_char": -2.16699641091483, "num_chars": 7}, {"sum_logits": -9.265542984008789, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.977118492126465, "logits_per_token": -4.6327714920043945, "logits_per_char": -1.3236489977155412, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 461, "native_id": "95a85df48902d23eb3fda25a99fca1a0", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.441244602203369, "incorrect_loss_raw": 13.413958430290222, "correct_loss_per_char": 0.744124460220337, "incorrect_loss_per_char": 1.1878649592399597, "correct_loss_per_token": 3.7206223011016846, "incorrect_loss_per_token": 6.48140013217926, "correct_loss_uncond": -9.871530055999756, "incorrect_loss_uncond": -6.33945095539093}, "model_output": [{"sum_logits": -17.342830657958984, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.000612258911133, "logits_per_token": -8.671415328979492, "logits_per_char": -1.7342830657958985, "num_chars": 10}, {"sum_logits": -7.441244602203369, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.312774658203125, "logits_per_token": -3.7206223011016846, "logits_per_char": -0.744124460220337, "num_chars": 10}, {"sum_logits": -5.41389799118042, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.98017120361328, "logits_per_token": -1.8046326637268066, "logits_per_char": -0.541389799118042, "num_chars": 10}, {"sum_logits": -18.4237060546875, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -22.644763946533203, "logits_per_token": -9.21185302734375, "logits_per_char": -1.2282470703125, "num_chars": 15}, {"sum_logits": -12.475399017333984, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.388090133666992, "logits_per_token": -6.237699508666992, "logits_per_char": -1.2475399017333983, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 462, "native_id": "79c3378b7660d328902d7c0ad442a37f", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.171927452087402, "incorrect_loss_raw": 14.161128878593445, "correct_loss_per_char": 1.0343854904174805, "incorrect_loss_per_char": 1.0830530219607881, "correct_loss_per_token": 5.171927452087402, "incorrect_loss_per_token": 6.112408896287282, "correct_loss_uncond": -6.646827697753906, "incorrect_loss_uncond": -7.243592381477356}, "model_output": [{"sum_logits": -6.965198040008545, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.626487731933594, "logits_per_token": -3.4825990200042725, "logits_per_char": -0.7739108933342828, "num_chars": 9}, {"sum_logits": -14.780801773071289, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.89868927001953, "logits_per_token": -7.3904008865356445, "logits_per_char": -1.2317334810892742, "num_chars": 12}, {"sum_logits": -23.235733032226562, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -28.984813690185547, "logits_per_token": -7.7452443440755205, "logits_per_char": -1.549048868815104, "num_chars": 15}, {"sum_logits": -11.662782669067383, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.10889434814453, "logits_per_token": -5.831391334533691, "logits_per_char": -0.7775188446044922, "num_chars": 15}, {"sum_logits": -5.171927452087402, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -11.818755149841309, "logits_per_token": -5.171927452087402, "logits_per_char": -1.0343854904174805, "num_chars": 5}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 463, "native_id": "8c12e5864463cfcd03f4d0ab67949d01", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 8.051466941833496, "incorrect_loss_raw": 15.808671474456787, "correct_loss_per_char": 0.7319515401666815, "incorrect_loss_per_char": 1.0258353056342902, "correct_loss_per_token": 4.025733470916748, "incorrect_loss_per_token": 5.045274972915649, "correct_loss_uncond": -10.876856803894043, "incorrect_loss_uncond": -6.120184659957886}, "model_output": [{"sum_logits": -20.228925704956055, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -27.451038360595703, "logits_per_token": -5.057231426239014, "logits_per_char": -0.8795185089111328, "num_chars": 23}, {"sum_logits": -8.051466941833496, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.92832374572754, "logits_per_token": -4.025733470916748, "logits_per_char": -0.7319515401666815, "num_chars": 11}, {"sum_logits": -20.153942108154297, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -25.532169342041016, "logits_per_token": -5.038485527038574, "logits_per_char": -0.9597115289597284, "num_chars": 21}, {"sum_logits": -14.808661460876465, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.789621353149414, "logits_per_token": -7.404330730438232, "logits_per_char": -1.6454068289862738, "num_chars": 9}, {"sum_logits": -8.043156623840332, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.942595481872559, "logits_per_token": -2.6810522079467773, "logits_per_char": -0.6187043556800256, "num_chars": 13}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 464, "native_id": "e145618c2062eb9ea8928fdb0d42185e", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.231205940246582, "incorrect_loss_raw": 15.549514532089233, "correct_loss_per_char": 1.1231205940246582, "incorrect_loss_per_char": 1.275030555234705, "correct_loss_per_token": 5.615602970123291, "incorrect_loss_per_token": 8.105882048606873, "correct_loss_uncond": -8.359803199768066, "incorrect_loss_uncond": -4.2211503982543945}, "model_output": [{"sum_logits": -11.231205940246582, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.59100914001465, "logits_per_token": -5.615602970123291, "logits_per_char": -1.1231205940246582, "num_chars": 10}, {"sum_logits": -24.82217788696289, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -26.71526336669922, "logits_per_token": -12.411088943481445, "logits_per_char": -1.9093982989971454, "num_chars": 13}, {"sum_logits": -7.8983259201049805, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.80385971069336, "logits_per_token": -7.8983259201049805, "logits_per_char": -0.789832592010498, "num_chars": 10}, {"sum_logits": -15.7479829788208, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -24.342464447021484, "logits_per_token": -5.249327659606934, "logits_per_char": -0.6846949121226436, "num_chars": 23}, {"sum_logits": -13.729571342468262, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.22107219696045, "logits_per_token": -6.864785671234131, "logits_per_char": -1.7161964178085327, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 465, "native_id": "35872be88df5f6c4a6600020266a5458", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.914315223693848, "incorrect_loss_raw": 8.818184733390808, "correct_loss_per_char": 0.4224510874067034, "incorrect_loss_per_char": 1.0629016191028713, "correct_loss_per_token": 2.957157611846924, "incorrect_loss_per_token": 5.704833090305328, "correct_loss_uncond": -13.996933937072754, "incorrect_loss_uncond": -7.637719511985779}, "model_output": [{"sum_logits": -16.199926376342773, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.102752685546875, "logits_per_token": -8.099963188171387, "logits_per_char": -2.0249907970428467, "num_chars": 8}, {"sum_logits": -4.582196235656738, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.474275588989258, "logits_per_token": -2.291098117828369, "logits_per_char": -0.241168222929302, "num_chars": 19}, {"sum_logits": -5.914315223693848, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.9112491607666, "logits_per_token": -2.957157611846924, "logits_per_char": -0.4224510874067034, "num_chars": 14}, {"sum_logits": -4.124690532684326, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.54778289794922, "logits_per_token": -2.062345266342163, "logits_per_char": -0.2577931582927704, "num_chars": 16}, {"sum_logits": -10.365925788879395, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.698805809020996, "logits_per_token": -10.365925788879395, "logits_per_char": -1.7276542981465657, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 466, "native_id": "055817d8d703d3c2802545e3fccdcde3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 4.342789649963379, "incorrect_loss_raw": 6.996744155883789, "correct_loss_per_char": 0.6203985214233398, "incorrect_loss_per_char": 0.9062895630087171, "correct_loss_per_token": 4.342789649963379, "incorrect_loss_per_token": 6.083832144737244, "correct_loss_uncond": -10.601112365722656, "incorrect_loss_uncond": -6.954878807067871}, "model_output": [{"sum_logits": -5.071165084838867, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.322620391845703, "logits_per_token": -5.071165084838867, "logits_per_char": -0.5071165084838867, "num_chars": 10}, {"sum_logits": -4.342789649963379, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.943902015686035, "logits_per_token": -4.342789649963379, "logits_per_char": -0.6203985214233398, "num_chars": 7}, {"sum_logits": -8.477389335632324, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.866536140441895, "logits_per_token": -8.477389335632324, "logits_per_char": -1.6954778671264648, "num_chars": 5}, {"sum_logits": -7.303296089172363, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.911409378051758, "logits_per_token": -3.6516480445861816, "logits_per_char": -0.9129120111465454, "num_chars": 8}, {"sum_logits": -7.135126113891602, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.705925941467285, "logits_per_token": -7.135126113891602, "logits_per_char": -0.5096518652779716, "num_chars": 14}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 467, "native_id": "5ef6cdb85468df482e3aa6fa339d6e41", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.647302627563477, "incorrect_loss_raw": 10.947593212127686, "correct_loss_per_char": 0.3574848175048828, "incorrect_loss_per_char": 1.4167701105276744, "correct_loss_per_token": 2.3236513137817383, "incorrect_loss_per_token": 7.549679756164551, "correct_loss_uncond": -13.27208137512207, "incorrect_loss_uncond": -5.093225002288818}, "model_output": [{"sum_logits": -4.647302627563477, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.919384002685547, "logits_per_token": -2.3236513137817383, "logits_per_char": -0.3574848175048828, "num_chars": 13}, {"sum_logits": -8.303532600402832, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.506343841552734, "logits_per_token": -8.303532600402832, "logits_per_char": -1.6607065200805664, "num_chars": 5}, {"sum_logits": -13.879545211791992, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.610618591308594, "logits_per_token": -6.939772605895996, "logits_per_char": -0.8674715757369995, "num_chars": 16}, {"sum_logits": -8.303532600402832, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.506343841552734, "logits_per_token": -8.303532600402832, "logits_per_char": -1.6607065200805664, "num_chars": 5}, {"sum_logits": -13.303762435913086, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.539966583251953, "logits_per_token": -6.651881217956543, "logits_per_char": -1.4781958262125652, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 468, "native_id": "1e939cc6fef999953d692b57caab254b", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 12.409835815429688, "incorrect_loss_raw": 9.168319940567017, "correct_loss_per_char": 0.8273223876953125, "incorrect_loss_per_char": 1.7696965654691061, "correct_loss_per_token": 6.204917907714844, "incorrect_loss_per_token": 9.168319940567017, "correct_loss_uncond": -4.582555770874023, "incorrect_loss_uncond": -5.289878845214844}, "model_output": [{"sum_logits": -11.998006820678711, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.51136589050293, "logits_per_token": -11.998006820678711, "logits_per_char": -1.999667803446452, "num_chars": 6}, {"sum_logits": -12.409835815429688, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.99239158630371, "logits_per_token": -6.204917907714844, "logits_per_char": -0.8273223876953125, "num_chars": 15}, {"sum_logits": -10.530771255493164, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.693686485290527, "logits_per_token": -10.530771255493164, "logits_per_char": -2.1061542510986326, "num_chars": 5}, {"sum_logits": -6.7579345703125, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.303086280822754, "logits_per_token": -6.7579345703125, "logits_per_char": -1.1263224283854167, "num_chars": 6}, {"sum_logits": -7.386567115783691, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.32465648651123, "logits_per_token": -7.386567115783691, "logits_per_char": -1.8466417789459229, "num_chars": 4}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 469, "native_id": "3a3b5d4a517ef70d25eb558f1a622937", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.41365909576416, "incorrect_loss_raw": 15.000098466873169, "correct_loss_per_char": 0.3103326450694691, "incorrect_loss_per_char": 2.0344838116850172, "correct_loss_per_token": 3.41365909576416, "incorrect_loss_per_token": 9.973519444465637, "correct_loss_uncond": -11.011032104492188, "incorrect_loss_uncond": 1.2022268772125244}, "model_output": [{"sum_logits": -12.137650489807129, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -12.254728317260742, "logits_per_token": -12.137650489807129, "logits_per_char": -2.4275300979614256, "num_chars": 5}, {"sum_logits": -13.154511451721191, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -12.977118492126465, "logits_per_token": -6.577255725860596, "logits_per_char": -1.879215921674456, "num_chars": 7}, {"sum_logits": -20.293590545654297, "num_tokens": 3, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -16.695594787597656, "logits_per_token": -6.764530181884766, "logits_per_char": -2.02935905456543, "num_chars": 10}, {"sum_logits": -14.414641380310059, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -13.264044761657715, "logits_per_token": -14.414641380310059, "logits_per_char": -1.8018301725387573, "num_chars": 8}, {"sum_logits": -3.41365909576416, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -14.424691200256348, "logits_per_token": -3.41365909576416, "logits_per_char": -0.3103326450694691, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 470, "native_id": "a943522f7d407cef369d5d3f1bf48589", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.6839494705200195, "incorrect_loss_raw": 8.426496863365173, "correct_loss_per_char": 0.4519970276776482, "incorrect_loss_per_char": 0.9126127301118314, "correct_loss_per_token": 2.56131649017334, "incorrect_loss_per_token": 5.65496701002121, "correct_loss_uncond": -14.320887565612793, "incorrect_loss_uncond": -8.96933400630951}, "model_output": [{"sum_logits": -5.275083065032959, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.984527587890625, "logits_per_token": -2.6375415325164795, "logits_per_char": -0.4057756203871507, "num_chars": 13}, {"sum_logits": -3.5260133743286133, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.781972885131836, "logits_per_token": -1.7630066871643066, "logits_per_char": -0.2938344478607178, "num_chars": 12}, {"sum_logits": -7.6839494705200195, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -22.004837036132812, "logits_per_token": -2.56131649017334, "logits_per_char": -0.4519970276776482, "num_chars": 17}, {"sum_logits": -11.533748626708984, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.237130165100098, "logits_per_token": -11.533748626708984, "logits_per_char": -1.9222914377848308, "num_chars": 6}, {"sum_logits": -13.371142387390137, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.579692840576172, "logits_per_token": -6.685571193695068, "logits_per_char": -1.0285494144146259, "num_chars": 13}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 471, "native_id": "57a343d72031b668e5eb91868420e915", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.67578125, "incorrect_loss_raw": 10.183778285980225, "correct_loss_per_char": 0.33386948529411764, "incorrect_loss_per_char": 1.1522909466064337, "correct_loss_per_token": 2.837890625, "incorrect_loss_per_token": 6.040627320607503, "correct_loss_uncond": -10.91299819946289, "incorrect_loss_uncond": -5.571918487548828}, "model_output": [{"sum_logits": -5.67578125, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.58877944946289, "logits_per_token": -2.837890625, "logits_per_char": -0.33386948529411764, "num_chars": 17}, {"sum_logits": -8.767247200012207, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.953621864318848, "logits_per_token": -8.767247200012207, "logits_per_char": -0.7970224727283824, "num_chars": 11}, {"sum_logits": -7.108960151672363, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.90507698059082, "logits_per_token": -7.108960151672363, "logits_per_char": -1.1848266919453938, "num_chars": 6}, {"sum_logits": -14.081425666809082, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.796571731567383, "logits_per_token": -4.693808555603027, "logits_per_char": -1.280129606073553, "num_chars": 11}, {"sum_logits": -10.777480125427246, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.36751651763916, "logits_per_token": -3.5924933751424155, "logits_per_char": -1.3471850156784058, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 472, "native_id": "c4b1a57e7880b9cb367f9c67abf5605f", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.019270181655884, "incorrect_loss_raw": 12.591125249862671, "correct_loss_per_char": 0.2524087727069855, "incorrect_loss_per_char": 1.8067317684491475, "correct_loss_per_token": 2.019270181655884, "incorrect_loss_per_token": 10.934099316596985, "correct_loss_uncond": -12.459933996200562, "incorrect_loss_uncond": -1.6118485927581787}, "model_output": [{"sum_logits": -13.256207466125488, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.972175598144531, "logits_per_token": -6.628103733062744, "logits_per_char": -2.651241493225098, "num_chars": 5}, {"sum_logits": -2.019270181655884, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": true, "sum_logits_uncond": -14.479204177856445, "logits_per_token": -2.019270181655884, "logits_per_char": -0.2524087727069855, "num_chars": 8}, {"sum_logits": -12.970766067504883, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.12393856048584, "logits_per_token": -12.970766067504883, "logits_per_char": -1.6213457584381104, "num_chars": 8}, {"sum_logits": -11.314550399780273, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.82158088684082, "logits_per_token": -11.314550399780273, "logits_per_char": -1.885758399963379, "num_chars": 6}, {"sum_logits": -12.822977066040039, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.894200325012207, "logits_per_token": -12.822977066040039, "logits_per_char": -1.0685814221700032, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 473, "native_id": "e313d7967f72c2b880213daaaf4b7181", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.619220733642578, "incorrect_loss_raw": 11.631567239761353, "correct_loss_per_char": 0.9728014809744698, "incorrect_loss_per_char": 1.0280834469761881, "correct_loss_per_token": 6.809610366821289, "incorrect_loss_per_token": 5.292769432067871, "correct_loss_uncond": -9.291839599609375, "incorrect_loss_uncond": -6.888664484024048}, "model_output": [{"sum_logits": -12.552340507507324, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.84388542175293, "logits_per_token": -4.184113502502441, "logits_per_char": -0.9655646544236404, "num_chars": 13}, {"sum_logits": -15.401212692260742, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.107040405273438, "logits_per_token": -7.700606346130371, "logits_per_char": -1.4001102447509766, "num_chars": 11}, {"sum_logits": -13.619220733642578, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -22.911060333251953, "logits_per_token": -6.809610366821289, "logits_per_char": -0.9728014809744698, "num_chars": 14}, {"sum_logits": -12.16739559173584, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.773880004882812, "logits_per_token": -6.08369779586792, "logits_per_char": -1.1061268719759854, "num_chars": 11}, {"sum_logits": -6.405320167541504, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.356121063232422, "logits_per_token": -3.202660083770752, "logits_per_char": -0.6405320167541504, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 474, "native_id": "3c7992df7fda23bcdeacb1f1f6b73448", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.5560052394866943, "incorrect_loss_raw": 11.530472040176392, "correct_loss_per_char": 0.12966710329055786, "incorrect_loss_per_char": 0.9616858021362678, "correct_loss_per_token": 0.7780026197433472, "incorrect_loss_per_token": 6.336493810017903, "correct_loss_uncond": -15.416397333145142, "incorrect_loss_uncond": -5.5203704833984375}, "model_output": [{"sum_logits": -16.8331356048584, "num_tokens": 3, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -18.79824447631836, "logits_per_token": -5.611045201619466, "logits_per_char": -1.2948565849891076, "num_chars": 13}, {"sum_logits": -4.4916486740112305, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -17.307884216308594, "logits_per_token": -2.2458243370056152, "logits_per_char": -0.4083316976373846, "num_chars": 11}, {"sum_logits": -1.5560052394866943, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": true, "sum_logits_uncond": -16.972402572631836, "logits_per_token": -0.7780026197433472, "logits_per_char": -0.12966710329055786, "num_chars": 12}, {"sum_logits": -10.181107521057129, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.239903450012207, "logits_per_token": -10.181107521057129, "logits_per_char": -0.9255552291870117, "num_chars": 11}, {"sum_logits": -14.615996360778809, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -17.857337951660156, "logits_per_token": -7.307998180389404, "logits_per_char": -1.2179996967315674, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 475, "native_id": "d6644eacdb543a60545d2eb1ac7e6dbd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.755427360534668, "incorrect_loss_raw": 12.383338212966919, "correct_loss_per_char": 0.792571226755778, "incorrect_loss_per_char": 1.658473708690741, "correct_loss_per_token": 2.377713680267334, "incorrect_loss_per_token": 6.442209422588348, "correct_loss_uncond": -11.169829368591309, "incorrect_loss_uncond": -3.1095902919769287}, "model_output": [{"sum_logits": -4.755427360534668, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.925256729125977, "logits_per_token": -2.377713680267334, "logits_per_char": -0.792571226755778, "num_chars": 6}, {"sum_logits": -15.52857494354248, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.619625091552734, "logits_per_token": -7.76428747177124, "logits_per_char": -2.5880958239237466, "num_chars": 6}, {"sum_logits": -10.582734107971191, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.600214004516602, "logits_per_token": -2.645683526992798, "logits_per_char": -0.8140564698439378, "num_chars": 13}, {"sum_logits": -12.094765663146973, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.835020065307617, "logits_per_token": -4.031588554382324, "logits_per_char": -1.3438628514607747, "num_chars": 9}, {"sum_logits": -11.327278137207031, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.916854858398438, "logits_per_token": -11.327278137207031, "logits_per_char": -1.8878796895345051, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 476, "native_id": "d1ad9b79f54205b6b9ac19a27f9c2be5", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 8.037236213684082, "incorrect_loss_raw": 7.509814143180847, "correct_loss_per_char": 0.4230124322991622, "incorrect_loss_per_char": 1.0876076678435007, "correct_loss_per_token": 4.018618106842041, "incorrect_loss_per_token": 6.75025200843811, "correct_loss_uncond": -9.437039375305176, "incorrect_loss_uncond": -7.274967551231384}, "model_output": [{"sum_logits": -6.0764970779418945, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -17.45522689819336, "logits_per_token": -3.0382485389709473, "logits_per_char": -0.6076497077941895, "num_chars": 10}, {"sum_logits": -9.941057205200195, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.903160095214844, "logits_per_token": -9.941057205200195, "logits_per_char": -1.656842867533366, "num_chars": 6}, {"sum_logits": -7.99740743637085, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.071391105651855, "logits_per_token": -7.99740743637085, "logits_per_char": -1.3329012393951416, "num_chars": 6}, {"sum_logits": -6.024294853210449, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.709348678588867, "logits_per_token": -6.024294853210449, "logits_per_char": -0.7530368566513062, "num_chars": 8}, {"sum_logits": -8.037236213684082, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -17.474275588989258, "logits_per_token": -4.018618106842041, "logits_per_char": -0.4230124322991622, "num_chars": 19}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 477, "native_id": "f116ee6620c0f171e5db54bc03a5f2e2", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.392688751220703, "incorrect_loss_raw": 14.919265270233154, "correct_loss_per_char": 0.6720626137473367, "incorrect_loss_per_char": 1.1217360631949536, "correct_loss_per_token": 3.6963443756103516, "incorrect_loss_per_token": 5.9637370109558105, "correct_loss_uncond": -10.379890441894531, "incorrect_loss_uncond": -6.289445877075195}, "model_output": [{"sum_logits": -13.253726959228516, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -21.405588150024414, "logits_per_token": -6.626863479614258, "logits_per_char": -1.0195174584021935, "num_chars": 13}, {"sum_logits": -15.663446426391602, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -24.25899887084961, "logits_per_token": -5.2211488087972, "logits_per_char": -0.8243919171785054, "num_chars": 19}, {"sum_logits": -7.392688751220703, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -17.772579193115234, "logits_per_token": -3.6963443756103516, "logits_per_char": -0.6720626137473367, "num_chars": 11}, {"sum_logits": -10.521839141845703, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.958040237426758, "logits_per_token": -5.260919570922852, "logits_per_char": -0.9565308310768821, "num_chars": 11}, {"sum_logits": -20.238048553466797, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -24.212217330932617, "logits_per_token": -6.746016184488933, "logits_per_char": -1.6865040461222331, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 478, "native_id": "ea82f9e938cbfce85fb498ce46264253", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.722146511077881, "incorrect_loss_raw": 15.688802003860474, "correct_loss_per_char": 0.42928604646162555, "incorrect_loss_per_char": 1.1023627660634978, "correct_loss_per_token": 2.3610732555389404, "incorrect_loss_per_token": 6.813574433326721, "correct_loss_uncond": -11.606813907623291, "incorrect_loss_uncond": -3.4172589778900146}, "model_output": [{"sum_logits": -18.066862106323242, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -21.405588150024414, "logits_per_token": -9.033431053161621, "logits_per_char": -1.3897586235633264, "num_chars": 13}, {"sum_logits": -16.49322509765625, "num_tokens": 4, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.648822784423828, "logits_per_token": -4.1233062744140625, "logits_per_char": -0.568731899919181, "num_chars": 29}, {"sum_logits": -4.722146511077881, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.328960418701172, "logits_per_token": -2.3610732555389404, "logits_per_char": -0.42928604646162555, "num_chars": 11}, {"sum_logits": -15.970566749572754, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.956235885620117, "logits_per_token": -7.985283374786377, "logits_per_char": -1.2285051345825195, "num_chars": 13}, {"sum_logits": -12.224554061889648, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.413597106933594, "logits_per_token": -6.112277030944824, "logits_per_char": -1.2224554061889648, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 479, "native_id": "edbb57ac2f476679ae547f75ec2bef3e", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.575616836547852, "incorrect_loss_raw": 11.935739517211914, "correct_loss_per_char": 0.6383744557698567, "incorrect_loss_per_char": 1.228765450583564, "correct_loss_per_token": 2.393904209136963, "incorrect_loss_per_token": 5.968348264694214, "correct_loss_uncond": -9.423652648925781, "incorrect_loss_uncond": -4.02519154548645}, "model_output": [{"sum_logits": -13.574115753173828, "num_tokens": 3, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -17.665618896484375, "logits_per_token": -4.524705251057942, "logits_per_char": -1.3574115753173828, "num_chars": 10}, {"sum_logits": -9.575616836547852, "num_tokens": 4, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -18.999269485473633, "logits_per_token": -2.393904209136963, "logits_per_char": -0.6383744557698567, "num_chars": 15}, {"sum_logits": -12.728527069091797, "num_tokens": 3, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -15.288544654846191, "logits_per_token": -4.242842356363933, "logits_per_char": -1.2728527069091797, "num_chars": 10}, {"sum_logits": -12.668939590454102, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -17.08570098876953, "logits_per_token": -6.334469795227051, "logits_per_char": -1.4076599544949002, "num_chars": 9}, {"sum_logits": -8.77137565612793, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -13.80385971069336, "logits_per_token": -8.77137565612793, "logits_per_char": -0.877137565612793, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 480, "native_id": "07a99d5f2ca7028febeb9f09604b36c8", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.5533859729766846, "incorrect_loss_raw": 12.108452320098877, "correct_loss_per_char": 0.5922309954961141, "incorrect_loss_per_char": 1.4538103208397375, "correct_loss_per_token": 3.5533859729766846, "incorrect_loss_per_token": 7.9932825565338135, "correct_loss_uncond": -10.707549810409546, "incorrect_loss_uncond": -5.634830713272095}, "model_output": [{"sum_logits": -9.074548721313477, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.508426666259766, "logits_per_token": -4.537274360656738, "logits_per_char": -1.8149097442626954, "num_chars": 5}, {"sum_logits": -3.5533859729766846, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.26093578338623, "logits_per_token": -3.5533859729766846, "logits_per_char": -0.5922309954961141, "num_chars": 6}, {"sum_logits": -23.84680938720703, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -25.33893585205078, "logits_per_token": -11.923404693603516, "logits_per_char": -2.6496454874674478, "num_chars": 9}, {"sum_logits": -7.858855247497559, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.157535552978516, "logits_per_token": -7.858855247497559, "logits_per_char": -0.6549046039581299, "num_chars": 12}, {"sum_logits": -7.653595924377441, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.968234062194824, "logits_per_token": -7.653595924377441, "logits_per_char": -0.6957814476706765, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 481, "native_id": "b42ef8be1748c19fa5938de5396f8fad", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 8.57286548614502, "incorrect_loss_raw": 9.595797061920166, "correct_loss_per_char": 0.5042862050673541, "incorrect_loss_per_char": 0.961834535815499, "correct_loss_per_token": 2.8576218287150064, "incorrect_loss_per_token": 7.860806941986084, "correct_loss_uncond": -10.336686134338379, "incorrect_loss_uncond": -6.451875925064087}, "model_output": [{"sum_logits": -10.070539474487305, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.330537796020508, "logits_per_token": -10.070539474487305, "logits_per_char": -0.915503588589755, "num_chars": 11}, {"sum_logits": -8.57286548614502, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.9095516204834, "logits_per_token": -2.8576218287150064, "logits_per_char": -0.5042862050673541, "num_chars": 17}, {"sum_logits": -7.1914777755737305, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.098298072814941, "logits_per_token": -7.1914777755737305, "logits_per_char": -0.8989347219467163, "num_chars": 8}, {"sum_logits": -10.409940719604492, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -21.276731491088867, "logits_per_token": -3.469980239868164, "logits_per_char": -0.6939960479736328, "num_chars": 15}, {"sum_logits": -10.711230278015137, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.485124588012695, "logits_per_token": -10.711230278015137, "logits_per_char": -1.338903784751892, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 482, "native_id": "236691d38665d7bcdd0c9b9834252a51", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.606480121612549, "incorrect_loss_raw": 5.722773492336273, "correct_loss_per_char": 0.5152114459446498, "incorrect_loss_per_char": 0.9042493246850514, "correct_loss_per_token": 3.606480121612549, "incorrect_loss_per_token": 5.722773492336273, "correct_loss_uncond": -8.226150035858154, "incorrect_loss_uncond": -6.635479986667633}, "model_output": [{"sum_logits": -2.490269422531128, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.187535285949707, "logits_per_token": -2.490269422531128, "logits_per_char": -0.2766966025034587, "num_chars": 9}, {"sum_logits": -7.453200817108154, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.245282173156738, "logits_per_token": -7.453200817108154, "logits_per_char": -1.4906401634216309, "num_chars": 5}, {"sum_logits": -4.066932201385498, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.636459350585938, "logits_per_token": -4.066932201385498, "logits_per_char": -0.5809903144836426, "num_chars": 7}, {"sum_logits": -3.606480121612549, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.832630157470703, "logits_per_token": -3.606480121612549, "logits_per_char": -0.5152114459446498, "num_chars": 7}, {"sum_logits": -8.880691528320312, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.363737106323242, "logits_per_token": -8.880691528320312, "logits_per_char": -1.2686702183314733, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 483, "native_id": "8ef78abb86fc282ccb02bbc495f13030", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.692065715789795, "incorrect_loss_raw": 12.096365451812744, "correct_loss_per_char": 0.19229040827069963, "incorrect_loss_per_char": 1.235538363456726, "correct_loss_per_token": 2.692065715789795, "incorrect_loss_per_token": 9.14131236076355, "correct_loss_uncond": -12.01386022567749, "incorrect_loss_uncond": -3.246951103210449}, "model_output": [{"sum_logits": -6.927093505859375, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.911409378051758, "logits_per_token": -3.4635467529296875, "logits_per_char": -0.8658866882324219, "num_chars": 8}, {"sum_logits": -16.71333122253418, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.556068420410156, "logits_per_token": -8.35666561126709, "logits_per_char": -0.9831371307373047, "num_chars": 17}, {"sum_logits": -11.587250709533691, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.703721046447754, "logits_per_token": -11.587250709533691, "logits_per_char": -1.4484063386917114, "num_chars": 8}, {"sum_logits": -2.692065715789795, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.705925941467285, "logits_per_token": -2.692065715789795, "logits_per_char": -0.19229040827069963, "num_chars": 14}, {"sum_logits": -13.15778636932373, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.202067375183105, "logits_per_token": -13.15778636932373, "logits_per_char": -1.6447232961654663, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 484, "native_id": "313d033c33ec475e04e628f87c5686bd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 8.022496223449707, "incorrect_loss_raw": 15.446174502372742, "correct_loss_per_char": 0.6171150941115159, "incorrect_loss_per_char": 1.3842961918740047, "correct_loss_per_token": 2.0056240558624268, "incorrect_loss_per_token": 7.723087251186371, "correct_loss_uncond": -8.903088569641113, "incorrect_loss_uncond": -3.085366129875183}, "model_output": [{"sum_logits": -6.75958776473999, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.51654052734375, "logits_per_token": -3.379793882369995, "logits_per_char": -0.675958776473999, "num_chars": 10}, {"sum_logits": -21.282787322998047, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -22.158645629882812, "logits_per_token": -10.641393661499023, "logits_per_char": -1.7735656102498372, "num_chars": 12}, {"sum_logits": -8.022496223449707, "num_tokens": 4, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.92558479309082, "logits_per_token": -2.0056240558624268, "logits_per_char": -0.6171150941115159, "num_chars": 13}, {"sum_logits": -21.5621337890625, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.47633171081543, "logits_per_token": -10.78106689453125, "logits_per_char": -1.3476333618164062, "num_chars": 16}, {"sum_logits": -12.18018913269043, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.974644660949707, "logits_per_token": -6.090094566345215, "logits_per_char": -1.7400270189557756, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 485, "native_id": "d581e0ad6a4c89465dc1a527bd2d3f77", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.3313307762145996, "incorrect_loss_raw": 8.371495246887207, "correct_loss_per_char": 0.33313307762145994, "incorrect_loss_per_char": 1.0115716123993779, "correct_loss_per_token": 3.3313307762145996, "incorrect_loss_per_token": 8.371495246887207, "correct_loss_uncond": -9.209826946258545, "incorrect_loss_uncond": -6.071969032287598}, "model_output": [{"sum_logits": -11.058121681213379, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.056010246276855, "logits_per_token": -11.058121681213379, "logits_per_char": -0.7898658343723842, "num_chars": 14}, {"sum_logits": -3.701411247253418, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.741337776184082, "logits_per_token": -3.701411247253418, "logits_per_char": -0.4112679163614909, "num_chars": 9}, {"sum_logits": -10.47519302368164, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.566081047058105, "logits_per_token": -10.47519302368164, "logits_per_char": -2.0950386047363283, "num_chars": 5}, {"sum_logits": -8.25125503540039, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.410428047180176, "logits_per_token": -8.25125503540039, "logits_per_char": -0.7501140941273082, "num_chars": 11}, {"sum_logits": -3.3313307762145996, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.541157722473145, "logits_per_token": -3.3313307762145996, "logits_per_char": -0.33313307762145994, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 486, "native_id": "f232bfea2a7611999688a252e476c040", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.8206489086151123, "incorrect_loss_raw": 9.540544509887695, "correct_loss_per_char": 0.31340543429056805, "incorrect_loss_per_char": 1.4859108236100937, "correct_loss_per_token": 2.8206489086151123, "incorrect_loss_per_token": 8.761947393417358, "correct_loss_uncond": -9.442968606948853, "incorrect_loss_uncond": -4.171031951904297}, "model_output": [{"sum_logits": -8.895930290222168, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -13.82812213897705, "logits_per_token": -8.895930290222168, "logits_per_char": -1.7791860580444336, "num_chars": 5}, {"sum_logits": -2.8206489086151123, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": true, "sum_logits_uncond": -12.263617515563965, "logits_per_token": -2.8206489086151123, "logits_per_char": -0.31340543429056805, "num_chars": 9}, {"sum_logits": -11.686238288879395, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -11.725452423095703, "logits_per_token": -11.686238288879395, "logits_per_char": -2.3372476577758787, "num_chars": 5}, {"sum_logits": -11.351232528686523, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.1304292678833, "logits_per_token": -11.351232528686523, "logits_per_char": -1.1351232528686523, "num_chars": 10}, {"sum_logits": -6.228776931762695, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -15.162302017211914, "logits_per_token": -3.1143884658813477, "logits_per_char": -0.6920863257514106, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 487, "native_id": "91756d8e475d8d59fa0a4e35f408e366", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.01810359954834, "incorrect_loss_raw": 8.664551615715027, "correct_loss_per_char": 0.50301726659139, "incorrect_loss_per_char": 1.3953199704488117, "correct_loss_per_token": 3.01810359954834, "incorrect_loss_per_token": 6.755721688270569, "correct_loss_uncond": -7.315556526184082, "incorrect_loss_uncond": -5.270119547843933}, "model_output": [{"sum_logits": -15.270639419555664, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.85593605041504, "logits_per_token": -7.635319709777832, "logits_per_char": -1.908829927444458, "num_chars": 8}, {"sum_logits": -4.7536211013793945, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.326245307922363, "logits_per_token": -4.7536211013793945, "logits_per_char": -0.9507242202758789, "num_chars": 5}, {"sum_logits": -8.48204231262207, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.807665824890137, "logits_per_token": -8.48204231262207, "logits_per_char": -1.696408462524414, "num_chars": 5}, {"sum_logits": -3.01810359954834, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -10.333660125732422, "logits_per_token": -3.01810359954834, "logits_per_char": -0.50301726659139, "num_chars": 6}, {"sum_logits": -6.1519036293029785, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.7488374710083, "logits_per_token": -6.1519036293029785, "logits_per_char": -1.0253172715504963, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 488, "native_id": "866ea9c668c0b42df19fa20865e31f77", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.8389626741409302, "incorrect_loss_raw": 16.33946967124939, "correct_loss_per_char": 0.0932180749045478, "incorrect_loss_per_char": 1.4127730809006038, "correct_loss_per_token": 0.8389626741409302, "incorrect_loss_per_token": 9.573446035385132, "correct_loss_uncond": -14.147517085075378, "incorrect_loss_uncond": -2.3589437007904053}, "model_output": [{"sum_logits": -10.58652114868164, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.307762145996094, "logits_per_token": -10.58652114868164, "logits_per_char": -1.323315143585205, "num_chars": 8}, {"sum_logits": -10.255011558532715, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.77853012084961, "logits_per_token": -10.255011558532715, "logits_per_char": -1.7091685930887859, "num_chars": 6}, {"sum_logits": -15.680816650390625, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.797832489013672, "logits_per_token": -7.8404083251953125, "logits_per_char": -0.9224009794347426, "num_chars": 17}, {"sum_logits": -0.8389626741409302, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": true, "sum_logits_uncond": -14.986479759216309, "logits_per_token": -0.8389626741409302, "logits_per_char": -0.0932180749045478, "num_chars": 9}, {"sum_logits": -28.835529327392578, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -27.909528732299805, "logits_per_token": -9.61184310913086, "logits_per_char": -1.6962076074936812, "num_chars": 17}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 489, "native_id": "22015315e7ff79386877828b4fa27799", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.686471462249756, "incorrect_loss_raw": 10.51121473312378, "correct_loss_per_char": 0.3072059551874797, "incorrect_loss_per_char": 1.7676300730024068, "correct_loss_per_token": 1.843235731124878, "incorrect_loss_per_token": 8.293722152709961, "correct_loss_uncond": -12.97179651260376, "incorrect_loss_uncond": -3.8119924068450928}, "model_output": [{"sum_logits": -17.739940643310547, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.308828353881836, "logits_per_token": -8.869970321655273, "logits_per_char": -2.5342772347586497, "num_chars": 7}, {"sum_logits": -10.020452499389648, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.693686485290527, "logits_per_token": -10.020452499389648, "logits_per_char": -2.00409049987793, "num_chars": 5}, {"sum_logits": -9.742218017578125, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.889653205871582, "logits_per_token": -9.742218017578125, "logits_per_char": -1.6237030029296875, "num_chars": 6}, {"sum_logits": -3.686471462249756, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.658267974853516, "logits_per_token": -1.843235731124878, "logits_per_char": -0.3072059551874797, "num_chars": 12}, {"sum_logits": -4.542247772216797, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.400660514831543, "logits_per_token": -4.542247772216797, "logits_per_char": -0.9084495544433594, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 490, "native_id": "484f6e4fb8e6431b010c299490b72e3c", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.116013765335083, "incorrect_loss_raw": 14.284478664398193, "correct_loss_per_char": 0.3116013765335083, "incorrect_loss_per_char": 1.598005837864346, "correct_loss_per_token": 1.5580068826675415, "incorrect_loss_per_token": 12.07666540145874, "correct_loss_uncond": -13.843638181686401, "incorrect_loss_uncond": -0.22604131698608398}, "model_output": [{"sum_logits": -17.252368927001953, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.202067375183105, "logits_per_token": -17.252368927001953, "logits_per_char": -2.156546115875244, "num_chars": 8}, {"sum_logits": -3.116013765335083, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.959651947021484, "logits_per_token": -1.5580068826675415, "logits_per_char": -0.3116013765335083, "num_chars": 10}, {"sum_logits": -17.662506103515625, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.571216583251953, "logits_per_token": -8.831253051757812, "logits_per_char": -1.7662506103515625, "num_chars": 10}, {"sum_logits": -12.181201934814453, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -11.090190887451172, "logits_per_token": -12.181201934814453, "logits_per_char": -1.3534668816460504, "num_chars": 9}, {"sum_logits": -10.041837692260742, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.178605079650879, "logits_per_token": -10.041837692260742, "logits_per_char": -1.115759743584527, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 491, "native_id": "7322d0dcf2e27c7032626a3639f5696b", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.9998050928115845, "incorrect_loss_raw": 13.686284065246582, "correct_loss_per_char": 0.3999610185623169, "incorrect_loss_per_char": 1.4884309794479633, "correct_loss_per_token": 1.9998050928115845, "incorrect_loss_per_token": 7.462425669034322, "correct_loss_uncond": -11.715312361717224, "incorrect_loss_uncond": -2.1212215423583984}, "model_output": [{"sum_logits": -11.321194648742676, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.889653205871582, "logits_per_token": -11.321194648742676, "logits_per_char": -1.886865774790446, "num_chars": 6}, {"sum_logits": -19.10077667236328, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.509748458862305, "logits_per_token": -6.366925557454427, "logits_per_char": -1.7364342429421165, "num_chars": 11}, {"sum_logits": -16.020395278930664, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.855976104736328, "logits_per_token": -8.010197639465332, "logits_per_char": -1.1443139484950475, "num_chars": 14}, {"sum_logits": -8.302769660949707, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.974644660949707, "logits_per_token": -4.1513848304748535, "logits_per_char": -1.186109951564244, "num_chars": 7}, {"sum_logits": -1.9998050928115845, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.715117454528809, "logits_per_token": -1.9998050928115845, "logits_per_char": -0.3999610185623169, "num_chars": 5}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 492, "native_id": "0519b0b0869681c2884f53dbfa43e538", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.305108070373535, "incorrect_loss_raw": 9.298734426498413, "correct_loss_per_char": 0.478345341152615, "incorrect_loss_per_char": 0.9207891239060296, "correct_loss_per_token": 2.1525540351867676, "incorrect_loss_per_token": 4.1793595155080165, "correct_loss_uncond": -10.84899616241455, "incorrect_loss_uncond": -7.875620365142822}, "model_output": [{"sum_logits": -9.654760360717773, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.102752685546875, "logits_per_token": -4.827380180358887, "logits_per_char": -1.2068450450897217, "num_chars": 8}, {"sum_logits": -7.413003921508789, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -16.959651947021484, "logits_per_token": -3.7065019607543945, "logits_per_char": -0.741300392150879, "num_chars": 10}, {"sum_logits": -4.305108070373535, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.154104232788086, "logits_per_token": -2.1525540351867676, "logits_per_char": -0.478345341152615, "num_chars": 9}, {"sum_logits": -11.280184745788574, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -21.327354431152344, "logits_per_token": -3.760061581929525, "logits_per_char": -0.7520123163859049, "num_chars": 15}, {"sum_logits": -8.846988677978516, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.307660102844238, "logits_per_token": -4.423494338989258, "logits_per_char": -0.9829987419976128, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 493, "native_id": "1ab04c0501b815b2a48f2581f04215a8", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.4254732131958, "incorrect_loss_raw": 7.026169657707214, "correct_loss_per_char": 0.78545610109965, "incorrect_loss_per_char": 0.9155198632896722, "correct_loss_per_token": 4.7127366065979, "incorrect_loss_per_token": 4.289736807346344, "correct_loss_uncond": -11.97455883026123, "incorrect_loss_uncond": -10.10788357257843}, "model_output": [{"sum_logits": -6.2132158279418945, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -11.10049819946289, "logits_per_token": -6.2132158279418945, "logits_per_char": -1.242643165588379, "num_chars": 5}, {"sum_logits": -5.317952632904053, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.692176818847656, "logits_per_token": -2.6589763164520264, "logits_per_char": -0.4834502393549139, "num_chars": 11}, {"sum_logits": -9.4254732131958, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -21.40003204345703, "logits_per_token": -4.7127366065979, "logits_per_char": -0.78545610109965, "num_chars": 12}, {"sum_logits": -8.30942153930664, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -21.60676383972168, "logits_per_token": -4.15471076965332, "logits_per_char": -0.7554019581187855, "num_chars": 11}, {"sum_logits": -8.26408863067627, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.13677406311035, "logits_per_token": -4.132044315338135, "logits_per_char": -1.18058409009661, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 494, "native_id": "7776b10c7bb96f3fe5e026678673634d", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.382013320922852, "incorrect_loss_raw": 10.06401014328003, "correct_loss_per_char": 0.6260019029889788, "incorrect_loss_per_char": 1.1949766823739716, "correct_loss_per_token": 4.382013320922852, "incorrect_loss_per_token": 7.418562054634094, "correct_loss_uncond": -9.296921730041504, "incorrect_loss_uncond": -4.099060773849487}, "model_output": [{"sum_logits": -8.500059127807617, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.10049819946289, "logits_per_token": -8.500059127807617, "logits_per_char": -1.7000118255615235, "num_chars": 5}, {"sum_logits": -4.382013320922852, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.678935050964355, "logits_per_token": -4.382013320922852, "logits_per_char": -0.6260019029889788, "num_chars": 7}, {"sum_logits": -10.59239673614502, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.08215045928955, "logits_per_token": -10.59239673614502, "logits_per_char": -0.9629451578313654, "num_chars": 11}, {"sum_logits": -11.610703468322754, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.697437286376953, "logits_per_token": -5.805351734161377, "logits_per_char": -1.0555184971202503, "num_chars": 11}, {"sum_logits": -9.552881240844727, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.772197723388672, "logits_per_token": -4.776440620422363, "logits_per_char": -1.0614312489827473, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 495, "native_id": "f7c005244d406b9bde48dc8c22003af1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.8105388879776, "incorrect_loss_raw": 7.08232057094574, "correct_loss_per_char": 0.18105388879776002, "incorrect_loss_per_char": 0.9811077592464594, "correct_loss_per_token": 1.8105388879776, "incorrect_loss_per_token": 5.525548100471497, "correct_loss_uncond": -12.727456450462341, "incorrect_loss_uncond": -8.432997345924377}, "model_output": [{"sum_logits": -1.8105388879776, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": true, "sum_logits_uncond": -14.537995338439941, "logits_per_token": -1.8105388879776, "logits_per_char": -0.18105388879776002, "num_chars": 10}, {"sum_logits": -6.6569743156433105, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.52684497833252, "logits_per_token": -6.6569743156433105, "logits_per_char": -0.8321217894554138, "num_chars": 8}, {"sum_logits": -5.448724746704102, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.12881088256836, "logits_per_token": -2.724362373352051, "logits_per_char": -0.41913267282339245, "num_chars": 13}, {"sum_logits": -7.005455017089844, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.040109634399414, "logits_per_token": -3.502727508544922, "logits_per_char": -1.751363754272461, "num_chars": 4}, {"sum_logits": -9.218128204345703, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.365506172180176, "logits_per_token": -9.218128204345703, "logits_per_char": -0.9218128204345704, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 496, "native_id": "88501d528c855e2b533b3fea2f86183d", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.39047384262085, "incorrect_loss_raw": 11.128238439559937, "correct_loss_per_char": 0.5809521675109863, "incorrect_loss_per_char": 1.2289073969188489, "correct_loss_per_token": 3.195236921310425, "incorrect_loss_per_token": 7.6019275188446045, "correct_loss_uncond": -12.31109094619751, "incorrect_loss_uncond": -5.615758895874023}, "model_output": [{"sum_logits": -10.782941818237305, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.071391105651855, "logits_per_token": -10.782941818237305, "logits_per_char": -1.7971569697062175, "num_chars": 6}, {"sum_logits": -18.806991577148438, "num_tokens": 4, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -27.030471801757812, "logits_per_token": -4.701747894287109, "logits_per_char": -0.989841661955181, "num_chars": 19}, {"sum_logits": -8.604938507080078, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.709348678588867, "logits_per_token": -8.604938507080078, "logits_per_char": -1.0756173133850098, "num_chars": 8}, {"sum_logits": -6.39047384262085, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.70156478881836, "logits_per_token": -3.195236921310425, "logits_per_char": -0.5809521675109863, "num_chars": 11}, {"sum_logits": -6.318081855773926, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.164777755737305, "logits_per_token": -6.318081855773926, "logits_per_char": -1.0530136426289876, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 497, "native_id": "3d9c3253e24fb108cea9083e8a853cf2", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.75053071975708, "incorrect_loss_raw": 9.798957228660583, "correct_loss_per_char": 0.7501061439514161, "incorrect_loss_per_char": 1.3621483251215918, "correct_loss_per_token": 3.75053071975708, "incorrect_loss_per_token": 7.938843667507172, "correct_loss_uncond": -10.07759141921997, "incorrect_loss_uncond": -5.323896050453186}, "model_output": [{"sum_logits": -7.159262180328369, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -18.17926597595215, "logits_per_token": -3.5796310901641846, "logits_per_char": -0.5966051816940308, "num_chars": 12}, {"sum_logits": -12.803674697875977, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.75673770904541, "logits_per_token": -12.803674697875977, "logits_per_char": -1.4226305219862196, "num_chars": 9}, {"sum_logits": -7.721646308898926, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -15.714458465576172, "logits_per_token": -3.860823154449463, "logits_per_char": -0.5515461649213519, "num_chars": 14}, {"sum_logits": -3.75053071975708, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -13.82812213897705, "logits_per_token": -3.75053071975708, "logits_per_char": -0.7501061439514161, "num_chars": 5}, {"sum_logits": -11.511245727539062, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -11.840950965881348, "logits_per_token": -11.511245727539062, "logits_per_char": -2.8778114318847656, "num_chars": 4}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 498, "native_id": "9808782b2e2e1bfbfa27c41e605bfffe", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.194124221801758, "incorrect_loss_raw": 9.23633897304535, "correct_loss_per_char": 1.865687370300293, "incorrect_loss_per_char": 1.487825993413017, "correct_loss_per_token": 5.597062110900879, "incorrect_loss_per_token": 8.245225608348846, "correct_loss_uncond": -1.1735868453979492, "incorrect_loss_uncond": -4.881550908088684}, "model_output": [{"sum_logits": -8.97274112701416, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.858729362487793, "logits_per_token": -8.97274112701416, "logits_per_char": -1.2818201610020228, "num_chars": 7}, {"sum_logits": -11.408778190612793, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.328857421875, "logits_per_token": -11.408778190612793, "logits_per_char": -2.2817556381225588, "num_chars": 5}, {"sum_logits": -11.194124221801758, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.367711067199707, "logits_per_token": -5.597062110900879, "logits_per_char": -1.865687370300293, "num_chars": 6}, {"sum_logits": -8.634929656982422, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.533038139343262, "logits_per_token": -8.634929656982422, "logits_per_char": -1.7269859313964844, "num_chars": 5}, {"sum_logits": -7.9289069175720215, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.750934600830078, "logits_per_token": -3.9644534587860107, "logits_per_char": -0.6607422431310018, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 499, "native_id": "c432b860fcd7297751ff5254ec4a7956", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.8744170665740967, "incorrect_loss_raw": 11.442999601364136, "correct_loss_per_char": 0.9686042666435242, "incorrect_loss_per_char": 1.4952646957503424, "correct_loss_per_token": 3.8744170665740967, "incorrect_loss_per_token": 8.217975060145061, "correct_loss_uncond": -10.538999795913696, "incorrect_loss_uncond": -3.650817394256592}, "model_output": [{"sum_logits": -3.8744170665740967, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.413416862487793, "logits_per_token": -3.8744170665740967, "logits_per_char": -0.9686042666435242, "num_chars": 4}, {"sum_logits": -8.510055541992188, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.509245872497559, "logits_per_token": -8.510055541992188, "logits_per_char": -1.7020111083984375, "num_chars": 5}, {"sum_logits": -7.191636085510254, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.898821830749512, "logits_per_token": -7.191636085510254, "logits_per_char": -1.7979090213775635, "num_chars": 4}, {"sum_logits": -19.350147247314453, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.074176788330078, "logits_per_token": -6.450049082438151, "logits_per_char": -1.2900098164876301, "num_chars": 15}, {"sum_logits": -10.720159530639648, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.893023490905762, "logits_per_token": -10.720159530639648, "logits_per_char": -1.1911288367377386, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 500, "native_id": "732af155f677a51d05d0c9e080d598b6", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.434891700744629, "incorrect_loss_raw": 9.711720705032349, "correct_loss_per_char": 0.7764131001063755, "incorrect_loss_per_char": 1.6219858305794852, "correct_loss_per_token": 5.434891700744629, "incorrect_loss_per_token": 9.711720705032349, "correct_loss_uncond": -7.530050277709961, "incorrect_loss_uncond": -3.1498146057128906}, "model_output": [{"sum_logits": -8.945494651794434, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.669677734375, "logits_per_token": -8.945494651794434, "logits_per_char": -1.7890989303588867, "num_chars": 5}, {"sum_logits": -12.163074493408203, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.188199996948242, "logits_per_token": -12.163074493408203, "logits_per_char": -1.737582070486886, "num_chars": 7}, {"sum_logits": -9.822107315063477, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.73450756072998, "logits_per_token": -9.822107315063477, "logits_per_char": -0.9822107315063476, "num_chars": 10}, {"sum_logits": -5.434891700744629, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.96494197845459, "logits_per_token": -5.434891700744629, "logits_per_char": -0.7764131001063755, "num_chars": 7}, {"sum_logits": -7.916206359863281, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -10.853755950927734, "logits_per_token": -7.916206359863281, "logits_per_char": -1.9790515899658203, "num_chars": 4}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 501, "native_id": "48abc2c113623fd72f758502529f93a5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.237739562988281, "incorrect_loss_raw": 12.585090398788452, "correct_loss_per_char": 0.6022199742934283, "incorrect_loss_per_char": 0.8694239255690599, "correct_loss_per_token": 3.4125798543294272, "incorrect_loss_per_token": 7.464292883872986, "correct_loss_uncond": -12.07761001586914, "incorrect_loss_uncond": -4.118885517120361}, "model_output": [{"sum_logits": -9.373981475830078, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.98448371887207, "logits_per_token": -9.373981475830078, "logits_per_char": -1.0415534973144531, "num_chars": 9}, {"sum_logits": -14.234275817871094, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.91391372680664, "logits_per_token": -7.117137908935547, "logits_per_char": -0.8373103422277114, "num_chars": 17}, {"sum_logits": -10.237739562988281, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -22.315349578857422, "logits_per_token": -3.4125798543294272, "logits_per_char": -0.6022199742934283, "num_chars": 17}, {"sum_logits": -13.671379089355469, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.115376472473145, "logits_per_token": -6.835689544677734, "logits_per_char": -0.9114252726236979, "num_chars": 15}, {"sum_logits": -13.060725212097168, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.8021297454834, "logits_per_token": -6.530362606048584, "logits_per_char": -0.6874065901103773, "num_chars": 19}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 502, "native_id": "03f06f77aaf80b5f5e296ffbd11e9d82", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.849344253540039, "incorrect_loss_raw": 12.201582670211792, "correct_loss_per_char": 0.34994038668545807, "incorrect_loss_per_char": 1.3005110166289588, "correct_loss_per_token": 3.849344253540039, "incorrect_loss_per_token": 7.930401921272278, "correct_loss_uncond": -10.71279525756836, "incorrect_loss_uncond": -3.9908173084259033}, "model_output": [{"sum_logits": -15.1426420211792, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.074176788330078, "logits_per_token": -5.047547340393066, "logits_per_char": -1.0095094680786132, "num_chars": 15}, {"sum_logits": -3.849344253540039, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.562139511108398, "logits_per_token": -3.849344253540039, "logits_per_char": -0.34994038668545807, "num_chars": 11}, {"sum_logits": -13.979256629943848, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.055910110473633, "logits_per_token": -6.989628314971924, "logits_per_char": -1.2708415118130771, "num_chars": 11}, {"sum_logits": -8.617094039916992, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.749859809875488, "logits_per_token": -8.617094039916992, "logits_per_char": -1.077136754989624, "num_chars": 8}, {"sum_logits": -11.067337989807129, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.889653205871582, "logits_per_token": -11.067337989807129, "logits_per_char": -1.8445563316345215, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 503, "native_id": "e7084c166ec67d0f983a26e055e845c6", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 2.038731098175049, "incorrect_loss_raw": 4.713871330022812, "correct_loss_per_char": 0.3397885163625081, "incorrect_loss_per_char": 0.6966418509682019, "correct_loss_per_token": 2.038731098175049, "incorrect_loss_per_token": 3.8250183761119843, "correct_loss_uncond": -10.032660007476807, "incorrect_loss_uncond": -8.985107809305191}, "model_output": [{"sum_logits": -6.245998382568359, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -13.289554595947266, "logits_per_token": -6.245998382568359, "logits_per_char": -1.0409997304280598, "num_chars": 6}, {"sum_logits": -1.4164971113204956, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": true, "sum_logits_uncond": -11.840950965881348, "logits_per_token": -1.4164971113204956, "logits_per_char": -0.3541242778301239, "num_chars": 4}, {"sum_logits": -2.038731098175049, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -12.071391105651855, "logits_per_token": -2.038731098175049, "logits_per_char": -0.3397885163625081, "num_chars": 6}, {"sum_logits": -4.0821661949157715, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -9.888992309570312, "logits_per_token": -4.0821661949157715, "logits_per_char": -0.6803610324859619, "num_chars": 6}, {"sum_logits": -7.110823631286621, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -19.776418685913086, "logits_per_token": -3.5554118156433105, "logits_per_char": -0.7110823631286621, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 504, "native_id": "c55c31b5a2aa996f3b75ad88c017a6b9", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.073536396026611, "incorrect_loss_raw": 7.819675445556641, "correct_loss_per_char": 0.5091920495033264, "incorrect_loss_per_char": 1.2054068984407367, "correct_loss_per_token": 4.073536396026611, "incorrect_loss_per_token": 6.413380940755208, "correct_loss_uncond": -9.97176218032837, "incorrect_loss_uncond": -6.902040481567383}, "model_output": [{"sum_logits": -7.432062149047852, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.800455093383789, "logits_per_token": -7.432062149047852, "logits_per_char": -1.4864124298095702, "num_chars": 5}, {"sum_logits": -4.073536396026611, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.04529857635498, "logits_per_token": -4.073536396026611, "logits_per_char": -0.5091920495033264, "num_chars": 8}, {"sum_logits": -8.777359008789062, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -12.882275581359863, "logits_per_token": -8.777359008789062, "logits_per_char": -1.4628931681315105, "num_chars": 6}, {"sum_logits": -8.437767028808594, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.822994232177734, "logits_per_token": -2.8125890096028647, "logits_per_char": -0.7670697298916903, "num_chars": 11}, {"sum_logits": -6.631513595581055, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.381138801574707, "logits_per_token": -6.631513595581055, "logits_per_char": -1.1052522659301758, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 505, "native_id": "463521a93ae71e93bea8b97cdf7a6792", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.522525787353516, "incorrect_loss_raw": 13.505427837371826, "correct_loss_per_char": 1.587087631225586, "incorrect_loss_per_char": 1.166178133787015, "correct_loss_per_token": 9.522525787353516, "incorrect_loss_per_token": 8.834120273590088, "correct_loss_uncond": -4.738409996032715, "incorrect_loss_uncond": -2.329124689102173}, "model_output": [{"sum_logits": -9.954957008361816, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -14.071504592895508, "logits_per_token": -9.954957008361816, "logits_per_char": -1.4221367154802596, "num_chars": 7}, {"sum_logits": -11.092500686645508, "num_tokens": 3, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -19.408584594726562, "logits_per_token": -3.697500228881836, "logits_per_char": -0.7923214776175362, "num_chars": 14}, {"sum_logits": -10.393794059753418, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -12.349780082702637, "logits_per_token": -10.393794059753418, "logits_per_char": -0.9448903690684926, "num_chars": 11}, {"sum_logits": -9.522525787353516, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -14.26093578338623, "logits_per_token": -9.522525787353516, "logits_per_char": -1.587087631225586, "num_chars": 6}, {"sum_logits": -22.580459594726562, "num_tokens": 2, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -17.50834083557129, "logits_per_token": -11.290229797363281, "logits_per_char": -1.505363972981771, "num_chars": 15}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 506, "native_id": "c036ce033bc429ac1aba0a6ac8d057e1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.620301246643066, "incorrect_loss_raw": 12.582959413528442, "correct_loss_per_char": 0.8029001780918666, "incorrect_loss_per_char": 1.4173114256425337, "correct_loss_per_token": 5.620301246643066, "incorrect_loss_per_token": 8.894476652145386, "correct_loss_uncond": -7.469593048095703, "incorrect_loss_uncond": -2.5340683460235596}, "model_output": [{"sum_logits": -8.428388595581055, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.682783126831055, "logits_per_token": -8.428388595581055, "logits_per_char": -1.685677719116211, "num_chars": 5}, {"sum_logits": -5.620301246643066, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.08989429473877, "logits_per_token": -5.620301246643066, "logits_per_char": -0.8029001780918666, "num_chars": 7}, {"sum_logits": -12.395586967468262, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.098958015441895, "logits_per_token": -12.395586967468262, "logits_per_char": -1.1268715424971147, "num_chars": 11}, {"sum_logits": -19.157987594604492, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.52203941345215, "logits_per_token": -9.578993797302246, "logits_per_char": -1.9157987594604493, "num_chars": 10}, {"sum_logits": -10.349874496459961, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.16433048248291, "logits_per_token": -5.1749372482299805, "logits_per_char": -0.9408976814963601, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 507, "native_id": "db7f2bfdabcf53d6778fd7af80b603d2", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 3.613567352294922, "incorrect_loss_raw": 9.09420394897461, "correct_loss_per_char": 0.3613567352294922, "incorrect_loss_per_char": 0.948403942679602, "correct_loss_per_token": 1.806783676147461, "incorrect_loss_per_token": 6.350353002548218, "correct_loss_uncond": -12.163641929626465, "incorrect_loss_uncond": -7.766614675521851}, "model_output": [{"sum_logits": -3.613567352294922, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.777209281921387, "logits_per_token": -1.806783676147461, "logits_per_char": -0.3613567352294922, "num_chars": 10}, {"sum_logits": -12.353398323059082, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.001758575439453, "logits_per_token": -12.353398323059082, "logits_per_char": -1.7647711890084403, "num_chars": 7}, {"sum_logits": -12.045784950256348, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.01970863342285, "logits_per_token": -6.022892475128174, "logits_per_char": -0.6692102750142416, "num_chars": 18}, {"sum_logits": -9.905022621154785, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.398252487182617, "logits_per_token": -4.952511310577393, "logits_per_char": -1.1005580690171983, "num_chars": 9}, {"sum_logits": -2.0726099014282227, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": true, "sum_logits_uncond": -13.023554801940918, "logits_per_token": -2.0726099014282227, "logits_per_char": -0.25907623767852783, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 508, "native_id": "8605fd2affc796d79073d0f3ef0761c9", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.345042705535889, "incorrect_loss_raw": 10.888967275619507, "correct_loss_per_char": 0.2896695137023926, "incorrect_loss_per_char": 1.3177363631489512, "correct_loss_per_token": 2.1725213527679443, "incorrect_loss_per_token": 8.901939511299133, "correct_loss_uncond": -15.536322116851807, "incorrect_loss_uncond": -2.6999661922454834}, "model_output": [{"sum_logits": -10.784130096435547, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.562989234924316, "logits_per_token": -10.784130096435547, "logits_per_char": -1.5405900137765067, "num_chars": 7}, {"sum_logits": -4.345042705535889, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.881364822387695, "logits_per_token": -2.1725213527679443, "logits_per_char": -0.2896695137023926, "num_chars": 15}, {"sum_logits": -15.896222114562988, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.424861907958984, "logits_per_token": -7.948111057281494, "logits_per_char": -1.2227863165048451, "num_chars": 13}, {"sum_logits": -8.92311954498291, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -11.018102645874023, "logits_per_token": -8.92311954498291, "logits_per_char": -1.784623908996582, "num_chars": 5}, {"sum_logits": -7.952397346496582, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.349780082702637, "logits_per_token": -7.952397346496582, "logits_per_char": -0.7229452133178711, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 509, "native_id": "ad37795fd9e3a65553683ff305b5113d", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 8.44856071472168, "incorrect_loss_raw": 18.800599575042725, "correct_loss_per_char": 0.7680509740656073, "incorrect_loss_per_char": 1.2924903734417161, "correct_loss_per_token": 4.22428035736084, "incorrect_loss_per_token": 6.771707614262899, "correct_loss_uncond": -5.388964653015137, "incorrect_loss_uncond": -3.329850196838379}, "model_output": [{"sum_logits": -26.777713775634766, "num_tokens": 4, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -29.79528045654297, "logits_per_token": -6.694428443908691, "logits_per_char": -1.3388856887817382, "num_chars": 20}, {"sum_logits": -16.29482650756836, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.23303985595703, "logits_per_token": -5.43160883585612, "logits_per_char": -1.2534481928898737, "num_chars": 13}, {"sum_logits": -6.376260757446289, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.289554595947266, "logits_per_token": -6.376260757446289, "logits_per_char": -1.062710126241048, "num_chars": 6}, {"sum_logits": -8.44856071472168, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.837525367736816, "logits_per_token": -4.22428035736084, "logits_per_char": -0.7680509740656073, "num_chars": 11}, {"sum_logits": -25.753597259521484, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -26.20392417907715, "logits_per_token": -8.584532419840494, "logits_per_char": -1.514917485854205, "num_chars": 17}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 510, "native_id": "bcd51af35d691f5c3b6b548096ab1559", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.546833038330078, "incorrect_loss_raw": 14.41166090965271, "correct_loss_per_char": 1.0781190054757255, "incorrect_loss_per_char": 1.8014576137065887, "correct_loss_per_token": 7.546833038330078, "incorrect_loss_per_token": 7.356261809666952, "correct_loss_uncond": -4.027451515197754, "incorrect_loss_uncond": -1.879251480102539}, "model_output": [{"sum_logits": -11.367671966552734, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.366085052490234, "logits_per_token": -5.683835983276367, "logits_per_char": -1.4209589958190918, "num_chars": 8}, {"sum_logits": -16.9033203125, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.79794692993164, "logits_per_token": -5.634440104166667, "logits_per_char": -2.1129150390625, "num_chars": 8}, {"sum_logits": -12.472331047058105, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.20167064666748, "logits_per_token": -12.472331047058105, "logits_per_char": -1.5590413808822632, "num_chars": 8}, {"sum_logits": -7.546833038330078, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -11.574284553527832, "logits_per_token": -7.546833038330078, "logits_per_char": -1.0781190054757255, "num_chars": 7}, {"sum_logits": -16.9033203125, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.79794692993164, "logits_per_token": -5.634440104166667, "logits_per_char": -2.1129150390625, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 511, "native_id": "b5345f15d5b451562ab9e0851e7f394f", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.049195289611816, "incorrect_loss_raw": 12.001317501068115, "correct_loss_per_char": 2.762298822402954, "incorrect_loss_per_char": 1.249775068713473, "correct_loss_per_token": 11.049195289611816, "incorrect_loss_per_token": 8.392627358436584, "correct_loss_uncond": -2.5700864791870117, "incorrect_loss_uncond": -6.17113995552063}, "model_output": [{"sum_logits": -13.198968887329102, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.363737106323242, "logits_per_token": -13.198968887329102, "logits_per_char": -1.8855669839041573, "num_chars": 7}, {"sum_logits": -8.511031150817871, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.929132461547852, "logits_per_token": -8.511031150817871, "logits_per_char": -1.0638788938522339, "num_chars": 8}, {"sum_logits": -11.049195289611816, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.619281768798828, "logits_per_token": -11.049195289611816, "logits_per_char": -2.762298822402954, "num_chars": 4}, {"sum_logits": -7.048922538757324, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.354098320007324, "logits_per_token": -7.048922538757324, "logits_per_char": -1.1748204231262207, "num_chars": 6}, {"sum_logits": -19.246347427368164, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -30.042861938476562, "logits_per_token": -4.811586856842041, "logits_per_char": -0.8748339739712802, "num_chars": 22}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 512, "native_id": "6a884d5d8febfdd86fcf68ff1a904d9b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.270881652832031, "incorrect_loss_raw": 8.657740116119385, "correct_loss_per_char": 0.4847254435221354, "incorrect_loss_per_char": 0.9000609282291298, "correct_loss_per_token": 3.6354408264160156, "incorrect_loss_per_token": 5.134467005729675, "correct_loss_uncond": -14.589637756347656, "incorrect_loss_uncond": -10.279915809631348}, "model_output": [{"sum_logits": -7.270881652832031, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.860519409179688, "logits_per_token": -3.6354408264160156, "logits_per_char": -0.4847254435221354, "num_chars": 15}, {"sum_logits": -9.184502601623535, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.470205307006836, "logits_per_token": -9.184502601623535, "logits_per_char": -1.3120718002319336, "num_chars": 7}, {"sum_logits": -9.34083366394043, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.803192138671875, "logits_per_token": -4.670416831970215, "logits_per_char": -0.8491666967218573, "num_chars": 11}, {"sum_logits": -8.219181060791016, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.639436721801758, "logits_per_token": -2.739727020263672, "logits_per_char": -0.9132423400878906, "num_chars": 9}, {"sum_logits": -7.886443138122559, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.83778953552246, "logits_per_token": -3.9432215690612793, "logits_per_char": -0.5257628758748373, "num_chars": 15}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 513, "native_id": "a1303b5177df0a5b653c9abd7d5f5e08", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.575629234313965, "incorrect_loss_raw": 9.193712711334229, "correct_loss_per_char": 1.5959382057189941, "incorrect_loss_per_char": 1.0812845748204452, "correct_loss_per_token": 9.575629234313965, "incorrect_loss_per_token": 7.786226749420166, "correct_loss_uncond": -5.341225624084473, "incorrect_loss_uncond": -7.165186643600464}, "model_output": [{"sum_logits": -11.2598876953125, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -22.1622257232666, "logits_per_token": -5.62994384765625, "logits_per_char": -0.7506591796875, "num_chars": 15}, {"sum_logits": -8.507492065429688, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.796231269836426, "logits_per_token": -8.507492065429688, "logits_per_char": -0.6544224665715144, "num_chars": 13}, {"sum_logits": -10.588303565979004, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.067355155944824, "logits_per_token": -10.588303565979004, "logits_per_char": -2.117660713195801, "num_chars": 5}, {"sum_logits": -9.575629234313965, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.916854858398438, "logits_per_token": -9.575629234313965, "logits_per_char": -1.5959382057189941, "num_chars": 6}, {"sum_logits": -6.419167518615723, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.409785270690918, "logits_per_token": -6.419167518615723, "logits_per_char": -0.8023959398269653, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 514, "native_id": "315baf79f8dd3673f67a90de0758240e", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.632708549499512, "incorrect_loss_raw": 9.105228662490845, "correct_loss_per_char": 0.2316354274749756, "incorrect_loss_per_char": 1.222323712375429, "correct_loss_per_token": 1.544236183166504, "incorrect_loss_per_token": 9.105228662490845, "correct_loss_uncond": -11.410643577575684, "incorrect_loss_uncond": -3.8615145683288574}, "model_output": [{"sum_logits": -5.205632209777832, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.893023490905762, "logits_per_token": -5.205632209777832, "logits_per_char": -0.5784035788642036, "num_chars": 9}, {"sum_logits": -8.312348365783691, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.993132591247559, "logits_per_token": -8.312348365783691, "logits_per_char": -1.0390435457229614, "num_chars": 8}, {"sum_logits": -10.583643913269043, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.242856979370117, "logits_per_token": -10.583643913269043, "logits_per_char": -1.511949130467006, "num_chars": 7}, {"sum_logits": -12.319290161132812, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.737959861755371, "logits_per_token": -12.319290161132812, "logits_per_char": -1.7598985944475447, "num_chars": 7}, {"sum_logits": -4.632708549499512, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.043352127075195, "logits_per_token": -1.544236183166504, "logits_per_char": -0.2316354274749756, "num_chars": 20}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 515, "native_id": "01f01cc3ad152773ef42b30e926912bf", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.016983985900879, "incorrect_loss_raw": 12.969090461730957, "correct_loss_per_char": 0.890775998433431, "incorrect_loss_per_char": 1.366999443088259, "correct_loss_per_token": 4.0084919929504395, "incorrect_loss_per_token": 8.512275457382202, "correct_loss_uncond": -7.127215385437012, "incorrect_loss_uncond": -3.2215144634246826}, "model_output": [{"sum_logits": -8.016983985900879, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.14419937133789, "logits_per_token": -4.0084919929504395, "logits_per_char": -0.890775998433431, "num_chars": 9}, {"sum_logits": -22.685657501220703, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.23076629638672, "logits_per_token": -11.342828750610352, "logits_per_char": -1.6204041072300501, "num_chars": 14}, {"sum_logits": -9.248812675476074, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.568839073181152, "logits_per_token": -9.248812675476074, "logits_per_char": -1.1561015844345093, "num_chars": 8}, {"sum_logits": -6.973029136657715, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.262267112731934, "logits_per_token": -6.973029136657715, "logits_per_char": -1.394605827331543, "num_chars": 5}, {"sum_logits": -12.968862533569336, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.700547218322754, "logits_per_token": -6.484431266784668, "logits_per_char": -1.2968862533569336, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 516, "native_id": "f192cfacbaa2f7e0e879f673c8e076a7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.2211079597473145, "incorrect_loss_raw": 10.785003185272217, "correct_loss_per_char": 0.5368513266245524, "incorrect_loss_per_char": 1.1432102388805814, "correct_loss_per_token": 1.6105539798736572, "incorrect_loss_per_token": 6.749550143877665, "correct_loss_uncond": -11.165249347686768, "incorrect_loss_uncond": -5.814913034439087}, "model_output": [{"sum_logits": -3.2211079597473145, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.386357307434082, "logits_per_token": -1.6105539798736572, "logits_per_char": -0.5368513266245524, "num_chars": 6}, {"sum_logits": -13.03684139251709, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.893023490905762, "logits_per_token": -13.03684139251709, "logits_per_char": -1.4485379325018988, "num_chars": 9}, {"sum_logits": -6.541358947753906, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.344594955444336, "logits_per_token": -2.1804529825846353, "logits_per_char": -0.5451132456461588, "num_chars": 12}, {"sum_logits": -8.54370403289795, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.205810546875, "logits_per_token": -4.271852016448975, "logits_per_char": -1.4239506721496582, "num_chars": 6}, {"sum_logits": -15.018108367919922, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.956235885620117, "logits_per_token": -7.509054183959961, "logits_per_char": -1.1552391052246094, "num_chars": 13}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 517, "native_id": "ab8d5e21a2cf34b60a04768b01f1f8e9", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.544314384460449, "incorrect_loss_raw": 9.386909246444702, "correct_loss_per_char": 0.32221039858731354, "incorrect_loss_per_char": 0.9838889993802467, "correct_loss_per_token": 3.544314384460449, "incorrect_loss_per_token": 9.386909246444702, "correct_loss_uncond": -10.815664291381836, "incorrect_loss_uncond": -4.891693115234375}, "model_output": [{"sum_logits": -7.318371772766113, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -12.893023490905762, "logits_per_token": -7.318371772766113, "logits_per_char": -0.8131524191962348, "num_chars": 9}, {"sum_logits": -3.544314384460449, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.359978675842285, "logits_per_token": -3.544314384460449, "logits_per_char": -0.32221039858731354, "num_chars": 11}, {"sum_logits": -9.930601119995117, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -13.365344047546387, "logits_per_token": -9.930601119995117, "logits_per_char": -0.9027819199995561, "num_chars": 11}, {"sum_logits": -9.522624969482422, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -15.22380542755127, "logits_per_token": -9.522624969482422, "logits_per_char": -0.680187497820173, "num_chars": 14}, {"sum_logits": -10.776039123535156, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -15.63223648071289, "logits_per_token": -10.776039123535156, "logits_per_char": -1.5394341605050224, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 518, "native_id": "5d1df1daa886efb78db2103ddc1398eb", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.941092491149902, "incorrect_loss_raw": 11.36362099647522, "correct_loss_per_char": 0.7426365613937378, "incorrect_loss_per_char": 1.2186390075418683, "correct_loss_per_token": 2.970546245574951, "incorrect_loss_per_token": 11.36362099647522, "correct_loss_uncond": -8.965094566345215, "incorrect_loss_uncond": -3.6431515216827393}, "model_output": [{"sum_logits": -5.941092491149902, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.906187057495117, "logits_per_token": -2.970546245574951, "logits_per_char": -0.7426365613937378, "num_chars": 8}, {"sum_logits": -14.348063468933105, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.220572471618652, "logits_per_token": -14.348063468933105, "logits_per_char": -1.7935079336166382, "num_chars": 8}, {"sum_logits": -11.756857872009277, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.24791145324707, "logits_per_token": -11.756857872009277, "logits_per_char": -1.3063175413343642, "num_chars": 9}, {"sum_logits": -9.613542556762695, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.629329681396484, "logits_per_token": -9.613542556762695, "logits_per_char": -0.8011285463968912, "num_chars": 12}, {"sum_logits": -9.7360200881958, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.929276466369629, "logits_per_token": -9.7360200881958, "logits_per_char": -0.97360200881958, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 519, "native_id": "2f8b35d352097cc9277599be49fab0b3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.192300796508789, "incorrect_loss_raw": 9.91363799571991, "correct_loss_per_char": 0.5160250663757324, "incorrect_loss_per_char": 0.9388874008542015, "correct_loss_per_token": 3.0961503982543945, "incorrect_loss_per_token": 5.896389842033386, "correct_loss_uncond": -12.589672088623047, "incorrect_loss_uncond": -7.489233136177063}, "model_output": [{"sum_logits": -9.51894760131836, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.214923858642578, "logits_per_token": -9.51894760131836, "logits_per_char": -0.9518947601318359, "num_chars": 10}, {"sum_logits": -6.032115459442139, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.238256454467773, "logits_per_token": -6.032115459442139, "logits_per_char": -0.8617307799203056, "num_chars": 7}, {"sum_logits": -14.05274486541748, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -22.81265640258789, "logits_per_token": -4.684248288472493, "logits_per_char": -0.9368496576944987, "num_chars": 15}, {"sum_logits": -10.05074405670166, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.34564781188965, "logits_per_token": -3.3502480189005532, "logits_per_char": -1.005074405670166, "num_chars": 10}, {"sum_logits": -6.192300796508789, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.781972885131836, "logits_per_token": -3.0961503982543945, "logits_per_char": -0.5160250663757324, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 520, "native_id": "18eb6a3b54ccf4989e268cfb9ea90f9c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.429430961608887, "incorrect_loss_raw": 10.028877854347229, "correct_loss_per_char": 0.4952953974405924, "incorrect_loss_per_char": 0.6444121425836792, "correct_loss_per_token": 3.7147154808044434, "incorrect_loss_per_token": 3.9020270109176636, "correct_loss_uncond": -13.124493598937988, "incorrect_loss_uncond": -10.556739687919617}, "model_output": [{"sum_logits": -4.178123950958252, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.500715255737305, "logits_per_token": -1.3927079836527507, "logits_per_char": -0.26113274693489075, "num_chars": 16}, {"sum_logits": -7.429430961608887, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.553924560546875, "logits_per_token": -3.7147154808044434, "logits_per_char": -0.4952953974405924, "num_chars": 15}, {"sum_logits": -10.226543426513672, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.223072052001953, "logits_per_token": -3.4088478088378906, "logits_per_char": -0.4648428830233487, "num_chars": 22}, {"sum_logits": -12.293218612670898, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -22.10366439819336, "logits_per_token": -4.097739537556966, "logits_per_char": -0.8195479075113933, "num_chars": 15}, {"sum_logits": -13.417625427246094, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.515018463134766, "logits_per_token": -6.708812713623047, "logits_per_char": -1.0321250328650842, "num_chars": 13}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 521, "native_id": "3e12400bc5a2038a747edf2605787fe8", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.5063252449035645, "incorrect_loss_raw": 16.477686405181885, "correct_loss_per_char": 0.3933089460645403, "incorrect_loss_per_char": 1.543934824871041, "correct_loss_per_token": 1.8354417483011882, "incorrect_loss_per_token": 7.669206778208415, "correct_loss_uncond": -16.782535076141357, "incorrect_loss_uncond": -1.5698041915893555}, "model_output": [{"sum_logits": -19.789533615112305, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.910371780395508, "logits_per_token": -6.596511205037435, "logits_per_char": -1.319302241007487, "num_chars": 15}, {"sum_logits": -13.059867858886719, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.023500442504883, "logits_per_token": -13.059867858886719, "logits_per_char": -2.1766446431477866, "num_chars": 6}, {"sum_logits": -5.5063252449035645, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -22.288860321044922, "logits_per_token": -1.8354417483011882, "logits_per_char": -0.3933089460645403, "num_chars": 14}, {"sum_logits": -16.992103576660156, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.224353790283203, "logits_per_token": -5.664034525553386, "logits_per_char": -0.8943212408768503, "num_chars": 19}, {"sum_logits": -16.06924057006836, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.031736373901367, "logits_per_token": -5.35641352335612, "logits_per_char": -1.78547117445204, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 522, "native_id": "72baf6ca5c4daa01c2cc7fda22183db8", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.3041253089904785, "incorrect_loss_raw": 9.291971802711487, "correct_loss_per_char": 0.6640113917264071, "incorrect_loss_per_char": 1.497357883468851, "correct_loss_per_token": 3.6520626544952393, "incorrect_loss_per_token": 6.189346194267273, "correct_loss_uncond": -10.110700130462646, "incorrect_loss_uncond": -4.6106425523757935}, "model_output": [{"sum_logits": -7.3041253089904785, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.414825439453125, "logits_per_token": -3.6520626544952393, "logits_per_char": -0.6640113917264071, "num_chars": 11}, {"sum_logits": -3.671125888824463, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.475091934204102, "logits_per_token": -3.671125888824463, "logits_per_char": -0.9177814722061157, "num_chars": 4}, {"sum_logits": -10.37615966796875, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.60792350769043, "logits_per_token": -5.188079833984375, "logits_per_char": -0.9432872425426136, "num_chars": 11}, {"sum_logits": -14.444845199584961, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.289185523986816, "logits_per_token": -7.2224225997924805, "logits_per_char": -2.8889690399169923, "num_chars": 5}, {"sum_logits": -8.675756454467773, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.238256454467773, "logits_per_token": -8.675756454467773, "logits_per_char": -1.2393937792096819, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 523, "native_id": "9bac07574c966cae34c85e9f25538cba", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.271230697631836, "incorrect_loss_raw": 14.403645992279053, "correct_loss_per_char": 0.2512488645665786, "incorrect_loss_per_char": 1.1101833099076728, "correct_loss_per_token": 1.4237435658772786, "incorrect_loss_per_token": 6.559970140457153, "correct_loss_uncond": -13.41340446472168, "incorrect_loss_uncond": -4.636573314666748}, "model_output": [{"sum_logits": -10.269645690917969, "num_tokens": 4, "num_tokens_all": 161, "is_greedy": false, "sum_logits_uncond": -18.149246215820312, "logits_per_token": -2.567411422729492, "logits_per_char": -0.6040968053481158, "num_chars": 17}, {"sum_logits": -4.271230697631836, "num_tokens": 3, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -17.684635162353516, "logits_per_token": -1.4237435658772786, "logits_per_char": -0.2512488645665786, "num_chars": 17}, {"sum_logits": -14.089061737060547, "num_tokens": 2, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -18.789621353149414, "logits_per_token": -7.044530868530273, "logits_per_char": -1.5654513041178386, "num_chars": 9}, {"sum_logits": -21.889270782470703, "num_tokens": 2, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -20.172256469726562, "logits_per_token": -10.944635391235352, "logits_per_char": -1.4592847188313802, "num_chars": 15}, {"sum_logits": -11.366605758666992, "num_tokens": 2, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -19.049753189086914, "logits_per_token": -5.683302879333496, "logits_per_char": -0.8119004113333566, "num_chars": 14}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 524, "native_id": "fe2a21ddb1bde76025a961126044a9a3", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.222923278808594, "incorrect_loss_raw": 11.980298042297363, "correct_loss_per_char": 0.6325325599083533, "incorrect_loss_per_char": 1.8618953418330986, "correct_loss_per_token": 8.222923278808594, "incorrect_loss_per_token": 9.817638397216797, "correct_loss_uncond": -8.11848258972168, "incorrect_loss_uncond": -1.8202340602874756}, "model_output": [{"sum_logits": -14.477348327636719, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -11.431853294372559, "logits_per_token": -14.477348327636719, "logits_per_char": -2.8954696655273438, "num_chars": 5}, {"sum_logits": -12.396080017089844, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.445282936096191, "logits_per_token": -12.396080017089844, "logits_per_char": -1.7708685738699776, "num_chars": 7}, {"sum_logits": -8.071805953979492, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -11.587641716003418, "logits_per_token": -8.071805953979492, "logits_per_char": -2.017951488494873, "num_chars": 4}, {"sum_logits": -8.222923278808594, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -16.341405868530273, "logits_per_token": -8.222923278808594, "logits_per_char": -0.6325325599083533, "num_chars": 13}, {"sum_logits": -12.975957870483398, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.737350463867188, "logits_per_token": -4.325319290161133, "logits_per_char": -0.7632916394402, "num_chars": 17}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 525, "native_id": "d03e09b22927542d6b0d5ebe233e467c", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.257362365722656, "incorrect_loss_raw": 7.087779760360718, "correct_loss_per_char": 0.47304026285807294, "incorrect_loss_per_char": 0.7359954422170466, "correct_loss_per_token": 2.128681182861328, "incorrect_loss_per_token": 3.0444751580556235, "correct_loss_uncond": -10.89674186706543, "incorrect_loss_uncond": -10.203385829925537}, "model_output": [{"sum_logits": -9.013559341430664, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -19.64371109008789, "logits_per_token": -3.004519780476888, "logits_per_char": -0.8194144855846058, "num_chars": 11}, {"sum_logits": -9.013559341430664, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -19.64371109008789, "logits_per_token": -3.004519780476888, "logits_per_char": -0.8194144855846058, "num_chars": 11}, {"sum_logits": -6.232708930969238, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -16.372318267822266, "logits_per_token": -2.0775696436564126, "logits_per_char": -0.6232708930969239, "num_chars": 10}, {"sum_logits": -4.091291427612305, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -13.504921913146973, "logits_per_token": -4.091291427612305, "logits_per_char": -0.6818819046020508, "num_chars": 6}, {"sum_logits": -4.257362365722656, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -15.154104232788086, "logits_per_token": -2.128681182861328, "logits_per_char": -0.47304026285807294, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 526, "native_id": "e63a210053cf7f961ca0b5a7e6eb355d", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.1468706130981445, "incorrect_loss_raw": 12.18737006187439, "correct_loss_per_char": 0.5955725510915121, "incorrect_loss_per_char": 1.2033300599107495, "correct_loss_per_token": 1.7867176532745361, "incorrect_loss_per_token": 6.643577257792155, "correct_loss_uncond": -9.438851356506348, "incorrect_loss_uncond": -4.604336977005005}, "model_output": [{"sum_logits": -10.92788028717041, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.61902141571045, "logits_per_token": -5.463940143585205, "logits_per_char": -1.3659850358963013, "num_chars": 8}, {"sum_logits": -8.807662963867188, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.267144203186035, "logits_per_token": -8.807662963867188, "logits_per_char": -1.258237566266741, "num_chars": 7}, {"sum_logits": -7.1468706130981445, "num_tokens": 4, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.585721969604492, "logits_per_token": -1.7867176532745361, "logits_per_char": -0.5955725510915121, "num_chars": 12}, {"sum_logits": -15.788361549377441, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.799480438232422, "logits_per_token": -7.894180774688721, "logits_per_char": -0.9867725968360901, "num_chars": 16}, {"sum_logits": -13.22557544708252, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.481182098388672, "logits_per_token": -4.408525149027507, "logits_per_char": -1.2023250406438655, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 527, "native_id": "a4b4242fab25e86a9d7ffedcaecdcdbe", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.519001007080078, "incorrect_loss_raw": 8.59603238105774, "correct_loss_per_char": 0.9312858581542969, "incorrect_loss_per_char": 0.7467231141196358, "correct_loss_per_token": 6.519001007080078, "incorrect_loss_per_token": 4.823969483375549, "correct_loss_uncond": -9.482757568359375, "incorrect_loss_uncond": -8.208648920059204}, "model_output": [{"sum_logits": -4.2076263427734375, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.023554801940918, "logits_per_token": -4.2076263427734375, "logits_per_char": -0.5259532928466797, "num_chars": 8}, {"sum_logits": -14.606871604919434, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.01970863342285, "logits_per_token": -7.303435802459717, "logits_per_char": -0.8114928669399686, "num_chars": 18}, {"sum_logits": -8.323482513427734, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.398252487182617, "logits_per_token": -4.161741256713867, "logits_per_char": -0.9248313903808594, "num_chars": 9}, {"sum_logits": -6.519001007080078, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.001758575439453, "logits_per_token": -6.519001007080078, "logits_per_char": -0.9312858581542969, "num_chars": 7}, {"sum_logits": -7.246149063110352, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.777209281921387, "logits_per_token": -3.623074531555176, "logits_per_char": -0.7246149063110352, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 528, "native_id": "ec8797b12e3c6666ebe70b2a7680b66f", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.92220687866211, "incorrect_loss_raw": 18.595675230026245, "correct_loss_per_char": 0.9929278980601918, "incorrect_loss_per_char": 1.1721630322210717, "correct_loss_per_token": 5.461103439331055, "incorrect_loss_per_token": 5.911308360099793, "correct_loss_uncond": -7.373809814453125, "incorrect_loss_uncond": -5.659100770950317}, "model_output": [{"sum_logits": -37.23270797729492, "num_tokens": 5, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -41.89158630371094, "logits_per_token": -7.446541595458984, "logits_per_char": -1.6923958171497693, "num_chars": 22}, {"sum_logits": -14.257827758789062, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -18.798416137695312, "logits_per_token": -4.7526092529296875, "logits_per_char": -0.5940761566162109, "num_chars": 24}, {"sum_logits": -11.867993354797363, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.86726951599121, "logits_per_token": -5.933996677398682, "logits_per_char": -1.4834991693496704, "num_chars": 8}, {"sum_logits": -10.92220687866211, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.296016693115234, "logits_per_token": -5.461103439331055, "logits_per_char": -0.9929278980601918, "num_chars": 11}, {"sum_logits": -11.024171829223633, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.46183204650879, "logits_per_token": -5.512085914611816, "logits_per_char": -0.9186809857686361, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 529, "native_id": "4536489e5d8e02aadc3fcc7a55effe20", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.669279098510742, "incorrect_loss_raw": 12.469955801963806, "correct_loss_per_char": 1.4586598873138428, "incorrect_loss_per_char": 0.7478367438441829, "correct_loss_per_token": 11.669279098510742, "incorrect_loss_per_token": 5.985664586226146, "correct_loss_uncond": -2.6384830474853516, "incorrect_loss_uncond": -5.591682076454163}, "model_output": [{"sum_logits": -5.8573689460754395, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.777209281921387, "logits_per_token": -2.9286844730377197, "logits_per_char": -0.585736894607544, "num_chars": 10}, {"sum_logits": -4.763636589050293, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.023554801940918, "logits_per_token": -4.763636589050293, "logits_per_char": -0.5954545736312866, "num_chars": 8}, {"sum_logits": -18.98438835144043, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.399694442749023, "logits_per_token": -9.492194175720215, "logits_per_char": -0.9991783342863384, "num_chars": 19}, {"sum_logits": -11.669279098510742, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.307762145996094, "logits_per_token": -11.669279098510742, "logits_per_char": -1.4586598873138428, "num_chars": 8}, {"sum_logits": -20.274429321289062, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -26.046092987060547, "logits_per_token": -6.7581431070963545, "logits_per_char": -0.8109771728515625, "num_chars": 25}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 530, "native_id": "0854478d174c9127064f0d4b58df7e62", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.892581939697266, "incorrect_loss_raw": 8.434706926345825, "correct_loss_per_char": 1.648763656616211, "incorrect_loss_per_char": 0.9513712704181672, "correct_loss_per_token": 9.892581939697266, "incorrect_loss_per_token": 5.442685683568319, "correct_loss_uncond": -3.656060218811035, "incorrect_loss_uncond": -8.381701231002808}, "model_output": [{"sum_logits": -10.824300765991211, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.705764770507812, "logits_per_token": -3.608100255330404, "logits_per_char": -1.082430076599121, "num_chars": 10}, {"sum_logits": -7.127826690673828, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.079689025878906, "logits_per_token": -2.3759422302246094, "logits_per_char": -0.3394203186035156, "num_chars": 21}, {"sum_logits": -9.892581939697266, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.5486421585083, "logits_per_token": -9.892581939697266, "logits_per_char": -1.648763656616211, "num_chars": 6}, {"sum_logits": -5.939568519592285, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.5768461227417, "logits_per_token": -5.939568519592285, "logits_per_char": -0.7424460649490356, "num_chars": 8}, {"sum_logits": -9.847131729125977, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.903332710266113, "logits_per_token": -9.847131729125977, "logits_per_char": -1.641188621520996, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 531, "native_id": "4b7d1b70060cd1f1a7321795f62a7325", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.395200729370117, "incorrect_loss_raw": 10.703247904777527, "correct_loss_per_char": 0.36626672744750977, "incorrect_loss_per_char": 1.5863064024183484, "correct_loss_per_token": 2.1976003646850586, "incorrect_loss_per_token": 7.386714816093445, "correct_loss_uncond": -14.700504302978516, "incorrect_loss_uncond": -4.831701874732971}, "model_output": [{"sum_logits": -15.108114242553711, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.392427444458008, "logits_per_token": -7.5540571212768555, "logits_per_char": -1.6786793602837458, "num_chars": 9}, {"sum_logits": -4.395200729370117, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.095705032348633, "logits_per_token": -2.1976003646850586, "logits_per_char": -0.36626672744750977, "num_chars": 12}, {"sum_logits": -11.424150466918945, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.741013526916504, "logits_per_token": -5.712075233459473, "logits_per_char": -2.284830093383789, "num_chars": 5}, {"sum_logits": -8.744292259216309, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.4810791015625, "logits_per_token": -8.744292259216309, "logits_per_char": -0.8744292259216309, "num_chars": 10}, {"sum_logits": -7.536434650421143, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.52527904510498, "logits_per_token": -7.536434650421143, "logits_per_char": -1.5072869300842284, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 532, "native_id": "0e6a005eec5e6746f3facf4d608bfd8b", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.73408842086792, "incorrect_loss_raw": 14.49622631072998, "correct_loss_per_char": 1.346817684173584, "incorrect_loss_per_char": 1.1526800199216154, "correct_loss_per_token": 6.73408842086792, "incorrect_loss_per_token": 7.959393501281738, "correct_loss_uncond": -5.887049198150635, "incorrect_loss_uncond": -3.3129587173461914}, "model_output": [{"sum_logits": -21.656320571899414, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -23.43914031982422, "logits_per_token": -7.218773523966472, "logits_per_char": -1.3535200357437134, "num_chars": 16}, {"sum_logits": -17.56467628479004, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.35799789428711, "logits_per_token": -5.854892094930013, "logits_per_char": -1.033216252046473, "num_chars": 17}, {"sum_logits": -10.653406143188477, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.08862590789795, "logits_per_token": -10.653406143188477, "logits_per_char": -1.0653406143188477, "num_chars": 10}, {"sum_logits": -6.73408842086792, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.621137619018555, "logits_per_token": -6.73408842086792, "logits_per_char": -1.346817684173584, "num_chars": 5}, {"sum_logits": -8.110502243041992, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.35097599029541, "logits_per_token": -8.110502243041992, "logits_per_char": -1.1586431775774275, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 533, "native_id": "2d2b69ad187b7c40273ab13caab7dc19", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.117561340332031, "incorrect_loss_raw": 10.963611841201782, "correct_loss_per_char": 0.35985654943129597, "incorrect_loss_per_char": 1.1590317306064424, "correct_loss_per_token": 2.0391871134440103, "incorrect_loss_per_token": 6.700477520624797, "correct_loss_uncond": -12.67500114440918, "incorrect_loss_uncond": -5.677399396896362}, "model_output": [{"sum_logits": -6.117561340332031, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.79256248474121, "logits_per_token": -2.0391871134440103, "logits_per_char": -0.35985654943129597, "num_chars": 17}, {"sum_logits": -15.802501678466797, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.518798828125, "logits_per_token": -7.901250839233398, "logits_per_char": -1.5802501678466796, "num_chars": 10}, {"sum_logits": -6.794981002807617, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.62524700164795, "logits_per_token": -6.794981002807617, "logits_per_char": -0.9707115718296596, "num_chars": 7}, {"sum_logits": -13.726929664611816, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.758136749267578, "logits_per_token": -4.575643221537272, "logits_per_char": -1.143910805384318, "num_chars": 12}, {"sum_logits": -7.530035018920898, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.66186237335205, "logits_per_token": -7.530035018920898, "logits_per_char": -0.9412543773651123, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 534, "native_id": "fde1f9bfc33da302449c0b950d16c0ea", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.8603901863098145, "incorrect_loss_raw": 12.754107236862183, "correct_loss_per_char": 0.4767316977183024, "incorrect_loss_per_char": 1.567435867136175, "correct_loss_per_token": 2.8603901863098145, "incorrect_loss_per_token": 7.5284541845321655, "correct_loss_uncond": -8.24258279800415, "incorrect_loss_uncond": -3.8082492351531982}, "model_output": [{"sum_logits": -18.311412811279297, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -20.071313858032227, "logits_per_token": -9.155706405639648, "logits_per_char": -1.8311412811279297, "num_chars": 10}, {"sum_logits": -2.8603901863098145, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -11.102972984313965, "logits_per_token": -2.8603901863098145, "logits_per_char": -0.4767316977183024, "num_chars": 6}, {"sum_logits": -8.977520942687988, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -16.50049591064453, "logits_per_token": -4.488760471343994, "logits_per_char": -0.8161382675170898, "num_chars": 11}, {"sum_logits": -14.516290664672852, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -17.691415786743164, "logits_per_token": -7.258145332336426, "logits_per_char": -1.319662787697532, "num_chars": 11}, {"sum_logits": -9.211204528808594, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -11.986200332641602, "logits_per_token": -9.211204528808594, "logits_per_char": -2.3028011322021484, "num_chars": 4}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 535, "native_id": "3c90a632f46aeab11fbb73aa59a33892", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.658998012542725, "incorrect_loss_raw": 7.729571223258972, "correct_loss_per_char": 0.2911873757839203, "incorrect_loss_per_char": 0.7516077071428299, "correct_loss_per_token": 1.5529993375142415, "incorrect_loss_per_token": 4.644178867340088, "correct_loss_uncond": -12.828964710235596, "incorrect_loss_uncond": -9.086164355278015}, "model_output": [{"sum_logits": -4.658998012542725, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.48796272277832, "logits_per_token": -1.5529993375142415, "logits_per_char": -0.2911873757839203, "num_chars": 16}, {"sum_logits": -6.461493492126465, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.279050827026367, "logits_per_token": -3.2307467460632324, "logits_per_char": -0.38008785247802734, "num_chars": 17}, {"sum_logits": -8.464224815368652, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.214515686035156, "logits_per_token": -4.232112407684326, "logits_per_char": -0.769474983215332, "num_chars": 11}, {"sum_logits": -6.2351460456848145, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.55066967010498, "logits_per_token": -6.2351460456848145, "logits_per_char": -1.2470292091369628, "num_chars": 5}, {"sum_logits": -9.757420539855957, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.218706130981445, "logits_per_token": -4.8787102699279785, "logits_per_char": -0.6098387837409973, "num_chars": 16}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 536, "native_id": "1f3ccb722600da7d862531416934949a", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 3.679718017578125, "incorrect_loss_raw": 6.027536988258362, "correct_loss_per_char": 0.33451981977982953, "incorrect_loss_per_char": 0.8003823234921409, "correct_loss_per_token": 1.8398590087890625, "incorrect_loss_per_token": 4.615930199623108, "correct_loss_uncond": -14.4652099609375, "incorrect_loss_uncond": -10.414301991462708}, "model_output": [{"sum_logits": -7.502070426940918, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.781972885131836, "logits_per_token": -3.751035213470459, "logits_per_char": -0.6251725355784098, "num_chars": 12}, {"sum_logits": -10.238571166992188, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.9962739944458, "logits_per_token": -10.238571166992188, "logits_per_char": -2.0477142333984375, "num_chars": 5}, {"sum_logits": -3.679718017578125, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.144927978515625, "logits_per_token": -1.8398590087890625, "logits_per_char": -0.33451981977982953, "num_chars": 11}, {"sum_logits": -2.5787224769592285, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.214923858642578, "logits_per_token": -2.5787224769592285, "logits_per_char": -0.2578722476959229, "num_chars": 10}, {"sum_logits": -3.7907838821411133, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.774185180664062, "logits_per_token": -1.8953919410705566, "logits_per_char": -0.2707702772957938, "num_chars": 14}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 537, "native_id": "46ba5d2b8cfc6708e5e2618568d8730e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.317896842956543, "incorrect_loss_raw": 10.038848876953125, "correct_loss_per_char": 0.33214591099665713, "incorrect_loss_per_char": 1.04909812495822, "correct_loss_per_token": 2.1589484214782715, "incorrect_loss_per_token": 7.197340130805969, "correct_loss_uncond": -14.106965065002441, "incorrect_loss_uncond": -6.674378156661987}, "model_output": [{"sum_logits": -4.317896842956543, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.424861907958984, "logits_per_token": -2.1589484214782715, "logits_per_char": -0.33214591099665713, "num_chars": 13}, {"sum_logits": -8.122587203979492, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.998748779296875, "logits_per_token": -8.122587203979492, "logits_per_char": -1.160369600568499, "num_chars": 7}, {"sum_logits": -9.300738334655762, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.562989234924316, "logits_per_token": -9.300738334655762, "logits_per_char": -1.3286769049508231, "num_chars": 7}, {"sum_logits": -11.512479782104492, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.409805297851562, "logits_per_token": -5.756239891052246, "logits_per_char": -0.9593733151753744, "num_chars": 12}, {"sum_logits": -11.219590187072754, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.881364822387695, "logits_per_token": -5.609795093536377, "logits_per_char": -0.7479726791381835, "num_chars": 15}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 538, "native_id": "f8a2cbc7189b92a809ce9cd857030621", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.427818775177002, "incorrect_loss_raw": 11.945390343666077, "correct_loss_per_char": 0.714202086130778, "incorrect_loss_per_char": 1.0491768986932815, "correct_loss_per_token": 3.213909387588501, "incorrect_loss_per_token": 7.7916496594746905, "correct_loss_uncond": -11.043138027191162, "incorrect_loss_uncond": -4.618197560310364}, "model_output": [{"sum_logits": -14.474639892578125, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.774185180664062, "logits_per_token": -4.824879964192708, "logits_per_char": -1.033902849469866, "num_chars": 14}, {"sum_logits": -6.427818775177002, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.470956802368164, "logits_per_token": -3.213909387588501, "logits_per_char": -0.714202086130778, "num_chars": 9}, {"sum_logits": -13.532648086547852, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.140450477600098, "logits_per_token": -13.532648086547852, "logits_per_char": -1.5036275651719835, "num_chars": 9}, {"sum_logits": -13.930405616760254, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.767471313476562, "logits_per_token": -6.965202808380127, "logits_per_char": -0.9286937077840169, "num_chars": 15}, {"sum_logits": -5.843867778778076, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.572244644165039, "logits_per_token": -5.843867778778076, "logits_per_char": -0.7304834723472595, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 539, "native_id": "225287e06c993feee34e0f06b25f6ba8", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.6712512969970703, "incorrect_loss_raw": 8.804555773735046, "correct_loss_per_char": 0.6118752161661783, "incorrect_loss_per_char": 1.3212948957046906, "correct_loss_per_token": 3.6712512969970703, "incorrect_loss_per_token": 7.510249495506287, "correct_loss_uncond": -7.138764381408691, "incorrect_loss_uncond": -4.013912081718445}, "model_output": [{"sum_logits": -10.354450225830078, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.892102241516113, "logits_per_token": -5.177225112915039, "logits_per_char": -0.9413136568936434, "num_chars": 11}, {"sum_logits": -3.6712512969970703, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -10.810015678405762, "logits_per_token": -3.6712512969970703, "logits_per_char": -0.6118752161661783, "num_chars": 6}, {"sum_logits": -8.886268615722656, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.676141738891602, "logits_per_token": -8.886268615722656, "logits_per_char": -1.7772537231445313, "num_chars": 5}, {"sum_logits": -4.97195291519165, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.260344505310059, "logits_per_token": -4.97195291519165, "logits_per_char": -0.9943905830383301, "num_chars": 5}, {"sum_logits": -11.0055513381958, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.445282936096191, "logits_per_token": -11.0055513381958, "logits_per_char": -1.5722216197422572, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 540, "native_id": "e211b1a3f3401d164c8b0bfc10160caa", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.746966361999512, "incorrect_loss_raw": 11.755568265914917, "correct_loss_per_char": 0.3968803742352654, "incorrect_loss_per_char": 1.2673651845682234, "correct_loss_per_token": 3.373483180999756, "incorrect_loss_per_token": 8.3819895585378, "correct_loss_uncond": -12.983443260192871, "incorrect_loss_uncond": -4.432478666305542}, "model_output": [{"sum_logits": -11.062195777893066, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.214360237121582, "logits_per_token": -11.062195777893066, "logits_per_char": -1.3827744722366333, "num_chars": 8}, {"sum_logits": -20.241472244262695, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -24.480716705322266, "logits_per_token": -6.7471574147542315, "logits_per_char": -1.4458194460187639, "num_chars": 14}, {"sum_logits": -6.746966361999512, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.730409622192383, "logits_per_token": -3.373483180999756, "logits_per_char": -0.3968803742352654, "num_chars": 17}, {"sum_logits": -6.624988555908203, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.480264663696289, "logits_per_token": -6.624988555908203, "logits_per_char": -1.104164759318034, "num_chars": 6}, {"sum_logits": -9.093616485595703, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.5768461227417, "logits_per_token": -9.093616485595703, "logits_per_char": -1.136702060699463, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 541, "native_id": "fce1c5d069758aea57a787fc98dcf7a9", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.463749647140503, "incorrect_loss_raw": 11.674355268478394, "correct_loss_per_char": 0.35196423530578613, "incorrect_loss_per_char": 1.0853171095049758, "correct_loss_per_token": 2.463749647140503, "incorrect_loss_per_token": 6.912301143010457, "correct_loss_uncond": -9.88459324836731, "incorrect_loss_uncond": -4.286253213882446}, "model_output": [{"sum_logits": -13.838187217712402, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.089000701904297, "logits_per_token": -4.612729072570801, "logits_per_char": -0.9884419441223145, "num_chars": 14}, {"sum_logits": -9.253617286682129, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.202109336853027, "logits_per_token": -9.253617286682129, "logits_per_char": -0.7118167143601638, "num_chars": 13}, {"sum_logits": -14.734137535095215, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.955181121826172, "logits_per_token": -4.911379178365071, "logits_per_char": -0.8667139726526597, "num_chars": 17}, {"sum_logits": -8.871479034423828, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.596142768859863, "logits_per_token": -8.871479034423828, "logits_per_char": -1.7742958068847656, "num_chars": 5}, {"sum_logits": -2.463749647140503, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": true, "sum_logits_uncond": -12.348342895507812, "logits_per_token": -2.463749647140503, "logits_per_char": -0.35196423530578613, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 542, "native_id": "c0d75f9fbf30aa3a612f16edb20d6b8d", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.342507839202881, "incorrect_loss_raw": 7.7310580015182495, "correct_loss_per_char": 0.6678134799003601, "incorrect_loss_per_char": 1.3830590679532004, "correct_loss_per_token": 5.342507839202881, "incorrect_loss_per_token": 7.7310580015182495, "correct_loss_uncond": -8.898870944976807, "incorrect_loss_uncond": -5.3474143743515015}, "model_output": [{"sum_logits": -9.274166107177734, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.693686485290527, "logits_per_token": -9.274166107177734, "logits_per_char": -1.854833221435547, "num_chars": 5}, {"sum_logits": -5.342507839202881, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.241378784179688, "logits_per_token": -5.342507839202881, "logits_per_char": -0.6678134799003601, "num_chars": 8}, {"sum_logits": -4.283664226531982, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.470205307006836, "logits_per_token": -4.283664226531982, "logits_per_char": -0.6119520323617118, "num_chars": 7}, {"sum_logits": -12.234879493713379, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -11.889653205871582, "logits_per_token": -12.234879493713379, "logits_per_char": -2.039146582285563, "num_chars": 6}, {"sum_logits": -5.131522178649902, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.260344505310059, "logits_per_token": -5.131522178649902, "logits_per_char": -1.0263044357299804, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 543, "native_id": "d07f149d8d953dcc45dda432194c375e", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.290827751159668, "incorrect_loss_raw": 13.85017442703247, "correct_loss_per_char": 0.6431767344474792, "incorrect_loss_per_char": 1.0580166798785218, "correct_loss_per_token": 3.4302759170532227, "incorrect_loss_per_token": 6.90827997525533, "correct_loss_uncond": -10.547406196594238, "incorrect_loss_uncond": -7.00852108001709}, "model_output": [{"sum_logits": -11.193865776062012, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -23.630504608154297, "logits_per_token": -3.7312885920206704, "logits_per_char": -0.5891508303190532, "num_chars": 19}, {"sum_logits": -10.290827751159668, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -20.838233947753906, "logits_per_token": -3.4302759170532227, "logits_per_char": -0.6431767344474792, "num_chars": 16}, {"sum_logits": -9.983040809631348, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.052749633789062, "logits_per_token": -9.983040809631348, "logits_per_char": -0.7679262161254883, "num_chars": 13}, {"sum_logits": -19.15863037109375, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -22.28643226623535, "logits_per_token": -6.386210123697917, "logits_per_char": -1.3684735979352678, "num_chars": 14}, {"sum_logits": -15.065160751342773, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -21.46509552001953, "logits_per_token": -7.532580375671387, "logits_per_char": -1.5065160751342774, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 544, "native_id": "080a9cf2d6447a9a4d98b0af311e10da", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.365169525146484, "incorrect_loss_raw": 16.914947748184204, "correct_loss_per_char": 0.4243446350097656, "incorrect_loss_per_char": 1.4519490166315956, "correct_loss_per_token": 3.182584762573242, "incorrect_loss_per_token": 10.058733224868774, "correct_loss_uncond": -10.903434753417969, "incorrect_loss_uncond": -2.7875518798828125}, "model_output": [{"sum_logits": -28.842487335205078, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -33.078086853027344, "logits_per_token": -9.61416244506836, "logits_per_char": -2.060177666800363, "num_chars": 14}, {"sum_logits": -12.519522666931152, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.042495727539062, "logits_per_token": -12.519522666931152, "logits_per_char": -1.3910580741034613, "num_chars": 9}, {"sum_logits": -16.39306640625, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.95490837097168, "logits_per_token": -8.196533203125, "logits_per_char": -1.3660888671875, "num_chars": 12}, {"sum_logits": -6.365169525146484, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.268604278564453, "logits_per_token": -3.182584762573242, "logits_per_char": -0.4243446350097656, "num_chars": 15}, {"sum_logits": -9.904714584350586, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.73450756072998, "logits_per_token": -9.904714584350586, "logits_per_char": -0.9904714584350586, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 545, "native_id": "111501a49dd41ceed9c2073eed5d2b72", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.3570325374603271, "incorrect_loss_raw": 9.485553741455078, "correct_loss_per_char": 0.193861791065761, "incorrect_loss_per_char": 0.9428396718842642, "correct_loss_per_token": 1.3570325374603271, "incorrect_loss_per_token": 8.236536741256714, "correct_loss_uncond": -12.441373109817505, "incorrect_loss_uncond": -5.200441598892212}, "model_output": [{"sum_logits": -1.3570325374603271, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": true, "sum_logits_uncond": -13.798405647277832, "logits_per_token": -1.3570325374603271, "logits_per_char": -0.193861791065761, "num_chars": 7}, {"sum_logits": -4.290644645690918, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.238256454467773, "logits_per_token": -4.290644645690918, "logits_per_char": -0.6129492350987026, "num_chars": 7}, {"sum_logits": -12.404854774475098, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.666552543640137, "logits_per_token": -12.404854774475098, "logits_per_char": -1.0337378978729248, "num_chars": 12}, {"sum_logits": -9.992136001586914, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.613142013549805, "logits_per_token": -4.996068000793457, "logits_per_char": -0.9992136001586914, "num_chars": 10}, {"sum_logits": -11.254579544067383, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.226030349731445, "logits_per_token": -11.254579544067383, "logits_per_char": -1.1254579544067382, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 546, "native_id": "7bb87c6d8eab57d4e983f60025b1f0dc", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.5625643730163574, "incorrect_loss_raw": 11.78695797920227, "correct_loss_per_char": 0.21354703108469644, "incorrect_loss_per_char": 1.3037575317753687, "correct_loss_per_token": 0.8541881243387858, "incorrect_loss_per_token": 7.238211274147034, "correct_loss_uncond": -11.727213382720947, "incorrect_loss_uncond": -4.9307544231414795}, "model_output": [{"sum_logits": -10.757858276367188, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.69064712524414, "logits_per_token": -10.757858276367188, "logits_per_char": -1.7929763793945312, "num_chars": 6}, {"sum_logits": -7.996576309204102, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.629486083984375, "logits_per_token": -3.998288154602051, "logits_per_char": -0.49978601932525635, "num_chars": 16}, {"sum_logits": -20.021446228027344, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.789621353149414, "logits_per_token": -10.010723114013672, "logits_per_char": -2.2246051364474826, "num_chars": 9}, {"sum_logits": -2.5625643730163574, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.289777755737305, "logits_per_token": -0.8541881243387858, "logits_per_char": -0.21354703108469644, "num_chars": 12}, {"sum_logits": -8.37195110321045, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.76109504699707, "logits_per_token": -4.185975551605225, "logits_per_char": -0.6976625919342041, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 547, "native_id": "5c2bc4335c8860342ec2d568ceb6ac6b", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.742256164550781, "incorrect_loss_raw": 11.752206087112427, "correct_loss_per_char": 0.6380284627278646, "incorrect_loss_per_char": 0.9611074549010676, "correct_loss_per_token": 2.8711280822753906, "incorrect_loss_per_token": 7.831190824508667, "correct_loss_uncond": -9.807360649108887, "incorrect_loss_uncond": -4.172993421554565}, "model_output": [{"sum_logits": -8.139293670654297, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.202109336853027, "logits_per_token": -8.139293670654297, "logits_per_char": -0.6260995131272536, "num_chars": 13}, {"sum_logits": -11.315542221069336, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.777209281921387, "logits_per_token": -5.657771110534668, "logits_per_char": -1.1315542221069337, "num_chars": 10}, {"sum_logits": -5.742256164550781, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.549616813659668, "logits_per_token": -2.8711280822753906, "logits_per_char": -0.6380284627278646, "num_chars": 9}, {"sum_logits": -20.052579879760742, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.641115188598633, "logits_per_token": -10.026289939880371, "logits_per_char": -1.2532862424850464, "num_chars": 16}, {"sum_logits": -7.501408576965332, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.080364227294922, "logits_per_token": -7.501408576965332, "logits_per_char": -0.8334898418850369, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 548, "native_id": "083861fc5ebb9226fff70544f3f83d2b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.627918004989624, "incorrect_loss_raw": 10.210021734237671, "correct_loss_per_char": 0.3754168578556606, "incorrect_loss_per_char": 0.8174472190630742, "correct_loss_per_token": 2.627918004989624, "incorrect_loss_per_token": 5.725165545940399, "correct_loss_uncond": -13.004318475723267, "incorrect_loss_uncond": -8.461848974227905}, "model_output": [{"sum_logits": -2.627918004989624, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.63223648071289, "logits_per_token": -2.627918004989624, "logits_per_char": -0.3754168578556606, "num_chars": 7}, {"sum_logits": -10.752037048339844, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -20.37601089477539, "logits_per_token": -5.376018524169922, "logits_per_char": -0.716802469889323, "num_chars": 15}, {"sum_logits": -7.992996692657471, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -20.726016998291016, "logits_per_token": -3.9964983463287354, "logits_per_char": -0.6148458994351901, "num_chars": 13}, {"sum_logits": -17.13381576538086, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -19.439311981201172, "logits_per_token": -8.56690788269043, "logits_per_char": -1.31798582810622, "num_chars": 13}, {"sum_logits": -4.96123743057251, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.146142959594727, "logits_per_token": -4.96123743057251, "logits_per_char": -0.6201546788215637, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 549, "native_id": "520b0eea9148e3cb4d45aa69a55491eb", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 1.2536439895629883, "incorrect_loss_raw": 7.270927906036377, "correct_loss_per_char": 0.0964341530433068, "incorrect_loss_per_char": 0.7373436965123572, "correct_loss_per_token": 1.2536439895629883, "incorrect_loss_per_token": 6.242769479751587, "correct_loss_uncond": -11.948465347290039, "incorrect_loss_uncond": -8.119057655334473}, "model_output": [{"sum_logits": -8.22526741027832, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.065330505371094, "logits_per_token": -4.11263370513916, "logits_per_char": -0.9139186011420356, "num_chars": 9}, {"sum_logits": -1.752279281616211, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.04529857635498, "logits_per_token": -1.752279281616211, "logits_per_char": -0.21903491020202637, "num_chars": 8}, {"sum_logits": -10.36147403717041, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.968234062194824, "logits_per_token": -10.36147403717041, "logits_per_char": -0.94195218519731, "num_chars": 11}, {"sum_logits": -8.744690895080566, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.4810791015625, "logits_per_token": -8.744690895080566, "logits_per_char": -0.8744690895080567, "num_chars": 10}, {"sum_logits": -1.2536439895629883, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": true, "sum_logits_uncond": -13.202109336853027, "logits_per_token": -1.2536439895629883, "logits_per_char": -0.0964341530433068, "num_chars": 13}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 550, "native_id": "ef6ede0af827ddd1dc7bbeb36a6fdd22", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.936275959014893, "incorrect_loss_raw": 7.943674921989441, "correct_loss_per_char": 0.7706973287794325, "incorrect_loss_per_char": 0.9531666749721759, "correct_loss_per_token": 3.4681379795074463, "incorrect_loss_per_token": 4.527366638183594, "correct_loss_uncond": -10.907470226287842, "incorrect_loss_uncond": -6.815383553504944}, "model_output": [{"sum_logits": -6.936275959014893, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.843746185302734, "logits_per_token": -3.4681379795074463, "logits_per_char": -0.7706973287794325, "num_chars": 9}, {"sum_logits": -4.444233417510986, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.506343841552734, "logits_per_token": -4.444233417510986, "logits_per_char": -0.8888466835021973, "num_chars": 5}, {"sum_logits": -6.942296981811523, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.633387565612793, "logits_per_token": -3.4711484909057617, "logits_per_char": -0.6311179074374113, "num_chars": 11}, {"sum_logits": -10.986451148986816, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.977118492126465, "logits_per_token": -5.493225574493408, "logits_per_char": -1.569493021283831, "num_chars": 7}, {"sum_logits": -9.401718139648438, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.919384002685547, "logits_per_token": -4.700859069824219, "logits_per_char": -0.7232090876652644, "num_chars": 13}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 551, "native_id": "d47986deb91d64b2b15d385da3d2f483", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.487624168395996, "incorrect_loss_raw": 11.136510491371155, "correct_loss_per_char": 0.49927200990564685, "incorrect_loss_per_char": 1.466219816655338, "correct_loss_per_token": 4.243812084197998, "incorrect_loss_per_token": 7.074340760707855, "correct_loss_uncond": -10.804678916931152, "incorrect_loss_uncond": -5.208318591117859}, "model_output": [{"sum_logits": -12.048684120178223, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.017801284790039, "logits_per_token": -12.048684120178223, "logits_per_char": -2.4097368240356447, "num_chars": 5}, {"sum_logits": -7.241270542144775, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.730409622192383, "logits_per_token": -3.6206352710723877, "logits_per_char": -0.4259570907143986, "num_chars": 17}, {"sum_logits": -9.724295616149902, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.47580909729004, "logits_per_token": -4.862147808074951, "logits_per_char": -0.8103579680124918, "num_chars": 12}, {"sum_logits": -15.531791687011719, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.155296325683594, "logits_per_token": -7.765895843505859, "logits_per_char": -2.218827383858817, "num_chars": 7}, {"sum_logits": -8.487624168395996, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.29230308532715, "logits_per_token": -4.243812084197998, "logits_per_char": -0.49927200990564685, "num_chars": 17}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 552, "native_id": "c3b7f4196b12714940ac1b9417194df4", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.151294708251953, "incorrect_loss_raw": 8.658553123474121, "correct_loss_per_char": 1.021613529750279, "incorrect_loss_per_char": 1.0862268871731229, "correct_loss_per_token": 7.151294708251953, "incorrect_loss_per_token": 7.192084908485413, "correct_loss_uncond": -7.847454071044922, "incorrect_loss_uncond": -5.730123281478882}, "model_output": [{"sum_logits": -7.015413284301758, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -10.333660125732422, "logits_per_token": -7.015413284301758, "logits_per_char": -1.1692355473836262, "num_chars": 6}, {"sum_logits": -9.144033432006836, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.903160095214844, "logits_per_token": -9.144033432006836, "logits_per_char": -1.5240055720011394, "num_chars": 6}, {"sum_logits": -11.731745719909668, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.424861907958984, "logits_per_token": -5.865872859954834, "logits_per_char": -0.9024419784545898, "num_chars": 13}, {"sum_logits": -7.151294708251953, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.998748779296875, "logits_per_token": -7.151294708251953, "logits_per_char": -1.021613529750279, "num_chars": 7}, {"sum_logits": -6.743020057678223, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.893023490905762, "logits_per_token": -6.743020057678223, "logits_per_char": -0.7492244508531358, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 553, "native_id": "5d03ad171fd661a28da5b6eb79967a6b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.946495056152344, "incorrect_loss_raw": 9.321755647659302, "correct_loss_per_char": 0.842038081242488, "incorrect_loss_per_char": 0.8359484597072973, "correct_loss_per_token": 5.473247528076172, "incorrect_loss_per_token": 5.587245345115662, "correct_loss_uncond": -8.35470962524414, "incorrect_loss_uncond": -9.043996334075928}, "model_output": [{"sum_logits": -3.87689208984375, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.7811222076416, "logits_per_token": -1.938446044921875, "logits_per_char": -0.3524447354403409, "num_chars": 11}, {"sum_logits": -15.168362617492676, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -21.134624481201172, "logits_per_token": -7.584181308746338, "logits_per_char": -0.9480226635932922, "num_chars": 16}, {"sum_logits": -10.946495056152344, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.301204681396484, "logits_per_token": -5.473247528076172, "logits_per_char": -0.842038081242488, "num_chars": 13}, {"sum_logits": -7.410940170288086, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.768898963928223, "logits_per_token": -7.410940170288086, "logits_per_char": -1.0587057386125838, "num_chars": 7}, {"sum_logits": -10.830827713012695, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.778362274169922, "logits_per_token": -5.415413856506348, "logits_per_char": -0.9846207011829723, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 554, "native_id": "7c95d753943c58757fe6e1ccff8aea14", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.420899391174316, "incorrect_loss_raw": 11.476300239562988, "correct_loss_per_char": 0.3388062119483948, "incorrect_loss_per_char": 0.8213785710868301, "correct_loss_per_token": 1.8069664637247722, "incorrect_loss_per_token": 6.096186518669128, "correct_loss_uncond": -12.067063331604004, "incorrect_loss_uncond": -6.816079378128052}, "model_output": [{"sum_logits": -15.701013565063477, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -22.463226318359375, "logits_per_token": -5.233671188354492, "logits_per_char": -0.6542088985443115, "num_chars": 24}, {"sum_logits": -13.452558517456055, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -17.959211349487305, "logits_per_token": -6.726279258728027, "logits_per_char": -1.0348121936504657, "num_chars": 13}, {"sum_logits": -5.420899391174316, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -17.48796272277832, "logits_per_token": -1.8069664637247722, "logits_per_char": -0.3388062119483948, "num_chars": 16}, {"sum_logits": -8.653666496276855, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -18.214515686035156, "logits_per_token": -4.326833248138428, "logits_per_char": -0.7866969542069868, "num_chars": 11}, {"sum_logits": -8.097962379455566, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.532565116882324, "logits_per_token": -8.097962379455566, "logits_per_char": -0.8097962379455567, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 555, "native_id": "88d8bfb9dc8e77ef642acbe1a129f3db", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.9103989601135254, "incorrect_loss_raw": 9.582300305366516, "correct_loss_per_char": 0.39103989601135253, "incorrect_loss_per_char": 0.9650058441691928, "correct_loss_per_token": 1.9551994800567627, "incorrect_loss_per_token": 4.791150152683258, "correct_loss_uncond": -10.92688512802124, "incorrect_loss_uncond": -6.854080557823181}, "model_output": [{"sum_logits": -15.353983879089355, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -18.789621353149414, "logits_per_token": -7.676991939544678, "logits_per_char": -1.7059982087877061, "num_chars": 9}, {"sum_logits": -11.067547798156738, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -15.671432495117188, "logits_per_token": -5.533773899078369, "logits_per_char": -1.2297275331285265, "num_chars": 9}, {"sum_logits": -4.080489635467529, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -15.523374557495117, "logits_per_token": -2.0402448177337646, "logits_per_char": -0.272032642364502, "num_chars": 15}, {"sum_logits": -3.9103989601135254, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -14.837284088134766, "logits_per_token": -1.9551994800567627, "logits_per_char": -0.39103989601135253, "num_chars": 10}, {"sum_logits": -7.827179908752441, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -15.76109504699707, "logits_per_token": -3.9135899543762207, "logits_per_char": -0.6522649923960367, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 556, "native_id": "b1a9b20793b46e46e1beedadbf852f84", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.2226333618164062, "incorrect_loss_raw": 9.732776522636414, "correct_loss_per_char": 0.2778291702270508, "incorrect_loss_per_char": 0.9557068098159063, "correct_loss_per_token": 2.2226333618164062, "incorrect_loss_per_token": 6.3437498807907104, "correct_loss_uncond": -10.733881950378418, "incorrect_loss_uncond": -6.291056513786316}, "model_output": [{"sum_logits": -2.2226333618164062, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.956515312194824, "logits_per_token": -2.2226333618164062, "logits_per_char": -0.2778291702270508, "num_chars": 8}, {"sum_logits": -12.672649383544922, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.91778564453125, "logits_per_token": -6.336324691772461, "logits_per_char": -0.6034594944545201, "num_chars": 21}, {"sum_logits": -10.829672813415527, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.4459285736084, "logits_per_token": -3.609890937805176, "logits_per_char": -0.6016484896341959, "num_chars": 18}, {"sum_logits": -8.190656661987305, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.215434074401855, "logits_per_token": -8.190656661987305, "logits_per_char": -1.1700938088553292, "num_chars": 7}, {"sum_logits": -7.2381272315979, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.516183853149414, "logits_per_token": -7.2381272315979, "logits_per_char": -1.4476254463195801, "num_chars": 5}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 557, "native_id": "81e016974d33fe383c848b6c819791cd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.991060256958008, "incorrect_loss_raw": 9.184573650360107, "correct_loss_per_char": 0.748882532119751, "incorrect_loss_per_char": 0.9504014166442307, "correct_loss_per_token": 5.991060256958008, "incorrect_loss_per_token": 6.495660185813904, "correct_loss_uncond": -6.850598335266113, "incorrect_loss_uncond": -5.7411229610443115}, "model_output": [{"sum_logits": -5.991060256958008, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.841658592224121, "logits_per_token": -5.991060256958008, "logits_per_char": -0.748882532119751, "num_chars": 8}, {"sum_logits": -6.940163612365723, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.311802864074707, "logits_per_token": -6.940163612365723, "logits_per_char": -0.6940163612365723, "num_chars": 10}, {"sum_logits": -9.90543270111084, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.977118492126465, "logits_per_token": -4.95271635055542, "logits_per_char": -1.4150618144444056, "num_chars": 7}, {"sum_logits": -8.286823272705078, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.621533393859863, "logits_per_token": -8.286823272705078, "logits_per_char": -0.6374479440542368, "num_chars": 13}, {"sum_logits": -11.605875015258789, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.79233169555664, "logits_per_token": -5.8029375076293945, "logits_per_char": -1.055079546841708, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 558, "native_id": "7cf54544d54818d53e7088c0749a3eca", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.898031234741211, "incorrect_loss_raw": 12.507210969924927, "correct_loss_per_char": 0.5932020823160807, "incorrect_loss_per_char": 0.7973730011313569, "correct_loss_per_token": 4.4490156173706055, "incorrect_loss_per_token": 4.874303698539734, "correct_loss_uncond": -13.086082458496094, "incorrect_loss_uncond": -6.520608186721802}, "model_output": [{"sum_logits": -9.67302131652832, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.648754119873047, "logits_per_token": -4.83651065826416, "logits_per_char": -0.60456383228302, "num_chars": 16}, {"sum_logits": -13.975654602050781, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.833324432373047, "logits_per_token": -6.987827301025391, "logits_per_char": -0.9317103068033854, "num_chars": 15}, {"sum_logits": -8.898031234741211, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -21.984113693237305, "logits_per_token": -4.4490156173706055, "logits_per_char": -0.5932020823160807, "num_chars": 15}, {"sum_logits": -13.446149826049805, "num_tokens": 4, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.80242156982422, "logits_per_token": -3.361537456512451, "logits_per_char": -0.7909499897676355, "num_chars": 17}, {"sum_logits": -12.9340181350708, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.8267765045166, "logits_per_token": -4.311339378356934, "logits_per_char": -0.8622678756713867, "num_chars": 15}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 559, "native_id": "6acd88b9b5dd15e23bbcc3fd679100a8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.2013375759124756, "incorrect_loss_raw": 11.83448350429535, "correct_loss_per_char": 0.14675583839416503, "incorrect_loss_per_char": 1.2217267821816837, "correct_loss_per_token": 2.2013375759124756, "incorrect_loss_per_token": 7.704359292984009, "correct_loss_uncond": -11.669903993606567, "incorrect_loss_uncond": -4.716262698173523}, "model_output": [{"sum_logits": -2.2013375759124756, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": true, "sum_logits_uncond": -13.871241569519043, "logits_per_token": -2.2013375759124756, "logits_per_char": -0.14675583839416503, "num_chars": 15}, {"sum_logits": -4.388272762298584, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.027542114257812, "logits_per_token": -4.388272762298584, "logits_per_char": -0.4875858624776204, "num_chars": 9}, {"sum_logits": -13.508265495300293, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.082138061523438, "logits_per_token": -6.7541327476501465, "logits_per_char": -0.7946038526647231, "num_chars": 17}, {"sum_logits": -9.90866756439209, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.564339637756348, "logits_per_token": -9.90866756439209, "logits_per_char": -1.6514445940653484, "num_chars": 6}, {"sum_logits": -19.53272819519043, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -22.52896499633789, "logits_per_token": -9.766364097595215, "logits_per_char": -1.953272819519043, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 560, "native_id": "c96a86957a9ab1d8ca0aeeb7f040d87a_1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 2.8955235481262207, "incorrect_loss_raw": 5.926567494869232, "correct_loss_per_char": 0.2895523548126221, "incorrect_loss_per_char": 0.9310146254442986, "correct_loss_per_token": 2.8955235481262207, "incorrect_loss_per_token": 5.926567494869232, "correct_loss_uncond": -11.516890048980713, "incorrect_loss_uncond": -7.515396416187286}, "model_output": [{"sum_logits": -6.0598039627075195, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -13.181896209716797, "logits_per_token": -6.0598039627075195, "logits_per_char": -0.8656862803867885, "num_chars": 7}, {"sum_logits": -2.8955235481262207, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.412413597106934, "logits_per_token": -2.8955235481262207, "logits_per_char": -0.2895523548126221, "num_chars": 10}, {"sum_logits": -8.192712783813477, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -11.889653205871582, "logits_per_token": -8.192712783813477, "logits_per_char": -1.3654521306355794, "num_chars": 6}, {"sum_logits": -1.9849307537078857, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -13.023554801940918, "logits_per_token": -1.9849307537078857, "logits_per_char": -0.24811634421348572, "num_chars": 8}, {"sum_logits": -7.468822479248047, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.672751426696777, "logits_per_token": -7.468822479248047, "logits_per_char": -1.244803746541341, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 561, "native_id": "6a1bf527af9ed0685ac5e2bf0bd76647", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.671220541000366, "incorrect_loss_raw": 11.170818567276001, "correct_loss_per_char": 0.3339025676250458, "incorrect_loss_per_char": 1.078245980660994, "correct_loss_per_token": 2.671220541000366, "incorrect_loss_per_token": 8.21197760105133, "correct_loss_uncond": -11.813904047012329, "incorrect_loss_uncond": -4.792337656021118}, "model_output": [{"sum_logits": -11.891451835632324, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.792903900146484, "logits_per_token": -11.891451835632324, "logits_per_char": -1.1891451835632325, "num_chars": 10}, {"sum_logits": -2.671220541000366, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.485124588012695, "logits_per_token": -2.671220541000366, "logits_per_char": -0.3339025676250458, "num_chars": 8}, {"sum_logits": -9.62614631652832, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.956235885620117, "logits_per_token": -4.81307315826416, "logits_per_char": -0.7404727935791016, "num_chars": 13}, {"sum_logits": -14.044581413269043, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.104385375976562, "logits_per_token": -7.0222907066345215, "logits_per_char": -1.080352416405311, "num_chars": 13}, {"sum_logits": -9.121094703674316, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.999099731445312, "logits_per_token": -9.121094703674316, "logits_per_char": -1.303013529096331, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 562, "native_id": "094fe91b20b03c647325fa2ee94470b3", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.02702522277832, "incorrect_loss_raw": 8.938491582870483, "correct_loss_per_char": 1.00337815284729, "incorrect_loss_per_char": 0.9993251344514271, "correct_loss_per_token": 4.01351261138916, "incorrect_loss_per_token": 5.0429723262786865, "correct_loss_uncond": -7.670519828796387, "incorrect_loss_uncond": -8.084067821502686}, "model_output": [{"sum_logits": -5.95755672454834, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.753787994384766, "logits_per_token": -2.97877836227417, "logits_per_char": -0.8510795320783343, "num_chars": 7}, {"sum_logits": -8.02702522277832, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.697545051574707, "logits_per_token": -4.01351261138916, "logits_per_char": -1.00337815284729, "num_chars": 8}, {"sum_logits": -11.167181968688965, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.7729434967041, "logits_per_token": -5.583590984344482, "logits_per_char": -0.9305984973907471, "num_chars": 12}, {"sum_logits": -14.03941535949707, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.253070831298828, "logits_per_token": -7.019707679748535, "logits_per_char": -1.559935039944119, "num_chars": 9}, {"sum_logits": -4.589812278747559, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.31043529510498, "logits_per_token": -4.589812278747559, "logits_per_char": -0.6556874683925084, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 563, "native_id": "bee2a6eadfaf7a4fa0a214e341ddbe5b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.654539942741394, "incorrect_loss_raw": 10.106183648109436, "correct_loss_per_char": 0.09350570610591344, "incorrect_loss_per_char": 1.2417671793338023, "correct_loss_per_token": 0.654539942741394, "incorrect_loss_per_token": 10.106183648109436, "correct_loss_uncond": -12.330695271492004, "incorrect_loss_uncond": -3.3430756330490112}, "model_output": [{"sum_logits": -0.654539942741394, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": true, "sum_logits_uncond": -12.985235214233398, "logits_per_token": -0.654539942741394, "logits_per_char": -0.09350570610591344, "num_chars": 7}, {"sum_logits": -11.833693504333496, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.893854141235352, "logits_per_token": -11.833693504333496, "logits_per_char": -1.9722822507222493, "num_chars": 6}, {"sum_logits": -4.2262959480285645, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.357250213623047, "logits_per_token": -4.2262959480285645, "logits_per_char": -0.5282869935035706, "num_chars": 8}, {"sum_logits": -11.914374351501465, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.349780082702637, "logits_per_token": -11.914374351501465, "logits_per_char": -1.0831249410455877, "num_chars": 11}, {"sum_logits": -12.450370788574219, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.196152687072754, "logits_per_token": -12.450370788574219, "logits_per_char": -1.383374532063802, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 564, "native_id": "2f97a77d155cb99092e8a7c055737b03_1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.438924312591553, "incorrect_loss_raw": 17.595309734344482, "correct_loss_per_char": 0.8048655390739441, "incorrect_loss_per_char": 1.3671559625803824, "correct_loss_per_token": 2.146308104197184, "incorrect_loss_per_token": 6.484018484751383, "correct_loss_uncond": -9.375339984893799, "incorrect_loss_uncond": -2.3539600372314453}, "model_output": [{"sum_logits": -11.409139633178711, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.835020065307617, "logits_per_token": -3.803046544392904, "logits_per_char": -1.2676821814643011, "num_chars": 9}, {"sum_logits": -21.750661849975586, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -23.0910587310791, "logits_per_token": -7.250220616658528, "logits_per_char": -1.3594163656234741, "num_chars": 16}, {"sum_logits": -22.36747169494629, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -22.312129974365234, "logits_per_token": -7.45582389831543, "logits_per_char": -1.491164779663086, "num_chars": 15}, {"sum_logits": -6.438924312591553, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.814264297485352, "logits_per_token": -2.146308104197184, "logits_per_char": -0.8048655390739441, "num_chars": 8}, {"sum_logits": -14.853965759277344, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.558870315551758, "logits_per_token": -7.426982879638672, "logits_per_char": -1.3503605235706677, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 565, "native_id": "bc268cd19e2c95c78967fd6b9092fb90", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.773966312408447, "incorrect_loss_raw": 11.190542459487915, "correct_loss_per_char": 0.6158151193098589, "incorrect_loss_per_char": 1.2558207909266152, "correct_loss_per_token": 3.3869831562042236, "incorrect_loss_per_token": 5.5952712297439575, "correct_loss_uncond": -13.955743312835693, "incorrect_loss_uncond": -8.969216585159302}, "model_output": [{"sum_logits": -6.773966312408447, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.72970962524414, "logits_per_token": -3.3869831562042236, "logits_per_char": -0.6158151193098589, "num_chars": 11}, {"sum_logits": -10.183815956115723, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -21.35382080078125, "logits_per_token": -5.091907978057861, "logits_per_char": -0.8486513296763102, "num_chars": 12}, {"sum_logits": -6.570985794067383, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.04320526123047, "logits_per_token": -3.2854928970336914, "logits_per_char": -1.0951642990112305, "num_chars": 6}, {"sum_logits": -16.858139038085938, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -24.732513427734375, "logits_per_token": -8.429069519042969, "logits_per_char": -1.6858139038085938, "num_chars": 10}, {"sum_logits": -11.149229049682617, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.509496688842773, "logits_per_token": -5.574614524841309, "logits_per_char": -1.3936536312103271, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 566, "native_id": "060cad0d3c007ceb151db9907bfcb214", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.725613594055176, "incorrect_loss_raw": 11.954400539398193, "correct_loss_per_char": 0.6703508496284485, "incorrect_loss_per_char": 1.7449403261527037, "correct_loss_per_token": 5.362806797027588, "incorrect_loss_per_token": 10.000563859939575, "correct_loss_uncond": -9.021546363830566, "incorrect_loss_uncond": -4.368256568908691}, "model_output": [{"sum_logits": -12.38746452331543, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.123125076293945, "logits_per_token": -12.38746452331543, "logits_per_char": -2.477492904663086, "num_chars": 5}, {"sum_logits": -9.740053176879883, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.903160095214844, "logits_per_token": -9.740053176879883, "logits_per_char": -1.6233421961466472, "num_chars": 6}, {"sum_logits": -15.630693435668945, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -23.48581314086914, "logits_per_token": -7.815346717834473, "logits_per_char": -1.2023610335129957, "num_chars": 13}, {"sum_logits": -10.725613594055176, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.747159957885742, "logits_per_token": -5.362806797027588, "logits_per_char": -0.6703508496284485, "num_chars": 16}, {"sum_logits": -10.059391021728516, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.77853012084961, "logits_per_token": -10.059391021728516, "logits_per_char": -1.676565170288086, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 567, "native_id": "29c2cc0ba85b4afb9c9d29801469a68f", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.143655776977539, "incorrect_loss_raw": 9.51470148563385, "correct_loss_per_char": 0.9388325554983956, "incorrect_loss_per_char": 0.9260363798765909, "correct_loss_per_token": 4.381218592325847, "incorrect_loss_per_token": 5.074853003025055, "correct_loss_uncond": -7.658821105957031, "incorrect_loss_uncond": -7.17786967754364}, "model_output": [{"sum_logits": -11.383051872253418, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.7302303314209, "logits_per_token": -3.7943506240844727, "logits_per_char": -0.7114407420158386, "num_chars": 16}, {"sum_logits": -13.744369506835938, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.622447967529297, "logits_per_token": -6.872184753417969, "logits_per_char": -1.1453641255696614, "num_chars": 12}, {"sum_logits": -6.597015857696533, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.974644660949707, "logits_per_token": -3.2985079288482666, "logits_per_char": -0.9424308368137905, "num_chars": 7}, {"sum_logits": -13.143655776977539, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.80247688293457, "logits_per_token": -4.381218592325847, "logits_per_char": -0.9388325554983956, "num_chars": 14}, {"sum_logits": -6.334368705749512, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.442961692810059, "logits_per_token": -6.334368705749512, "logits_per_char": -0.9049098151070731, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 568, "native_id": "6cb895ce89995f6be422f7c4167c7638", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.087691307067871, "incorrect_loss_raw": 16.509521007537842, "correct_loss_per_char": 0.6739742755889893, "incorrect_loss_per_char": 1.1739807209411224, "correct_loss_per_token": 4.0438456535339355, "incorrect_loss_per_token": 6.865951617558797, "correct_loss_uncond": -12.055245399475098, "incorrect_loss_uncond": -4.08541202545166}, "model_output": [{"sum_logits": -8.087691307067871, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -20.14293670654297, "logits_per_token": -4.0438456535339355, "logits_per_char": -0.6739742755889893, "num_chars": 12}, {"sum_logits": -19.93398666381836, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -21.10812759399414, "logits_per_token": -9.96699333190918, "logits_per_char": -1.812180605801669, "num_chars": 11}, {"sum_logits": -12.772684097290039, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -19.893333435058594, "logits_per_token": -6.3863420486450195, "logits_per_char": -0.9123345783778599, "num_chars": 14}, {"sum_logits": -18.29022216796875, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.910371780395508, "logits_per_token": -6.09674072265625, "logits_per_char": -1.21934814453125, "num_chars": 15}, {"sum_logits": -15.041191101074219, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -22.467899322509766, "logits_per_token": -5.013730367024739, "logits_per_char": -0.7520595550537109, "num_chars": 20}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 569, "native_id": "839f3c37622c1ed5eebc9cd0b9d658e8", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.319204330444336, "incorrect_loss_raw": 7.46067214012146, "correct_loss_per_char": 0.457450270652771, "incorrect_loss_per_char": 1.0968712230523425, "correct_loss_per_token": 1.829801082611084, "incorrect_loss_per_token": 7.46067214012146, "correct_loss_uncond": -14.152933120727539, "incorrect_loss_uncond": -5.3320631980896}, "model_output": [{"sum_logits": -9.36247444152832, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.022502899169922, "logits_per_token": -9.36247444152832, "logits_per_char": -1.040274937947591, "num_chars": 9}, {"sum_logits": -7.204583644866943, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.740178108215332, "logits_per_token": -7.204583644866943, "logits_per_char": -1.2007639408111572, "num_chars": 6}, {"sum_logits": -6.493229389190674, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.10049819946289, "logits_per_token": -6.493229389190674, "logits_per_char": -1.2986458778381347, "num_chars": 5}, {"sum_logits": -7.319204330444336, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.472137451171875, "logits_per_token": -1.829801082611084, "logits_per_char": -0.457450270652771, "num_chars": 16}, {"sum_logits": -6.782401084899902, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.307762145996094, "logits_per_token": -6.782401084899902, "logits_per_char": -0.8478001356124878, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 570, "native_id": "3957ac6bab96fc9d4f173ada4692d16b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.274853229522705, "incorrect_loss_raw": 15.408840656280518, "correct_loss_per_char": 0.26374266147613523, "incorrect_loss_per_char": 1.7039206345876057, "correct_loss_per_token": 1.7582844098409016, "incorrect_loss_per_token": 9.088783979415894, "correct_loss_uncond": -17.19304609298706, "incorrect_loss_uncond": -2.741173028945923}, "model_output": [{"sum_logits": -5.274853229522705, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -22.467899322509766, "logits_per_token": -1.7582844098409016, "logits_per_char": -0.26374266147613523, "num_chars": 20}, {"sum_logits": -15.54306411743164, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.10812759399414, "logits_per_token": -7.77153205871582, "logits_per_char": -1.4130058288574219, "num_chars": 11}, {"sum_logits": -18.84264373779297, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.14293670654297, "logits_per_token": -9.421321868896484, "logits_per_char": -1.5702203114827473, "num_chars": 12}, {"sum_logits": -16.174745559692383, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.00286102294922, "logits_per_token": -8.087372779846191, "logits_per_char": -1.6174745559692383, "num_chars": 10}, {"sum_logits": -11.074909210205078, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.346129417419434, "logits_per_token": -11.074909210205078, "logits_per_char": -2.2149818420410154, "num_chars": 5}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 571, "native_id": "a4f5e5412f0f8ac9190db1730db07a90", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.7834553718566895, "incorrect_loss_raw": 15.487718105316162, "correct_loss_per_char": 0.4819546143213908, "incorrect_loss_per_char": 1.2835282675152324, "correct_loss_per_token": 2.8917276859283447, "incorrect_loss_per_token": 7.612047433853149, "correct_loss_uncond": -10.582789897918701, "incorrect_loss_uncond": -3.9973676204681396}, "model_output": [{"sum_logits": -25.06390380859375, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -27.366832733154297, "logits_per_token": -6.2659759521484375, "logits_per_char": -2.278536709872159, "num_chars": 11}, {"sum_logits": -11.477458953857422, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.888142585754395, "logits_per_token": -11.477458953857422, "logits_per_char": -1.4346823692321777, "num_chars": 8}, {"sum_logits": -11.915962219238281, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.12929916381836, "logits_per_token": -5.957981109619141, "logits_per_char": -0.6271559062756991, "num_chars": 19}, {"sum_logits": -5.7834553718566895, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.36624526977539, "logits_per_token": -2.8917276859283447, "logits_per_char": -0.4819546143213908, "num_chars": 12}, {"sum_logits": -13.493547439575195, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.556068420410156, "logits_per_token": -6.746773719787598, "logits_per_char": -0.7937380846808938, "num_chars": 17}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 572, "native_id": "cb5b39878be0e05a3ffe783801adbc3b", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.748617172241211, "incorrect_loss_raw": 11.075027704238892, "correct_loss_per_char": 1.1247695287068684, "incorrect_loss_per_char": 1.1598821537835258, "correct_loss_per_token": 6.748617172241211, "incorrect_loss_per_token": 11.075027704238892, "correct_loss_uncond": -6.756304740905762, "incorrect_loss_uncond": -1.838977575302124}, "model_output": [{"sum_logits": -12.1184720993042, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.184654235839844, "logits_per_token": -12.1184720993042, "logits_per_char": -1.0098726749420166, "num_chars": 12}, {"sum_logits": -6.748617172241211, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.504921913146973, "logits_per_token": -6.748617172241211, "logits_per_char": -1.1247695287068684, "num_chars": 6}, {"sum_logits": -7.453417778015137, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -10.994718551635742, "logits_per_token": -7.453417778015137, "logits_per_char": -1.8633544445037842, "num_chars": 4}, {"sum_logits": -12.51973819732666, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.641528129577637, "logits_per_token": -12.51973819732666, "logits_per_char": -0.8942670140947614, "num_chars": 14}, {"sum_logits": -12.20848274230957, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.83512020111084, "logits_per_token": -12.20848274230957, "logits_per_char": -0.8720344815935407, "num_chars": 14}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 573, "native_id": "985a4f1a3f31f1ba6654f4fc48f504df", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.1167893409729004, "incorrect_loss_raw": 10.032060146331787, "correct_loss_per_char": 0.38959866762161255, "incorrect_loss_per_char": 1.2445456862449646, "correct_loss_per_token": 1.5583946704864502, "incorrect_loss_per_token": 7.634880900382996, "correct_loss_uncond": -14.527059078216553, "incorrect_loss_uncond": -5.441713571548462}, "model_output": [{"sum_logits": -8.760872840881348, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.779285430908203, "logits_per_token": -4.380436420440674, "logits_per_char": -0.8760872840881347, "num_chars": 10}, {"sum_logits": -15.062959671020508, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.023900032043457, "logits_per_token": -15.062959671020508, "logits_per_char": -1.8828699588775635, "num_chars": 8}, {"sum_logits": -10.416561126708984, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.588943481445312, "logits_per_token": -5.208280563354492, "logits_per_char": -1.0416561126708985, "num_chars": 10}, {"sum_logits": -3.1167893409729004, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.643848419189453, "logits_per_token": -1.5583946704864502, "logits_per_char": -0.38959866762161255, "num_chars": 8}, {"sum_logits": -5.887846946716309, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.502965927124023, "logits_per_token": -5.887846946716309, "logits_per_char": -1.1775693893432617, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 574, "native_id": "5d687fe9c95436ce84230c996d34382d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.929707527160645, "incorrect_loss_raw": 7.9378474950790405, "correct_loss_per_char": 0.9108089605967203, "incorrect_loss_per_char": 0.9784264759583906, "correct_loss_per_token": 5.464853763580322, "incorrect_loss_per_token": 6.143822133541107, "correct_loss_uncond": -8.997221946716309, "incorrect_loss_uncond": -7.229653239250183}, "model_output": [{"sum_logits": -6.477419853210449, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -11.611862182617188, "logits_per_token": -6.477419853210449, "logits_per_char": -1.2954839706420898, "num_chars": 5}, {"sum_logits": -6.830298900604248, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -15.76109504699707, "logits_per_token": -3.415149450302124, "logits_per_char": -0.569191575050354, "num_chars": 12}, {"sum_logits": -10.929707527160645, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -19.926929473876953, "logits_per_token": -5.464853763580322, "logits_per_char": -0.9108089605967203, "num_chars": 12}, {"sum_logits": -10.921767234802246, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.96692943572998, "logits_per_token": -10.921767234802246, "logits_per_char": -1.3652209043502808, "num_chars": 8}, {"sum_logits": -7.521903991699219, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -17.330116271972656, "logits_per_token": -3.7609519958496094, "logits_per_char": -0.683809453790838, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 575, "native_id": "af11faa29097b71141fe192ad019d1dd", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.544029235839844, "incorrect_loss_raw": 11.33568274974823, "correct_loss_per_char": 0.9585481123490767, "incorrect_loss_per_char": 1.3729876251447768, "correct_loss_per_token": 5.272014617919922, "incorrect_loss_per_token": 6.668023308118184, "correct_loss_uncond": -7.35954475402832, "incorrect_loss_uncond": -4.6492356061935425}, "model_output": [{"sum_logits": -10.544029235839844, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -17.903573989868164, "logits_per_token": -5.272014617919922, "logits_per_char": -0.9585481123490767, "num_chars": 11}, {"sum_logits": -19.057947158813477, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -21.26119613647461, "logits_per_token": -6.352649052937825, "logits_per_char": -1.3612819399152483, "num_chars": 14}, {"sum_logits": -6.944617748260498, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.144001007080078, "logits_per_token": -6.944617748260498, "logits_per_char": -1.1574362913767497, "num_chars": 6}, {"sum_logits": -11.930679321289062, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.281843185424805, "logits_per_token": -5.965339660644531, "logits_per_char": -1.4913349151611328, "num_chars": 8}, {"sum_logits": -7.409486770629883, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.252633094787598, "logits_per_token": -7.409486770629883, "logits_per_char": -1.4818973541259766, "num_chars": 5}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 576, "native_id": "07fd8b0aed06406fedb137d11b07a890", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.1127851009368896, "incorrect_loss_raw": 9.819184422492981, "correct_loss_per_char": 0.31127851009368895, "incorrect_loss_per_char": 1.0421806782011, "correct_loss_per_token": 3.1127851009368896, "incorrect_loss_per_token": 5.640896737575531, "correct_loss_uncond": -13.102138757705688, "incorrect_loss_uncond": -7.294887661933899}, "model_output": [{"sum_logits": -16.799976348876953, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.204669952392578, "logits_per_token": -8.399988174438477, "logits_per_char": -1.8666640387641058, "num_chars": 9}, {"sum_logits": -11.708870887756348, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.781972885131836, "logits_per_token": -5.854435443878174, "logits_per_char": -0.9757392406463623, "num_chars": 12}, {"sum_logits": -4.917454242706299, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.774185180664062, "logits_per_token": -2.4587271213531494, "logits_per_char": -0.35124673162187847, "num_chars": 14}, {"sum_logits": -3.1127851009368896, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.214923858642578, "logits_per_token": -3.1127851009368896, "logits_per_char": -0.31127851009368895, "num_chars": 10}, {"sum_logits": -5.850436210632324, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.695460319519043, "logits_per_token": -5.850436210632324, "logits_per_char": -0.9750727017720541, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 577, "native_id": "7044d82a456d0fa6f0210abb03cbf2c4", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.647115230560303, "incorrect_loss_raw": 9.001822233200073, "correct_loss_per_char": 0.6042832027782093, "incorrect_loss_per_char": 1.2709608844348361, "correct_loss_per_token": 3.3235576152801514, "incorrect_loss_per_token": 9.001822233200073, "correct_loss_uncond": -9.815224170684814, "incorrect_loss_uncond": -4.257709264755249}, "model_output": [{"sum_logits": -10.176324844360352, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.920620918273926, "logits_per_token": -10.176324844360352, "logits_per_char": -1.4537606920514787, "num_chars": 7}, {"sum_logits": -10.203143119812012, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.793344497680664, "logits_per_token": -10.203143119812012, "logits_per_char": -0.850261926651001, "num_chars": 12}, {"sum_logits": -6.647115230560303, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.462339401245117, "logits_per_token": -3.3235576152801514, "logits_per_char": -0.6042832027782093, "num_chars": 11}, {"sum_logits": -5.107900619506836, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.135960578918457, "logits_per_token": -5.107900619506836, "logits_per_char": -1.276975154876709, "num_chars": 4}, {"sum_logits": -10.519920349121094, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.188199996948242, "logits_per_token": -10.519920349121094, "logits_per_char": -1.5028457641601562, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 578, "native_id": "e53ba4c7d2a818bdb6001e6924bc8896", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.660327911376953, "incorrect_loss_raw": 10.054266452789307, "correct_loss_per_char": 1.3320655822753906, "incorrect_loss_per_char": 1.168951400121053, "correct_loss_per_token": 6.660327911376953, "incorrect_loss_per_token": 7.430446267127991, "correct_loss_uncond": -6.49891471862793, "incorrect_loss_uncond": -4.781438827514648}, "model_output": [{"sum_logits": -11.050848007202148, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.113682746887207, "logits_per_token": -11.050848007202148, "logits_per_char": -1.8418080012003581, "num_chars": 6}, {"sum_logits": -6.660327911376953, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.159242630004883, "logits_per_token": -6.660327911376953, "logits_per_char": -1.3320655822753906, "num_chars": 5}, {"sum_logits": -9.150598526000977, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.932706832885742, "logits_per_token": -4.575299263000488, "logits_per_char": -0.6100399017333984, "num_chars": 15}, {"sum_logits": -8.17565631866455, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.263617515563965, "logits_per_token": -8.17565631866455, "logits_per_char": -0.9084062576293945, "num_chars": 9}, {"sum_logits": -11.83996295928955, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.032814025878906, "logits_per_token": -5.919981479644775, "logits_per_char": -1.3155514399210613, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 579, "native_id": "ecbc1ab06ad1ed6c53e5293d7a90ebd3", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.258872032165527, "incorrect_loss_raw": 11.570793151855469, "correct_loss_per_char": 0.5925722122192383, "incorrect_loss_per_char": 1.3321631675158745, "correct_loss_per_token": 5.629436016082764, "incorrect_loss_per_token": 6.071534474690755, "correct_loss_uncond": -10.53299617767334, "incorrect_loss_uncond": -5.204624652862549}, "model_output": [{"sum_logits": -11.536377906799316, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.254098892211914, "logits_per_token": -5.768188953399658, "logits_per_char": -2.307275581359863, "num_chars": 5}, {"sum_logits": -14.409191131591797, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.814483642578125, "logits_per_token": -4.803063710530599, "logits_per_char": -1.0292279379708427, "num_chars": 14}, {"sum_logits": -7.092166900634766, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.517623901367188, "logits_per_token": -7.092166900634766, "logits_per_char": -0.788018544514974, "num_chars": 9}, {"sum_logits": -11.258872032165527, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -21.791868209838867, "logits_per_token": -5.629436016082764, "logits_per_char": -0.5925722122192383, "num_chars": 19}, {"sum_logits": -13.245436668395996, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.515464782714844, "logits_per_token": -6.622718334197998, "logits_per_char": -1.2041306062178179, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 580, "native_id": "9a356ff463c042d04ba45bfd627bac20", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.8257873058319092, "incorrect_loss_raw": 7.715183258056641, "correct_loss_per_char": 0.22822341322898865, "incorrect_loss_per_char": 1.2321687426831986, "correct_loss_per_token": 1.8257873058319092, "incorrect_loss_per_token": 7.715183258056641, "correct_loss_uncond": -11.197767496109009, "incorrect_loss_uncond": -5.410396337509155}, "model_output": [{"sum_logits": -10.333614349365234, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.82812213897705, "logits_per_token": -10.333614349365234, "logits_per_char": -2.066722869873047, "num_chars": 5}, {"sum_logits": -3.1270275115966797, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.187535285949707, "logits_per_token": -3.1270275115966797, "logits_per_char": -0.34744750128852, "num_chars": 9}, {"sum_logits": -12.873515129089355, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.241378784179688, "logits_per_token": -12.873515129089355, "logits_per_char": -1.6091893911361694, "num_chars": 8}, {"sum_logits": -1.8257873058319092, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": true, "sum_logits_uncond": -13.023554801940918, "logits_per_token": -1.8257873058319092, "logits_per_char": -0.22822341322898865, "num_chars": 8}, {"sum_logits": -4.526576042175293, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.245282173156738, "logits_per_token": -4.526576042175293, "logits_per_char": -0.9053152084350586, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 581, "native_id": "0a5c069836784c3d574828d85a20a074", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.670710563659668, "incorrect_loss_raw": 10.05932092666626, "correct_loss_per_char": 0.8892258803049723, "incorrect_loss_per_char": 0.8253379450602965, "correct_loss_per_token": 5.335355281829834, "incorrect_loss_per_token": 5.02966046333313, "correct_loss_uncond": -8.636990547180176, "incorrect_loss_uncond": -7.589363098144531}, "model_output": [{"sum_logits": -7.654665946960449, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -18.118162155151367, "logits_per_token": -3.8273329734802246, "logits_per_char": -0.5888204574584961, "num_chars": 13}, {"sum_logits": -12.579565048217773, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -21.27880096435547, "logits_per_token": -6.289782524108887, "logits_per_char": -1.1435968225652522, "num_chars": 11}, {"sum_logits": -11.503220558166504, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -17.47633171081543, "logits_per_token": -5.751610279083252, "logits_per_char": -0.7189512848854065, "num_chars": 16}, {"sum_logits": -10.670710563659668, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -19.307701110839844, "logits_per_token": -5.335355281829834, "logits_per_char": -0.8892258803049723, "num_chars": 12}, {"sum_logits": -8.499832153320312, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -13.721441268920898, "logits_per_token": -4.249916076660156, "logits_per_char": -0.8499832153320312, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 582, "native_id": "f996430ce208606452868fd2e739d409", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 12.417329788208008, "incorrect_loss_raw": 14.77668023109436, "correct_loss_per_char": 1.1288481625643643, "incorrect_loss_per_char": 1.130377303388782, "correct_loss_per_token": 6.208664894104004, "incorrect_loss_per_token": 7.619386355082194, "correct_loss_uncond": -8.424934387207031, "incorrect_loss_uncond": -5.513518810272217}, "model_output": [{"sum_logits": -8.92948055267334, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.81131649017334, "logits_per_token": -8.92948055267334, "logits_per_char": -1.2756400789533342, "num_chars": 7}, {"sum_logits": -12.417329788208008, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.84226417541504, "logits_per_token": -6.208664894104004, "logits_per_char": -1.1288481625643643, "num_chars": 11}, {"sum_logits": -21.243331909179688, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -26.094181060791016, "logits_per_token": -7.0811106363932295, "logits_per_char": -1.1180701004831415, "num_chars": 19}, {"sum_logits": -11.932308197021484, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.572669982910156, "logits_per_token": -5.966154098510742, "logits_per_char": -0.994359016418457, "num_chars": 12}, {"sum_logits": -17.00160026550293, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.682628631591797, "logits_per_token": -8.500800132751465, "logits_per_char": -1.1334400177001953, "num_chars": 15}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 583, "native_id": "26c854d933d2115e7636fdcde57eb463", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.733443021774292, "incorrect_loss_raw": 11.05626130104065, "correct_loss_per_char": 0.13334177090571478, "incorrect_loss_per_char": 1.784234402860914, "correct_loss_per_token": 0.866721510887146, "incorrect_loss_per_token": 11.05626130104065, "correct_loss_uncond": -17.86243176460266, "incorrect_loss_uncond": -2.9013686180114746}, "model_output": [{"sum_logits": -13.323709487915039, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.00421142578125, "logits_per_token": -13.323709487915039, "logits_per_char": -2.6647418975830077, "num_chars": 5}, {"sum_logits": -13.259176254272461, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.287572860717773, "logits_per_token": -13.259176254272461, "logits_per_char": -1.8941680363246374, "num_chars": 7}, {"sum_logits": -4.9701032638549805, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.132861137390137, "logits_per_token": -4.9701032638549805, "logits_per_char": -0.994020652770996, "num_chars": 5}, {"sum_logits": -12.672056198120117, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.405874252319336, "logits_per_token": -12.672056198120117, "logits_per_char": -1.5840070247650146, "num_chars": 8}, {"sum_logits": -1.733443021774292, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": true, "sum_logits_uncond": -19.595874786376953, "logits_per_token": -0.866721510887146, "logits_per_char": -0.13334177090571478, "num_chars": 13}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 584, "native_id": "83c25b9a5db5f9b3fd1ff6c7453d23d0", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.4584505558013916, "incorrect_loss_raw": 11.940154433250427, "correct_loss_per_char": 0.2234955050728538, "incorrect_loss_per_char": 0.8327987716000349, "correct_loss_per_token": 1.2292252779006958, "incorrect_loss_per_token": 4.206627746423085, "correct_loss_uncond": -13.238986730575562, "incorrect_loss_uncond": -8.493788361549377}, "model_output": [{"sum_logits": -6.739592552185059, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.67377471923828, "logits_per_token": -3.3697962760925293, "logits_per_char": -0.6126902320168235, "num_chars": 11}, {"sum_logits": -15.013997077941895, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -22.860706329345703, "logits_per_token": -5.004665692647298, "logits_per_char": -0.9383748173713684, "num_chars": 16}, {"sum_logits": -2.4584505558013916, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": true, "sum_logits_uncond": -15.697437286376953, "logits_per_token": -1.2292252779006958, "logits_per_char": -0.2234955050728538, "num_chars": 11}, {"sum_logits": -7.801167964935303, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.431522369384766, "logits_per_token": -3.9005839824676514, "logits_per_char": -0.7091970877213911, "num_chars": 11}, {"sum_logits": -18.205860137939453, "num_tokens": 4, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.76976776123047, "logits_per_token": -4.551465034484863, "logits_per_char": -1.0709329492905562, "num_chars": 17}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 585, "native_id": "a0d02fc32878efdf0b0d420972943492", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.533479928970337, "incorrect_loss_raw": 7.615256667137146, "correct_loss_per_char": 0.281497769885593, "incorrect_loss_per_char": 1.0100773572921753, "correct_loss_per_token": 1.2667399644851685, "incorrect_loss_per_token": 6.6342180371284485, "correct_loss_uncond": -11.992086172103882, "incorrect_loss_uncond": -6.988644957542419}, "model_output": [{"sum_logits": -7.84830904006958, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.85423755645752, "logits_per_token": -3.92415452003479, "logits_per_char": -0.6540257533391317, "num_chars": 12}, {"sum_logits": -2.533479928970337, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.525566101074219, "logits_per_token": -1.2667399644851685, "logits_per_char": -0.281497769885593, "num_chars": 9}, {"sum_logits": -4.549648284912109, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.400054931640625, "logits_per_token": -4.549648284912109, "logits_per_char": -0.7582747141520182, "num_chars": 6}, {"sum_logits": -9.180062294006348, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.629175186157227, "logits_per_token": -9.180062294006348, "logits_per_char": -1.1475077867507935, "num_chars": 8}, {"sum_logits": -8.883007049560547, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.53213882446289, "logits_per_token": -8.883007049560547, "logits_per_char": -1.4805011749267578, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 586, "native_id": "73fbd2caac2c3786ca810adfe7030273", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.279940605163574, "incorrect_loss_raw": 15.68829607963562, "correct_loss_per_char": 0.7907646619356595, "incorrect_loss_per_char": 1.690797928925399, "correct_loss_per_token": 2.5699851512908936, "incorrect_loss_per_token": 10.98209285736084, "correct_loss_uncond": -10.666655540466309, "incorrect_loss_uncond": -2.6171722412109375}, "model_output": [{"sum_logits": -21.131620407104492, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -22.52896499633789, "logits_per_token": -10.565810203552246, "logits_per_char": -2.113162040710449, "num_chars": 10}, {"sum_logits": -11.581291198730469, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -15.756300926208496, "logits_per_token": -11.581291198730469, "logits_per_char": -1.4476613998413086, "num_chars": 8}, {"sum_logits": -10.279940605163574, "num_tokens": 4, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -20.946596145629883, "logits_per_token": -2.5699851512908936, "logits_per_char": -0.7907646619356595, "num_chars": 13}, {"sum_logits": -16.51800537109375, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -20.304725646972656, "logits_per_token": -8.259002685546875, "logits_per_char": -1.2706157977764423, "num_chars": 13}, {"sum_logits": -13.52226734161377, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -14.631881713867188, "logits_per_token": -13.52226734161377, "logits_per_char": -1.9317524773733956, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 587, "native_id": "6c515b068b4d3aa88a5382224d9b866d", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 2.7754640579223633, "incorrect_loss_raw": 8.135031819343567, "correct_loss_per_char": 0.27754640579223633, "incorrect_loss_per_char": 0.884467098362002, "correct_loss_per_token": 2.7754640579223633, "incorrect_loss_per_token": 6.0295163194338475, "correct_loss_uncond": -13.439459800720215, "incorrect_loss_uncond": -9.198698878288269}, "model_output": [{"sum_logits": -7.89526891708374, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.562989234924316, "logits_per_token": -7.89526891708374, "logits_per_char": -1.1278955595833915, "num_chars": 7}, {"sum_logits": -7.485406398773193, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -22.34377670288086, "logits_per_token": -2.495135466257731, "logits_per_char": -0.4990270932515462, "num_chars": 15}, {"sum_logits": -2.7754640579223633, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.214923858642578, "logits_per_token": -2.7754640579223633, "logits_per_char": -0.27754640579223633, "num_chars": 10}, {"sum_logits": -10.295869827270508, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.5768461227417, "logits_per_token": -10.295869827270508, "logits_per_char": -1.2869837284088135, "num_chars": 8}, {"sum_logits": -6.863582134246826, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.85131072998047, "logits_per_token": -3.431791067123413, "logits_per_char": -0.623962012204257, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 588, "native_id": "0af371b94fb414860b13eea6009ccc31", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.828830718994141, "incorrect_loss_raw": 7.104802429676056, "correct_loss_per_char": 0.48777362278529574, "incorrect_loss_per_char": 0.8318798417136783, "correct_loss_per_token": 2.2762769063313804, "incorrect_loss_per_token": 4.3867663741111755, "correct_loss_uncond": -10.934200286865234, "incorrect_loss_uncond": -9.010696351528168}, "model_output": [{"sum_logits": -11.558164596557617, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -22.230199813842773, "logits_per_token": -5.779082298278809, "logits_per_char": -0.8255831854684013, "num_chars": 14}, {"sum_logits": -10.186123847961426, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.612049102783203, "logits_per_token": -5.093061923980713, "logits_per_char": -1.2732654809951782, "num_chars": 8}, {"sum_logits": -3.189406633377075, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -11.896347999572754, "logits_per_token": -3.189406633377075, "logits_per_char": -0.5315677722295126, "num_chars": 6}, {"sum_logits": -6.828830718994141, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.763031005859375, "logits_per_token": -2.2762769063313804, "logits_per_char": -0.48777362278529574, "num_chars": 14}, {"sum_logits": -3.4855146408081055, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -11.723398208618164, "logits_per_token": -3.4855146408081055, "logits_per_char": -0.6971029281616211, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 589, "native_id": "38e61d4be0da46b3cbbd76dc20bce677", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.44289493560791, "incorrect_loss_raw": 10.71263337135315, "correct_loss_per_char": 1.34898499080113, "incorrect_loss_per_char": 0.97072036455548, "correct_loss_per_token": 9.44289493560791, "incorrect_loss_per_token": 6.106413960456848, "correct_loss_uncond": -4.798811912536621, "incorrect_loss_uncond": -8.016608953475952}, "model_output": [{"sum_logits": -10.549470901489258, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -19.616893768310547, "logits_per_token": -5.274735450744629, "logits_per_char": -0.7535336358206612, "num_chars": 14}, {"sum_logits": -13.638221740722656, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -23.021343231201172, "logits_per_token": -4.546073913574219, "logits_per_char": -0.9092147827148438, "num_chars": 15}, {"sum_logits": -10.546852111816406, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.434986114501953, "logits_per_token": -10.546852111816406, "logits_per_char": -1.3183565139770508, "num_chars": 8}, {"sum_logits": -9.44289493560791, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.241706848144531, "logits_per_token": -9.44289493560791, "logits_per_char": -1.34898499080113, "num_chars": 7}, {"sum_logits": -8.115988731384277, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -17.843746185302734, "logits_per_token": -4.057994365692139, "logits_per_char": -0.9017765257093642, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 590, "native_id": "cebc07bd5080cc72862cb333b10d782d", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.8026816844940186, "incorrect_loss_raw": 10.463403582572937, "correct_loss_per_char": 0.42252018716600204, "incorrect_loss_per_char": 1.5562240809202195, "correct_loss_per_token": 1.9013408422470093, "incorrect_loss_per_token": 8.815332770347595, "correct_loss_uncond": -12.544677019119263, "incorrect_loss_uncond": -4.262278199195862}, "model_output": [{"sum_logits": -13.184566497802734, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -19.90774917602539, "logits_per_token": -6.592283248901367, "logits_per_char": -1.3184566497802734, "num_chars": 10}, {"sum_logits": -6.3168158531188965, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -11.922264099121094, "logits_per_token": -6.3168158531188965, "logits_per_char": -0.7896019816398621, "num_chars": 8}, {"sum_logits": -3.8026816844940186, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -16.34735870361328, "logits_per_token": -1.9013408422470093, "logits_per_char": -0.42252018716600204, "num_chars": 9}, {"sum_logits": -10.608261108398438, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -12.071391105651855, "logits_per_token": -10.608261108398438, "logits_per_char": -1.7680435180664062, "num_chars": 6}, {"sum_logits": -11.74397087097168, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.001322746276855, "logits_per_token": -11.74397087097168, "logits_per_char": -2.348794174194336, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 591, "native_id": "de0386024f32cdf277a785a851b97544", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.353882789611816, "incorrect_loss_raw": 10.271231651306152, "correct_loss_per_char": 0.8503529808738015, "incorrect_loss_per_char": 0.7073922192497555, "correct_loss_per_token": 4.676941394805908, "incorrect_loss_per_token": 4.170306086540222, "correct_loss_uncond": -8.248435020446777, "incorrect_loss_uncond": -7.508890390396118}, "model_output": [{"sum_logits": -15.444955825805664, "num_tokens": 4, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -23.08909034729004, "logits_per_token": -3.861238956451416, "logits_per_char": -0.8128924118845087, "num_chars": 19}, {"sum_logits": -10.097346305847168, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.837525367736816, "logits_per_token": -5.048673152923584, "logits_per_char": -0.9179405732588335, "num_chars": 11}, {"sum_logits": -9.353882789611816, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.602317810058594, "logits_per_token": -4.676941394805908, "logits_per_char": -0.8503529808738015, "num_chars": 11}, {"sum_logits": -8.827649116516113, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.418149948120117, "logits_per_token": -4.413824558258057, "logits_per_char": -0.679049932039701, "num_chars": 13}, {"sum_logits": -6.714975357055664, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.77572250366211, "logits_per_token": -3.357487678527832, "logits_per_char": -0.419685959815979, "num_chars": 16}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 592, "native_id": "9b62cd7f89716f393239e6c6ff3e11d5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.457175254821777, "incorrect_loss_raw": 15.130153894424438, "correct_loss_per_char": 0.4051977504383434, "incorrect_loss_per_char": 1.330643563559561, "correct_loss_per_token": 2.2285876274108887, "incorrect_loss_per_token": 7.565076947212219, "correct_loss_uncond": -12.957650184631348, "incorrect_loss_uncond": -5.929484128952026}, "model_output": [{"sum_logits": -4.457175254821777, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.414825439453125, "logits_per_token": -2.2285876274108887, "logits_per_char": -0.4051977504383434, "num_chars": 11}, {"sum_logits": -11.729479789733887, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.818565368652344, "logits_per_token": -5.864739894866943, "logits_per_char": -0.7819653193155924, "num_chars": 15}, {"sum_logits": -14.753437995910645, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.051025390625, "logits_per_token": -7.376718997955322, "logits_per_char": -1.3412216359918767, "num_chars": 11}, {"sum_logits": -11.555624961853027, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.271503448486328, "logits_per_token": -5.777812480926514, "logits_per_char": -1.1555624961853028, "num_chars": 10}, {"sum_logits": -22.482072830200195, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -25.097457885742188, "logits_per_token": -11.241036415100098, "logits_per_char": -2.043824802745472, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 593, "native_id": "8b25332de2894ab38784235838d38cec", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.920780658721924, "incorrect_loss_raw": 12.042963027954102, "correct_loss_per_char": 0.6600650548934937, "incorrect_loss_per_char": 1.0438245629206364, "correct_loss_per_token": 3.960390329360962, "incorrect_loss_per_token": 5.908007204532623, "correct_loss_uncond": -9.687324047088623, "incorrect_loss_uncond": -5.815661191940308}, "model_output": [{"sum_logits": -9.727855682373047, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.892474174499512, "logits_per_token": -9.727855682373047, "logits_per_char": -1.3896936689104353, "num_chars": 7}, {"sum_logits": -13.346329689025879, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.77572250366211, "logits_per_token": -6.6731648445129395, "logits_per_char": -0.8341456055641174, "num_chars": 16}, {"sum_logits": -13.61856746673584, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.663850784301758, "logits_per_token": -3.40464186668396, "logits_per_char": -0.9079044977823894, "num_chars": 15}, {"sum_logits": -11.47909927368164, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.102449417114258, "logits_per_token": -3.826366424560547, "logits_per_char": -1.0435544794256038, "num_chars": 11}, {"sum_logits": -7.920780658721924, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.608104705810547, "logits_per_token": -3.960390329360962, "logits_per_char": -0.6600650548934937, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 594, "native_id": "dd4a811d18549f1ae1954cf938b28536", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.280322551727295, "incorrect_loss_raw": 11.337769985198975, "correct_loss_per_char": 0.8971889359610421, "incorrect_loss_per_char": 1.3163042007708081, "correct_loss_per_token": 6.280322551727295, "incorrect_loss_per_token": 8.136133035024006, "correct_loss_uncond": -6.240406513214111, "incorrect_loss_uncond": -4.781437635421753}, "model_output": [{"sum_logits": -6.280322551727295, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.520729064941406, "logits_per_token": -6.280322551727295, "logits_per_char": -0.8971889359610421, "num_chars": 7}, {"sum_logits": -13.23970890045166, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.001758575439453, "logits_per_token": -13.23970890045166, "logits_per_char": -1.8913869857788086, "num_chars": 7}, {"sum_logits": -14.272560119628906, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -19.653310775756836, "logits_per_token": -4.757520039876302, "logits_per_char": -0.839562359978171, "num_chars": 17}, {"sum_logits": -11.2557954788208, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.903332710266113, "logits_per_token": -11.2557954788208, "logits_per_char": -1.8759659131368, "num_chars": 6}, {"sum_logits": -6.583015441894531, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.918428421020508, "logits_per_token": -3.2915077209472656, "logits_per_char": -0.6583015441894531, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 595, "native_id": "e2ff952c17faf1c56a970502630d4c86", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.7700594663619995, "incorrect_loss_raw": 12.03559410572052, "correct_loss_per_char": 0.045297615668352914, "incorrect_loss_per_char": 1.0483322909800878, "correct_loss_per_token": 0.38502973318099976, "incorrect_loss_per_token": 6.34600555896759, "correct_loss_uncond": -16.23332989215851, "incorrect_loss_uncond": -5.85359799861908}, "model_output": [{"sum_logits": -6.319565296173096, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.973722457885742, "logits_per_token": -6.319565296173096, "logits_per_char": -0.5266304413477579, "num_chars": 12}, {"sum_logits": -11.08169174194336, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.333938598632812, "logits_per_token": -3.693897247314453, "logits_per_char": -1.0074265219948508, "num_chars": 11}, {"sum_logits": -9.679313659667969, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.514259338378906, "logits_per_token": -4.839656829833984, "logits_per_char": -0.7445625892052283, "num_chars": 13}, {"sum_logits": -21.061805725097656, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.734848022460938, "logits_per_token": -10.530902862548828, "logits_per_char": -1.9147096113725142, "num_chars": 11}, {"sum_logits": -0.7700594663619995, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": true, "sum_logits_uncond": -17.003389358520508, "logits_per_token": -0.38502973318099976, "logits_per_char": -0.045297615668352914, "num_chars": 17}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 596, "native_id": "3a6140e475cbbd3ee1da5ba9a6953597_1", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.1930813789367676, "incorrect_loss_raw": 10.721870183944702, "correct_loss_per_char": 0.14913517236709595, "incorrect_loss_per_char": 1.0634086243311565, "correct_loss_per_token": 1.1930813789367676, "incorrect_loss_per_token": 7.053296327590942, "correct_loss_uncond": -11.83047342300415, "incorrect_loss_uncond": -6.336901664733887}, "model_output": [{"sum_logits": -6.239302635192871, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.412413597106934, "logits_per_token": -6.239302635192871, "logits_per_char": -0.6239302635192872, "num_chars": 10}, {"sum_logits": -7.692155838012695, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.672751426696777, "logits_per_token": -7.692155838012695, "logits_per_char": -1.282025973002116, "num_chars": 6}, {"sum_logits": -19.56572723388672, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -26.260269165039062, "logits_per_token": -4.89143180847168, "logits_per_char": -0.7826290893554687, "num_chars": 25}, {"sum_logits": -9.390295028686523, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -11.889653205871582, "logits_per_token": -9.390295028686523, "logits_per_char": -1.565049171447754, "num_chars": 6}, {"sum_logits": -1.1930813789367676, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": true, "sum_logits_uncond": -13.023554801940918, "logits_per_token": -1.1930813789367676, "logits_per_char": -0.14913517236709595, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 597, "native_id": "e75e0c11e2d5a7b634455a1b4b76856c", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 2.465813159942627, "incorrect_loss_raw": 9.478774905204773, "correct_loss_per_char": 0.2739792399936252, "incorrect_loss_per_char": 0.948853035767873, "correct_loss_per_token": 2.465813159942627, "incorrect_loss_per_token": 5.693118214607239, "correct_loss_uncond": -9.559054851531982, "incorrect_loss_uncond": -6.924049735069275}, "model_output": [{"sum_logits": -6.449346542358398, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.607711791992188, "logits_per_token": -3.224673271179199, "logits_per_char": -0.5374455451965332, "num_chars": 12}, {"sum_logits": -2.465813159942627, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": true, "sum_logits_uncond": -12.02486801147461, "logits_per_token": -2.465813159942627, "logits_per_char": -0.2739792399936252, "num_chars": 9}, {"sum_logits": -10.849508285522461, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.081417083740234, "logits_per_token": -5.4247541427612305, "logits_per_char": -0.9041256904602051, "num_chars": 12}, {"sum_logits": -7.629846096038818, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.558703422546387, "logits_per_token": -7.629846096038818, "logits_per_char": -1.2716410160064697, "num_chars": 6}, {"sum_logits": -12.986398696899414, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.363466262817383, "logits_per_token": -6.493199348449707, "logits_per_char": -1.0821998914082844, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 598, "native_id": "3b9ccdcb1c932c46a38e040d3e6c7f5b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.386209964752197, "incorrect_loss_raw": 13.818772077560425, "correct_loss_per_char": 0.42574733098347983, "incorrect_loss_per_char": 1.584519746563175, "correct_loss_per_token": 3.1931049823760986, "incorrect_loss_per_token": 9.360458850860596, "correct_loss_uncond": -10.606181621551514, "incorrect_loss_uncond": -2.349099636077881}, "model_output": [{"sum_logits": -6.386209964752197, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.99239158630371, "logits_per_token": -3.1931049823760986, "logits_per_char": -0.42574733098347983, "num_chars": 15}, {"sum_logits": -20.183706283569336, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.055910110473633, "logits_per_token": -10.091853141784668, "logits_per_char": -1.8348823894153943, "num_chars": 11}, {"sum_logits": -10.666328430175781, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.998748779296875, "logits_per_token": -10.666328430175781, "logits_per_char": -1.523761204310826, "num_chars": 7}, {"sum_logits": -8.942254066467285, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.787047386169434, "logits_per_token": -8.942254066467285, "logits_per_char": -1.7884508132934571, "num_chars": 5}, {"sum_logits": -15.482799530029297, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.82978057861328, "logits_per_token": -7.741399765014648, "logits_per_char": -1.190984579233023, "num_chars": 13}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 599, "native_id": "6a29b657b29e1506284d8328dffbbd21", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.531567931175232, "incorrect_loss_raw": 11.259721755981445, "correct_loss_per_char": 0.3063135862350464, "incorrect_loss_per_char": 1.5926011729550051, "correct_loss_per_token": 1.531567931175232, "incorrect_loss_per_token": 8.471966862678528, "correct_loss_uncond": -12.050739884376526, "incorrect_loss_uncond": -3.872206926345825}, "model_output": [{"sum_logits": -9.593332290649414, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -13.82812213897705, "logits_per_token": -9.593332290649414, "logits_per_char": -1.918666458129883, "num_chars": 5}, {"sum_logits": -1.531567931175232, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": true, "sum_logits_uncond": -13.582307815551758, "logits_per_token": -1.531567931175232, "logits_per_char": -0.3063135862350464, "num_chars": 5}, {"sum_logits": -11.80832576751709, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -16.53887939453125, "logits_per_token": -5.904162883758545, "logits_per_char": -1.0734841606833718, "num_chars": 11}, {"sum_logits": -10.49371337890625, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -18.097108840942383, "logits_per_token": -5.246856689453125, "logits_per_char": -0.7495509556361607, "num_chars": 14}, {"sum_logits": -13.143515586853027, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -12.063604354858398, "logits_per_token": -13.143515586853027, "logits_per_char": -2.6287031173706055, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 600, "native_id": "96cb628fb7ed2f53245598f707ed2b80", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.564772605895996, "incorrect_loss_raw": 14.948264241218567, "correct_loss_per_char": 0.6877066005359996, "incorrect_loss_per_char": 1.1093933601128427, "correct_loss_per_token": 3.782386302947998, "incorrect_loss_per_token": 7.191540618737539, "correct_loss_uncond": -12.337359428405762, "incorrect_loss_uncond": -2.9217644929885864}, "model_output": [{"sum_logits": -27.003231048583984, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -26.118022918701172, "logits_per_token": -9.001077016194662, "logits_per_char": -1.3501615524291992, "num_chars": 20}, {"sum_logits": -7.564772605895996, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.902132034301758, "logits_per_token": -3.782386302947998, "logits_per_char": -0.6877066005359996, "num_chars": 11}, {"sum_logits": -7.775593280792236, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.882139205932617, "logits_per_token": -3.887796640396118, "logits_per_char": -0.7775593280792237, "num_chars": 10}, {"sum_logits": -18.273887634277344, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.399694442749023, "logits_per_token": -9.136943817138672, "logits_per_char": -0.9617835596988076, "num_chars": 19}, {"sum_logits": -6.740345001220703, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.0802583694458, "logits_per_token": -6.740345001220703, "logits_per_char": -1.3480690002441407, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 601, "native_id": "bd4e80fa6642a76c064d0bc924411fb0", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.9046711921691895, "incorrect_loss_raw": 7.669898867607117, "correct_loss_per_char": 0.40872259934743244, "incorrect_loss_per_char": 0.9873324635009917, "correct_loss_per_token": 2.4523355960845947, "incorrect_loss_per_token": 6.030224561691284, "correct_loss_uncond": -11.753596782684326, "incorrect_loss_uncond": -6.3678048849105835}, "model_output": [{"sum_logits": -13.11739444732666, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.882814407348633, "logits_per_token": -6.55869722366333, "logits_per_char": -1.6396743059158325, "num_chars": 8}, {"sum_logits": -9.521394729614258, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.562989234924316, "logits_per_token": -9.521394729614258, "logits_per_char": -1.360199247087751, "num_chars": 7}, {"sum_logits": -4.9046711921691895, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.658267974853516, "logits_per_token": -2.4523355960845947, "logits_per_char": -0.40872259934743244, "num_chars": 12}, {"sum_logits": -4.034403324127197, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.241388320922852, "logits_per_token": -4.034403324127197, "logits_per_char": -0.5043004155158997, "num_chars": 8}, {"sum_logits": -4.006402969360352, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.463623046875, "logits_per_token": -4.006402969360352, "logits_per_char": -0.4451558854844835, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 602, "native_id": "05490e6c191fbc3c2fe0033ed0bd8aa0", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.152601957321167, "incorrect_loss_raw": 8.041202425956726, "correct_loss_per_char": 0.23917799525790745, "incorrect_loss_per_char": 0.7354917170509458, "correct_loss_per_token": 2.152601957321167, "incorrect_loss_per_token": 4.650023738543192, "correct_loss_uncond": -12.922497987747192, "incorrect_loss_uncond": -7.596046328544617}, "model_output": [{"sum_logits": -10.812517166137695, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.76747703552246, "logits_per_token": -3.604172388712565, "logits_per_char": -0.5406258583068848, "num_chars": 20}, {"sum_logits": -5.358676910400391, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.181896209716797, "logits_per_token": -5.358676910400391, "logits_per_char": -0.7655252729143415, "num_chars": 7}, {"sum_logits": -3.2808756828308105, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.986479759216309, "logits_per_token": -3.2808756828308105, "logits_per_char": -0.36454174253675675, "num_chars": 9}, {"sum_logits": -2.152601957321167, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": true, "sum_logits_uncond": -15.07509994506836, "logits_per_token": -2.152601957321167, "logits_per_char": -0.23917799525790745, "num_chars": 9}, {"sum_logits": -12.712739944458008, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.613142013549805, "logits_per_token": -6.356369972229004, "logits_per_char": -1.2712739944458007, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 603, "native_id": "6abd34442438509b4a00c69d6fd24764", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 11.346384048461914, "incorrect_loss_raw": 12.331393718719482, "correct_loss_per_char": 0.8727987729586087, "incorrect_loss_per_char": 1.2673394987077424, "correct_loss_per_token": 5.673192024230957, "incorrect_loss_per_token": 6.777525266011556, "correct_loss_uncond": -5.787317276000977, "incorrect_loss_uncond": -5.335769891738892}, "model_output": [{"sum_logits": -9.887247085571289, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.373043060302734, "logits_per_token": -4.9436235427856445, "logits_per_char": -0.8988406441428445, "num_chars": 11}, {"sum_logits": -15.737452507019043, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.741750717163086, "logits_per_token": -5.245817502339681, "logits_per_char": -1.3114543755849202, "num_chars": 12}, {"sum_logits": -10.1404447555542, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.372330665588379, "logits_per_token": -10.1404447555542, "logits_per_char": -0.9218586141412909, "num_chars": 11}, {"sum_logits": -13.560430526733398, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.181529998779297, "logits_per_token": -6.780215263366699, "logits_per_char": -1.937204360961914, "num_chars": 7}, {"sum_logits": -11.346384048461914, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.13370132446289, "logits_per_token": -5.673192024230957, "logits_per_char": -0.8727987729586087, "num_chars": 13}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 604, "native_id": "e58eb0ec4197c29e961a7bdd4d67de4e", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.6194210052490234, "incorrect_loss_raw": 8.332726001739502, "correct_loss_per_char": 0.5170601436070034, "incorrect_loss_per_char": 1.078018388603673, "correct_loss_per_token": 3.6194210052490234, "incorrect_loss_per_token": 7.163950443267822, "correct_loss_uncond": -9.379866600036621, "incorrect_loss_uncond": -6.703835725784302}, "model_output": [{"sum_logits": -7.012653350830078, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.6790828704834, "logits_per_token": -2.3375511169433594, "logits_per_char": -0.38959185282389325, "num_chars": 18}, {"sum_logits": -9.326151847839355, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.431900978088379, "logits_per_token": -9.326151847839355, "logits_per_char": -0.8478319861672141, "num_chars": 11}, {"sum_logits": -7.278997421264648, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.47655963897705, "logits_per_token": -7.278997421264648, "logits_per_char": -1.4557994842529296, "num_chars": 5}, {"sum_logits": -3.6194210052490234, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.999287605285645, "logits_per_token": -3.6194210052490234, "logits_per_char": -0.5170601436070034, "num_chars": 7}, {"sum_logits": -9.713101387023926, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.558703422546387, "logits_per_token": -9.713101387023926, "logits_per_char": -1.6188502311706543, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 605, "native_id": "597d2a1c9df7962218d8b807df1f8212", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.060797691345215, "incorrect_loss_raw": 10.593361377716064, "correct_loss_per_char": 2.012159538269043, "incorrect_loss_per_char": 1.8267763078212738, "correct_loss_per_token": 10.060797691345215, "incorrect_loss_per_token": 10.593361377716064, "correct_loss_uncond": -3.3957929611206055, "incorrect_loss_uncond": -2.5193889141082764}, "model_output": [{"sum_logits": -8.932801246643066, "num_tokens": 1, "num_tokens_all": 129, "is_greedy": false, "sum_logits_uncond": -13.026251792907715, "logits_per_token": -8.932801246643066, "logits_per_char": -1.2761144638061523, "num_chars": 7}, {"sum_logits": -9.485520362854004, "num_tokens": 1, "num_tokens_all": 129, "is_greedy": false, "sum_logits_uncond": -13.82812213897705, "logits_per_token": -9.485520362854004, "logits_per_char": -1.8971040725708008, "num_chars": 5}, {"sum_logits": -14.839154243469238, "num_tokens": 1, "num_tokens_all": 129, "is_greedy": false, "sum_logits_uncond": -13.75567626953125, "logits_per_token": -14.839154243469238, "logits_per_char": -1.8548942804336548, "num_chars": 8}, {"sum_logits": -9.11596965789795, "num_tokens": 1, "num_tokens_all": 129, "is_greedy": false, "sum_logits_uncond": -11.840950965881348, "logits_per_token": -9.11596965789795, "logits_per_char": -2.2789924144744873, "num_chars": 4}, {"sum_logits": -10.060797691345215, "num_tokens": 1, "num_tokens_all": 129, "is_greedy": false, "sum_logits_uncond": -13.45659065246582, "logits_per_token": -10.060797691345215, "logits_per_char": -2.012159538269043, "num_chars": 5}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 606, "native_id": "68f6ac445cc008d93f931b999b44b0ba", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.9906558990478516, "incorrect_loss_raw": 11.227112412452698, "correct_loss_per_char": 0.1170974058263442, "incorrect_loss_per_char": 1.4371437397268085, "correct_loss_per_token": 0.9953279495239258, "incorrect_loss_per_token": 6.061141610145569, "correct_loss_uncond": -12.034015655517578, "incorrect_loss_uncond": -5.274425625801086}, "model_output": [{"sum_logits": -10.075489044189453, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.97714614868164, "logits_per_token": -5.037744522094727, "logits_per_char": -1.119498782687717, "num_chars": 9}, {"sum_logits": -3.5806832313537598, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.707180976867676, "logits_per_token": -3.5806832313537598, "logits_per_char": -0.8951708078384399, "num_chars": 4}, {"sum_logits": -18.822967529296875, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.86726951599121, "logits_per_token": -9.411483764648438, "logits_per_char": -2.3528709411621094, "num_chars": 8}, {"sum_logits": -12.429309844970703, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.45455551147461, "logits_per_token": -6.214654922485352, "logits_per_char": -1.381034427218967, "num_chars": 9}, {"sum_logits": -1.9906558990478516, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": true, "sum_logits_uncond": -14.02467155456543, "logits_per_token": -0.9953279495239258, "logits_per_char": -0.1170974058263442, "num_chars": 17}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 607, "native_id": "aa4c5d2d348796b8d7fa324f27f4c34f", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.676085472106934, "incorrect_loss_raw": 10.493339776992798, "correct_loss_per_char": 0.9537264960152763, "incorrect_loss_per_char": 0.9942115873348814, "correct_loss_per_token": 6.676085472106934, "incorrect_loss_per_token": 4.889019171396892, "correct_loss_uncond": -9.32567310333252, "incorrect_loss_uncond": -7.066130638122559}, "model_output": [{"sum_logits": -8.49041748046875, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.148038864135742, "logits_per_token": -2.83013916015625, "logits_per_char": -0.4994363223805147, "num_chars": 17}, {"sum_logits": -13.2152099609375, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -22.43378448486328, "logits_per_token": -4.405069986979167, "logits_per_char": -0.9439435686383929, "num_chars": 14}, {"sum_logits": -15.893728256225586, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.22107219696045, "logits_per_token": -7.946864128112793, "logits_per_char": -1.9867160320281982, "num_chars": 8}, {"sum_logits": -6.676085472106934, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.001758575439453, "logits_per_token": -6.676085472106934, "logits_per_char": -0.9537264960152763, "num_chars": 7}, {"sum_logits": -4.3740034103393555, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.434986114501953, "logits_per_token": -4.3740034103393555, "logits_per_char": -0.5467504262924194, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 608, "native_id": "7400e9c4a2c8e600a0f7e2d162a07837", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.701595306396484, "incorrect_loss_raw": 9.26747715473175, "correct_loss_per_char": 1.3701595306396483, "incorrect_loss_per_char": 1.0822090682658283, "correct_loss_per_token": 6.850797653198242, "incorrect_loss_per_token": 6.800542712211609, "correct_loss_uncond": -4.289722442626953, "incorrect_loss_uncond": -6.781579852104187}, "model_output": [{"sum_logits": -6.9764628410339355, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -14.017937660217285, "logits_per_token": -6.9764628410339355, "logits_per_char": -0.8720578551292419, "num_chars": 8}, {"sum_logits": -10.357970237731934, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -13.586409568786621, "logits_per_token": -10.357970237731934, "logits_per_char": -1.7263283729553223, "num_chars": 6}, {"sum_logits": -11.329174995422363, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -19.933612823486328, "logits_per_token": -5.664587497711182, "logits_per_char": -1.0299249995838513, "num_chars": 11}, {"sum_logits": -13.701595306396484, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -17.991317749023438, "logits_per_token": -6.850797653198242, "logits_per_char": -1.3701595306396483, "num_chars": 10}, {"sum_logits": -8.40630054473877, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -16.658267974853516, "logits_per_token": -4.203150272369385, "logits_per_char": -0.7005250453948975, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 609, "native_id": "fad197409a977126c9587eccd240ceea", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.617619514465332, "incorrect_loss_raw": 15.089720249176025, "correct_loss_per_char": 1.102936585744222, "incorrect_loss_per_char": 1.5269358441943213, "correct_loss_per_token": 3.308809757232666, "incorrect_loss_per_token": 8.467614555358887, "correct_loss_uncond": -6.505017280578613, "incorrect_loss_uncond": -2.6786763668060303}, "model_output": [{"sum_logits": -17.108699798583984, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.041133880615234, "logits_per_token": -8.554349899291992, "logits_per_char": -1.2220499856131417, "num_chars": 14}, {"sum_logits": -12.543006896972656, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.836979866027832, "logits_per_token": -12.543006896972656, "logits_per_char": -1.567875862121582, "num_chars": 8}, {"sum_logits": -6.617619514465332, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.122636795043945, "logits_per_token": -3.308809757232666, "logits_per_char": -1.102936585744222, "num_chars": 6}, {"sum_logits": -22.417591094970703, "num_tokens": 5, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -28.643587112426758, "logits_per_token": -4.48351821899414, "logits_per_char": -1.2454217274983723, "num_chars": 18}, {"sum_logits": -8.289583206176758, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -11.551885604858398, "logits_per_token": -8.289583206176758, "logits_per_char": -2.0723958015441895, "num_chars": 4}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 610, "native_id": "f09038444aeb1a048f04dedd5b97b769", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.96386194229126, "incorrect_loss_raw": 8.45709490776062, "correct_loss_per_char": 0.4512601765719327, "incorrect_loss_per_char": 0.9798447623913422, "correct_loss_per_token": 2.48193097114563, "incorrect_loss_per_token": 5.080812096595764, "correct_loss_uncond": -15.887448787689209, "incorrect_loss_uncond": -8.680299043655396}, "model_output": [{"sum_logits": -6.932904243469238, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.658267974853516, "logits_per_token": -3.466452121734619, "logits_per_char": -0.5777420202891032, "num_chars": 12}, {"sum_logits": -10.441313743591309, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.109554290771484, "logits_per_token": -5.220656871795654, "logits_per_char": -1.4916162490844727, "num_chars": 7}, {"sum_logits": -9.6360445022583, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.783004760742188, "logits_per_token": -4.81802225112915, "logits_per_char": -0.8760040456598456, "num_chars": 11}, {"sum_logits": -4.96386194229126, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.85131072998047, "logits_per_token": -2.48193097114563, "logits_per_char": -0.4512601765719327, "num_chars": 11}, {"sum_logits": -6.818117141723633, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.998748779296875, "logits_per_token": -6.818117141723633, "logits_per_char": -0.9740167345319476, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 611, "native_id": "0aa23ad1ba9f28bc3e0185237a7ce1cc", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.799243927001953, "incorrect_loss_raw": 13.823502898216248, "correct_loss_per_char": 0.7249054908752441, "incorrect_loss_per_char": 1.2447217447417123, "correct_loss_per_token": 5.799243927001953, "incorrect_loss_per_token": 9.440756678581238, "correct_loss_uncond": -7.910104751586914, "incorrect_loss_uncond": -3.283586621284485}, "model_output": [{"sum_logits": -17.095359802246094, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -22.919178009033203, "logits_per_token": -8.547679901123047, "logits_per_char": -1.221097128731864, "num_chars": 14}, {"sum_logits": -14.115316390991211, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.436118125915527, "logits_per_token": -14.115316390991211, "logits_per_char": -1.411531639099121, "num_chars": 10}, {"sum_logits": -5.799243927001953, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.709348678588867, "logits_per_token": -5.799243927001953, "logits_per_char": -0.7249054908752441, "num_chars": 8}, {"sum_logits": -17.966609954833984, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.54778289794922, "logits_per_token": -8.983304977416992, "logits_per_char": -1.122913122177124, "num_chars": 16}, {"sum_logits": -6.116725444793701, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -11.52527904510498, "logits_per_token": -6.116725444793701, "logits_per_char": -1.2233450889587403, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 612, "native_id": "06be29539ad3e1fbd7b53b05243f4bd7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.3878228664398193, "incorrect_loss_raw": 10.319653511047363, "correct_loss_per_char": 0.23130381107330322, "incorrect_loss_per_char": 1.2811731838044667, "correct_loss_per_token": 1.3878228664398193, "incorrect_loss_per_token": 10.319653511047363, "correct_loss_uncond": -11.433758020401001, "incorrect_loss_uncond": -3.967200994491577}, "model_output": [{"sum_logits": -10.046764373779297, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.479204177856445, "logits_per_token": -10.046764373779297, "logits_per_char": -1.255845546722412, "num_chars": 8}, {"sum_logits": -1.3878228664398193, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -12.82158088684082, "logits_per_token": -1.3878228664398193, "logits_per_char": -0.23130381107330322, "num_chars": 6}, {"sum_logits": -9.959806442260742, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -13.894200325012207, "logits_per_token": -9.959806442260742, "logits_per_char": -0.8299838701883951, "num_chars": 12}, {"sum_logits": -9.761124610900879, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.188592910766602, "logits_per_token": -9.761124610900879, "logits_per_char": -1.3944463729858398, "num_chars": 7}, {"sum_logits": -11.510918617248535, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -13.585420608520508, "logits_per_token": -11.510918617248535, "logits_per_char": -1.6444169453212194, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 613, "native_id": "bbe0a1ad733e5699f991ff91b3712a6f", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.8010993003845215, "incorrect_loss_raw": 8.38754117488861, "correct_loss_per_char": 0.6001374125480652, "incorrect_loss_per_char": 1.0023776718548367, "correct_loss_per_token": 4.8010993003845215, "incorrect_loss_per_token": 6.0202025175094604, "correct_loss_uncond": -10.687289714813232, "incorrect_loss_uncond": -6.448966383934021}, "model_output": [{"sum_logits": -4.8010993003845215, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.488389015197754, "logits_per_token": -4.8010993003845215, "logits_per_char": -0.6001374125480652, "num_chars": 8}, {"sum_logits": -10.227766990661621, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.184246063232422, "logits_per_token": -10.227766990661621, "logits_per_char": -1.4611095700945174, "num_chars": 7}, {"sum_logits": -8.310003280639648, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.25865936279297, "logits_per_token": -4.155001640319824, "logits_per_char": -0.593571662902832, "num_chars": 14}, {"sum_logits": -4.383688449859619, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.289387702941895, "logits_per_token": -4.383688449859619, "logits_per_char": -0.6262412071228027, "num_chars": 7}, {"sum_logits": -10.628705978393555, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.613737106323242, "logits_per_token": -5.314352989196777, "logits_per_char": -1.3285882472991943, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 614, "native_id": "9e5ce2b7d9eb404cdf8c7317dd0b5a59", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.019359111785889, "incorrect_loss_raw": 10.8584885597229, "correct_loss_per_char": 0.3585256508418492, "incorrect_loss_per_char": 1.0078912708494399, "correct_loss_per_token": 2.5096795558929443, "incorrect_loss_per_token": 5.868456125259399, "correct_loss_uncond": -14.550392627716064, "incorrect_loss_uncond": -6.740180015563965}, "model_output": [{"sum_logits": -10.895240783691406, "num_tokens": 4, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -22.578327178955078, "logits_per_token": -2.7238101959228516, "logits_per_char": -0.6809525489807129, "num_chars": 16}, {"sum_logits": -9.56393814086914, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.671432495117188, "logits_per_token": -4.78196907043457, "logits_per_char": -1.0626597934299045, "num_chars": 9}, {"sum_logits": -5.019359111785889, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.569751739501953, "logits_per_token": -2.5096795558929443, "logits_per_char": -0.3585256508418492, "num_chars": 14}, {"sum_logits": -14.013460159301758, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.572669982910156, "logits_per_token": -7.006730079650879, "logits_per_char": -1.1677883466084797, "num_chars": 12}, {"sum_logits": -8.961315155029297, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.572244644165039, "logits_per_token": -8.961315155029297, "logits_per_char": -1.120164394378662, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 615, "native_id": "ffde211723f55e9744f94cbc14488a23", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.403079986572266, "incorrect_loss_raw": 11.175638556480408, "correct_loss_per_char": 0.7718685695103237, "incorrect_loss_per_char": 1.492251608106825, "correct_loss_per_token": 5.403079986572266, "incorrect_loss_per_token": 6.368900656700134, "correct_loss_uncond": -7.903077125549316, "incorrect_loss_uncond": -5.4234312772750854}, "model_output": [{"sum_logits": -14.466349601745605, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.101316452026367, "logits_per_token": -7.233174800872803, "logits_per_char": -2.411058266957601, "num_chars": 6}, {"sum_logits": -14.719706535339355, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.789621353149414, "logits_per_token": -7.359853267669678, "logits_per_char": -1.6355229483710394, "num_chars": 9}, {"sum_logits": -6.248651027679443, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.564719200134277, "logits_per_token": -6.248651027679443, "logits_per_char": -0.8926644325256348, "num_chars": 7}, {"sum_logits": -5.403079986572266, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.306157112121582, "logits_per_token": -5.403079986572266, "logits_per_char": -0.7718685695103237, "num_chars": 7}, {"sum_logits": -9.267847061157227, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.940622329711914, "logits_per_token": -4.633923530578613, "logits_per_char": -1.0297607845730252, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 616, "native_id": "5ff8b0deed53b9ff91d58bd5b6f85bdf", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.355683326721191, "incorrect_loss_raw": 9.117319345474243, "correct_loss_per_char": 0.6355683326721191, "incorrect_loss_per_char": 0.8481302371610215, "correct_loss_per_token": 3.1778416633605957, "incorrect_loss_per_token": 5.46777206659317, "correct_loss_uncond": -13.069193840026855, "incorrect_loss_uncond": -7.868945598602295}, "model_output": [{"sum_logits": -12.67170524597168, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.749786376953125, "logits_per_token": -6.33585262298584, "logits_per_char": -1.0559754371643066, "num_chars": 12}, {"sum_logits": -4.123271465301514, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.955062866210938, "logits_per_token": -2.061635732650757, "logits_per_char": -0.3436059554417928, "num_chars": 12}, {"sum_logits": -7.272899150848389, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.043295860290527, "logits_per_token": -7.272899150848389, "logits_per_char": -1.0389855929783411, "num_chars": 7}, {"sum_logits": -6.355683326721191, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.424877166748047, "logits_per_token": -3.1778416633605957, "logits_per_char": -0.6355683326721191, "num_chars": 10}, {"sum_logits": -12.40140151977539, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.196914672851562, "logits_per_token": -6.200700759887695, "logits_per_char": -0.9539539630596454, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 617, "native_id": "36f1ceeecde7abf99dab635239e12442", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 4.279750823974609, "incorrect_loss_raw": 12.584192633628845, "correct_loss_per_char": 0.47552786933051217, "incorrect_loss_per_char": 1.3448508580525715, "correct_loss_per_token": 2.1398754119873047, "incorrect_loss_per_token": 6.654962321122487, "correct_loss_uncond": -13.301502227783203, "incorrect_loss_uncond": -7.075353503227234}, "model_output": [{"sum_logits": -4.384341716766357, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -20.806671142578125, "logits_per_token": -2.1921708583831787, "logits_per_char": -0.48714907964070636, "num_chars": 9}, {"sum_logits": -18.626928329467773, "num_tokens": 3, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -26.31793212890625, "logits_per_token": -6.208976109822591, "logits_per_char": -1.5522440274556477, "num_chars": 12}, {"sum_logits": -4.279750823974609, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -17.581253051757812, "logits_per_token": -2.1398754119873047, "logits_per_char": -0.47552786933051217, "num_chars": 9}, {"sum_logits": -9.11190414428711, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -15.237231254577637, "logits_per_token": -9.11190414428711, "logits_per_char": -1.5186506907145183, "num_chars": 6}, {"sum_logits": -18.21359634399414, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -16.276350021362305, "logits_per_token": -9.10679817199707, "logits_per_char": -1.821359634399414, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 618, "native_id": "e3c9e83c0c62d842de2dfe229f5e6d41", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 4, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 9.255464553833008, "incorrect_loss_raw": 9.841992378234863, "correct_loss_per_char": 0.7119588118333083, "incorrect_loss_per_char": 1.3847823401292165, "correct_loss_per_token": 4.627732276916504, "incorrect_loss_per_token": 7.4255759716033936, "correct_loss_uncond": -7.2466888427734375, "incorrect_loss_uncond": -6.104369401931763}, "model_output": [{"sum_logits": -9.884147644042969, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.102972984313965, "logits_per_token": -9.884147644042969, "logits_per_char": -1.6473579406738281, "num_chars": 6}, {"sum_logits": -10.662261009216309, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.98219871520996, "logits_per_token": -5.331130504608154, "logits_per_char": -1.3327826261520386, "num_chars": 8}, {"sum_logits": -10.152490615844727, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.378252029418945, "logits_per_token": -10.152490615844727, "logits_per_char": -1.6920817693074544, "num_chars": 6}, {"sum_logits": -8.66907024383545, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.322023391723633, "logits_per_token": -4.334535121917725, "logits_per_char": -0.8669070243835449, "num_chars": 10}, {"sum_logits": -9.255464553833008, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.502153396606445, "logits_per_token": -4.627732276916504, "logits_per_char": -0.7119588118333083, "num_chars": 13}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 619, "native_id": "c0e4d0118c9cdfe2edc49ef954572b31", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.469557762145996, "incorrect_loss_raw": 13.004892110824585, "correct_loss_per_char": 1.117389440536499, "incorrect_loss_per_char": 1.4088830731131814, "correct_loss_per_token": 4.469557762145996, "incorrect_loss_per_token": 8.427686929702759, "correct_loss_uncond": -9.423675537109375, "incorrect_loss_uncond": -3.255462646484375}, "model_output": [{"sum_logits": -18.21865463256836, "num_tokens": 2, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -20.387222290039062, "logits_per_token": -9.10932731628418, "logits_per_char": -1.6562413302334873, "num_chars": 11}, {"sum_logits": -18.39898681640625, "num_tokens": 2, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -17.77572250366211, "logits_per_token": -9.199493408203125, "logits_per_char": -1.1499366760253906, "num_chars": 16}, {"sum_logits": -4.469557762145996, "num_tokens": 1, "num_tokens_all": 165, "is_greedy": false, "sum_logits_uncond": -13.893233299255371, "logits_per_token": -4.469557762145996, "logits_per_char": -1.117389440536499, "num_chars": 4}, {"sum_logits": -7.870993614196777, "num_tokens": 1, "num_tokens_all": 165, "is_greedy": false, "sum_logits_uncond": -13.766522407531738, "logits_per_token": -7.870993614196777, "logits_per_char": -1.5741987228393555, "num_chars": 5}, {"sum_logits": -7.530933380126953, "num_tokens": 1, "num_tokens_all": 165, "is_greedy": false, "sum_logits_uncond": -13.11195182800293, "logits_per_token": -7.530933380126953, "logits_per_char": -1.2551555633544922, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 620, "native_id": "4423c006f2a43f222d4c4e97360c25d3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.2595930099487305, "incorrect_loss_raw": 22.019999980926514, "correct_loss_per_char": 0.5584302315345178, "incorrect_loss_per_char": 1.2033984943033165, "correct_loss_per_token": 3.6297965049743652, "incorrect_loss_per_token": 8.015120363235473, "correct_loss_uncond": -9.029829978942871, "incorrect_loss_uncond": 0.055397987365722656}, "model_output": [{"sum_logits": -7.2595930099487305, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.2894229888916, "logits_per_token": -3.6297965049743652, "logits_per_char": -0.5584302315345178, "num_chars": 13}, {"sum_logits": -15.199979782104492, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -18.910371780395508, "logits_per_token": -5.066659927368164, "logits_per_char": -1.013331985473633, "num_chars": 15}, {"sum_logits": -31.487295150756836, "num_tokens": 5, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -29.167850494384766, "logits_per_token": -6.2974590301513675, "logits_per_char": -1.2110498134906476, "num_chars": 26}, {"sum_logits": -21.71360969543457, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.869884490966797, "logits_per_token": -10.856804847717285, "logits_per_char": -1.2772711585549748, "num_chars": 17}, {"sum_logits": -19.679115295410156, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.910301208496094, "logits_per_token": -9.839557647705078, "logits_per_char": -1.3119410196940104, "num_chars": 15}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 621, "native_id": "9382bc51ba092f55a494eff8615899de", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.723757266998291, "incorrect_loss_raw": 11.678123235702515, "correct_loss_per_char": 0.9654696583747864, "incorrect_loss_per_char": 1.4578606365552913, "correct_loss_per_token": 3.8618786334991455, "incorrect_loss_per_token": 6.480199813842773, "correct_loss_uncond": -7.651648044586182, "incorrect_loss_uncond": -4.733942270278931}, "model_output": [{"sum_logits": -9.333686828613281, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.916854858398438, "logits_per_token": -9.333686828613281, "logits_per_char": -1.5556144714355469, "num_chars": 6}, {"sum_logits": -10.686010360717773, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.79250717163086, "logits_per_token": -5.343005180358887, "logits_per_char": -1.187334484524197, "num_chars": 9}, {"sum_logits": -7.723757266998291, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.375405311584473, "logits_per_token": -3.8618786334991455, "logits_per_char": -0.9654696583747864, "num_chars": 8}, {"sum_logits": -12.613743782043457, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.514463424682617, "logits_per_token": -4.204581260681152, "logits_per_char": -0.7419849283554975, "num_chars": 17}, {"sum_logits": -14.079051971435547, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.424436569213867, "logits_per_token": -7.039525985717773, "logits_per_char": -2.3465086619059243, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 622, "native_id": "dec1c42628a7448aa364cdada6e82f98", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.93143081665039, "incorrect_loss_raw": 11.37308931350708, "correct_loss_per_char": 1.293143081665039, "incorrect_loss_per_char": 1.1133529515493483, "correct_loss_per_token": 6.465715408325195, "incorrect_loss_per_token": 8.625805854797363, "correct_loss_uncond": -3.681711196899414, "incorrect_loss_uncond": -4.051577568054199}, "model_output": [{"sum_logits": -12.93143081665039, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -16.613142013549805, "logits_per_token": -6.465715408325195, "logits_per_char": -1.293143081665039, "num_chars": 10}, {"sum_logits": -11.606674194335938, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -16.69085121154785, "logits_per_token": -5.803337097167969, "logits_per_char": -0.9672228495279948, "num_chars": 12}, {"sum_logits": -7.756013870239258, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -14.412413597106934, "logits_per_token": -7.756013870239258, "logits_per_char": -0.7756013870239258, "num_chars": 10}, {"sum_logits": -15.758075714111328, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -14.202067375183105, "logits_per_token": -15.758075714111328, "logits_per_char": -1.969759464263916, "num_chars": 8}, {"sum_logits": -10.371593475341797, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -16.393335342407227, "logits_per_token": -5.185796737670898, "logits_per_char": -0.740828105381557, "num_chars": 14}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 623, "native_id": "07ea8ff6ee916f2bf9aceab1e19ff99a", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.243212699890137, "incorrect_loss_raw": 10.189642190933228, "correct_loss_per_char": 0.42432126998901365, "incorrect_loss_per_char": 1.077463063928816, "correct_loss_per_token": 2.1216063499450684, "incorrect_loss_per_token": 7.470972180366516, "correct_loss_uncond": -13.20361042022705, "incorrect_loss_uncond": -5.24117374420166}, "model_output": [{"sum_logits": -7.879593849182129, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.772855758666992, "logits_per_token": -3.9397969245910645, "logits_per_char": -0.6566328207651774, "num_chars": 12}, {"sum_logits": -13.869766235351562, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.671432495117188, "logits_per_token": -6.934883117675781, "logits_per_char": -1.5410851372612848, "num_chars": 9}, {"sum_logits": -5.243914604187012, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.339823722839355, "logits_per_token": -5.243914604187012, "logits_per_char": -0.5826571782430013, "num_chars": 9}, {"sum_logits": -4.243212699890137, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.446823120117188, "logits_per_token": -2.1216063499450684, "logits_per_char": -0.42432126998901365, "num_chars": 10}, {"sum_logits": -13.765294075012207, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.939151763916016, "logits_per_token": -13.765294075012207, "logits_per_char": -1.5294771194458008, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 624, "native_id": "a328285c6212c899e335c45db3c49ffd", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.346926689147949, "incorrect_loss_raw": 10.445533990859985, "correct_loss_per_char": 0.5433658361434937, "incorrect_loss_per_char": 1.2809815110582294, "correct_loss_per_token": 4.346926689147949, "incorrect_loss_per_token": 6.187283356984456, "correct_loss_uncond": -9.960835456848145, "incorrect_loss_uncond": -5.127570152282715}, "model_output": [{"sum_logits": -14.576433181762695, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.079051971435547, "logits_per_token": -7.288216590881348, "logits_per_char": -1.4576433181762696, "num_chars": 10}, {"sum_logits": -14.617178916931152, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.509748458862305, "logits_per_token": -4.872392972310384, "logits_per_char": -1.3288344469937412, "num_chars": 11}, {"sum_logits": -7.180829048156738, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.22335147857666, "logits_per_token": -7.180829048156738, "logits_per_char": -1.4361658096313477, "num_chars": 5}, {"sum_logits": -4.346926689147949, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.307762145996094, "logits_per_token": -4.346926689147949, "logits_per_char": -0.5433658361434937, "num_chars": 8}, {"sum_logits": -5.4076948165893555, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.480264663696289, "logits_per_token": -5.4076948165893555, "logits_per_char": -0.9012824694315592, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 625, "native_id": "e248968fec422e1fab0f0561fedff76e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.714621543884277, "incorrect_loss_raw": 7.1313316822052, "correct_loss_per_char": 0.5446638464927673, "incorrect_loss_per_char": 1.0572766842522145, "correct_loss_per_token": 4.357310771942139, "incorrect_loss_per_token": 6.096087694168091, "correct_loss_uncond": -9.3352689743042, "incorrect_loss_uncond": -8.534260272979736}, "model_output": [{"sum_logits": -2.753568172454834, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.726911544799805, "logits_per_token": -1.376784086227417, "logits_per_char": -0.22946401437123617, "num_chars": 12}, {"sum_logits": -10.590154647827148, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.8103609085083, "logits_per_token": -10.590154647827148, "logits_per_char": -2.1180309295654296, "num_chars": 5}, {"sum_logits": -5.528383731842041, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.100921630859375, "logits_per_token": -2.7641918659210205, "logits_per_char": -0.5025803392583673, "num_chars": 11}, {"sum_logits": -8.714621543884277, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.049890518188477, "logits_per_token": -4.357310771942139, "logits_per_char": -0.5446638464927673, "num_chars": 16}, {"sum_logits": -9.653220176696777, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.024173736572266, "logits_per_token": -9.653220176696777, "logits_per_char": -1.3790314538138253, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 626, "native_id": "2067720531fc03c017af941cec2f6f40", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.63862419128418, "incorrect_loss_raw": 10.537214994430542, "correct_loss_per_char": 0.3865520159403483, "incorrect_loss_per_char": 1.2308974769380359, "correct_loss_per_token": 2.31931209564209, "incorrect_loss_per_token": 7.49508261680603, "correct_loss_uncond": -11.026894569396973, "incorrect_loss_uncond": -5.2031636238098145}, "model_output": [{"sum_logits": -24.337059020996094, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -22.52896499633789, "logits_per_token": -12.168529510498047, "logits_per_char": -2.4337059020996095, "num_chars": 10}, {"sum_logits": -6.885618209838867, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.41093635559082, "logits_per_token": -6.885618209838867, "logits_per_char": -0.9836597442626953, "num_chars": 7}, {"sum_logits": -4.63862419128418, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.665518760681152, "logits_per_token": -2.31931209564209, "logits_per_char": -0.3865520159403483, "num_chars": 12}, {"sum_logits": -5.259671211242676, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.235330581665039, "logits_per_token": -5.259671211242676, "logits_per_char": -0.8766118685404459, "num_chars": 6}, {"sum_logits": -5.666511535644531, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.786282539367676, "logits_per_token": -5.666511535644531, "logits_per_char": -0.6296123928493924, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 627, "native_id": "70d3ebc00b165d9d08f9491a1dd85034", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 4.64860725402832, "incorrect_loss_raw": 7.730119705200195, "correct_loss_per_char": 0.42260065945712005, "incorrect_loss_per_char": 0.689787507236452, "correct_loss_per_token": 2.32430362701416, "incorrect_loss_per_token": 4.528493881225586, "correct_loss_uncond": -10.704288482666016, "incorrect_loss_uncond": -9.421733856201172}, "model_output": [{"sum_logits": -10.050247192382812, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -17.474275588989258, "logits_per_token": -5.025123596191406, "logits_per_char": -0.5289603785464638, "num_chars": 19}, {"sum_logits": -4.64860725402832, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.352895736694336, "logits_per_token": -2.32430362701416, "logits_per_char": -0.42260065945712005, "num_chars": 11}, {"sum_logits": -8.189484596252441, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.992185592651367, "logits_per_token": -4.094742298126221, "logits_per_char": -0.8189484596252441, "num_chars": 10}, {"sum_logits": -7.373274803161621, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -18.96158218383789, "logits_per_token": -3.6866374015808105, "logits_per_char": -0.5266624859401158, "num_chars": 14}, {"sum_logits": -5.307472229003906, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.179370880126953, "logits_per_token": -5.307472229003906, "logits_per_char": -0.8845787048339844, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 628, "native_id": "41bab71fea3fa04e5a4e10a2f86996df", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.288146495819092, "incorrect_loss_raw": 9.254493832588196, "correct_loss_per_char": 1.041163785117013, "incorrect_loss_per_char": 1.1581153461893836, "correct_loss_per_token": 7.288146495819092, "incorrect_loss_per_token": 8.260655999183655, "correct_loss_uncond": -5.274842739105225, "incorrect_loss_uncond": -6.728273034095764}, "model_output": [{"sum_logits": -13.168354034423828, "num_tokens": 1, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -14.615224838256836, "logits_per_token": -13.168354034423828, "logits_per_char": -1.8811934334891183, "num_chars": 7}, {"sum_logits": -7.3695244789123535, "num_tokens": 1, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -14.836979866027832, "logits_per_token": -7.3695244789123535, "logits_per_char": -0.9211905598640442, "num_chars": 8}, {"sum_logits": -7.950702667236328, "num_tokens": 2, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -18.424861907958984, "logits_per_token": -3.975351333618164, "logits_per_char": -0.6115925128643329, "num_chars": 13}, {"sum_logits": -8.529394149780273, "num_tokens": 1, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -16.054000854492188, "logits_per_token": -8.529394149780273, "logits_per_char": -1.218484878540039, "num_chars": 7}, {"sum_logits": -7.288146495819092, "num_tokens": 1, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -12.562989234924316, "logits_per_token": -7.288146495819092, "logits_per_char": -1.041163785117013, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 629, "native_id": "e18dd9ffc7b7934c39f2b5e9dee5a8c2", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.705092906951904, "incorrect_loss_raw": 18.26084876060486, "correct_loss_per_char": 0.6705092906951904, "incorrect_loss_per_char": 1.8260848760604858, "correct_loss_per_token": 3.352546453475952, "incorrect_loss_per_token": 10.807288408279419, "correct_loss_uncond": -12.73310136795044, "incorrect_loss_uncond": 0.976921558380127}, "model_output": [{"sum_logits": -17.10439682006836, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.494699478149414, "logits_per_token": -8.55219841003418, "logits_per_char": -1.710439682006836, "num_chars": 10}, {"sum_logits": -19.710342407226562, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.68854331970215, "logits_per_token": -9.855171203613281, "logits_per_char": -1.9710342407226562, "num_chars": 10}, {"sum_logits": -13.414912223815918, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.455964088439941, "logits_per_token": -13.414912223815918, "logits_per_char": -1.3414912223815918, "num_chars": 10}, {"sum_logits": -22.813743591308594, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.496501922607422, "logits_per_token": -11.406871795654297, "logits_per_char": -2.2813743591308593, "num_chars": 10}, {"sum_logits": -6.705092906951904, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.438194274902344, "logits_per_token": -3.352546453475952, "logits_per_char": -0.6705092906951904, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 630, "native_id": "449de58e919975867255218484a9fc89", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 2.8526930809020996, "incorrect_loss_raw": 11.12612509727478, "correct_loss_per_char": 0.2037637914930071, "incorrect_loss_per_char": 1.3379163344701133, "correct_loss_per_token": 2.8526930809020996, "incorrect_loss_per_token": 8.422845005989075, "correct_loss_uncond": -10.788835048675537, "incorrect_loss_uncond": -5.088695526123047}, "model_output": [{"sum_logits": -2.8526930809020996, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -13.641528129577637, "logits_per_token": -2.8526930809020996, "logits_per_char": -0.2037637914930071, "num_chars": 14}, {"sum_logits": -9.08203125, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -20.24420738220215, "logits_per_token": -4.541015625, "logits_per_char": -0.908203125, "num_chars": 10}, {"sum_logits": -9.360030174255371, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -13.104439735412598, "logits_per_token": -9.360030174255371, "logits_per_char": -0.9360030174255372, "num_chars": 10}, {"sum_logits": -12.544209480285645, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -18.012283325195312, "logits_per_token": -6.272104740142822, "logits_per_char": -1.2544209480285644, "num_chars": 10}, {"sum_logits": -13.518229484558105, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -13.49835205078125, "logits_per_token": -13.518229484558105, "logits_per_char": -2.253038247426351, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 631, "native_id": "9698232e3599157431c9dc8f2fe179cd", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.405623435974121, "incorrect_loss_raw": 9.819844484329224, "correct_loss_per_char": 0.3003124131096734, "incorrect_loss_per_char": 1.6920074780782064, "correct_loss_per_token": 2.7028117179870605, "incorrect_loss_per_token": 7.743803024291992, "correct_loss_uncond": -14.582528114318848, "incorrect_loss_uncond": -4.63714599609375}, "model_output": [{"sum_logits": -7.655932426452637, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.243598937988281, "logits_per_token": -7.655932426452637, "logits_per_char": -1.9139831066131592, "num_chars": 4}, {"sum_logits": -5.405623435974121, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.98815155029297, "logits_per_token": -2.7028117179870605, "logits_per_char": -0.3003124131096734, "num_chars": 18}, {"sum_logits": -16.60833168029785, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.654415130615234, "logits_per_token": -8.304165840148926, "logits_per_char": -2.0760414600372314, "num_chars": 8}, {"sum_logits": -8.264591217041016, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.138700485229492, "logits_per_token": -8.264591217041016, "logits_per_char": -1.652918243408203, "num_chars": 5}, {"sum_logits": -6.750522613525391, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.791247367858887, "logits_per_token": -6.750522613525391, "logits_per_char": -1.1250871022542317, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 632, "native_id": "0b5d0c3bafbe06dd5334c20cd8ea7fe2", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.6261515617370605, "incorrect_loss_raw": 10.252963781356812, "correct_loss_per_char": 0.3681195312076145, "incorrect_loss_per_char": 1.156939885195564, "correct_loss_per_token": 3.3130757808685303, "incorrect_loss_per_token": 9.07475221157074, "correct_loss_uncond": -11.148182392120361, "incorrect_loss_uncond": -4.194650173187256}, "model_output": [{"sum_logits": -13.648488998413086, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.985567092895508, "logits_per_token": -13.648488998413086, "logits_per_char": -1.9497841426304408, "num_chars": 7}, {"sum_logits": -7.694295883178711, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.531683921813965, "logits_per_token": -7.694295883178711, "logits_per_char": -1.0991851261683874, "num_chars": 7}, {"sum_logits": -9.425692558288574, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.538697242736816, "logits_per_token": -4.712846279144287, "logits_per_char": -0.5544525034287396, "num_chars": 17}, {"sum_logits": -6.6261515617370605, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.774333953857422, "logits_per_token": -3.3130757808685303, "logits_per_char": -0.3681195312076145, "num_chars": 18}, {"sum_logits": -10.243377685546875, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.73450756072998, "logits_per_token": -10.243377685546875, "logits_per_char": -1.0243377685546875, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 633, "native_id": "7fe53bf68ec57a52a508611acf5b279e", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 14.774078369140625, "incorrect_loss_raw": 12.181542873382568, "correct_loss_per_char": 1.0552913120814733, "incorrect_loss_per_char": 1.0227034148715792, "correct_loss_per_token": 7.3870391845703125, "incorrect_loss_per_token": 5.151576399803162, "correct_loss_uncond": -6.8840179443359375, "incorrect_loss_uncond": -6.644384384155273}, "model_output": [{"sum_logits": -12.846738815307617, "num_tokens": 2, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -18.182462692260742, "logits_per_token": -6.423369407653809, "logits_per_char": -0.9882106781005859, "num_chars": 13}, {"sum_logits": -9.211630821228027, "num_tokens": 3, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -18.28645133972168, "logits_per_token": -3.0705436070760093, "logits_per_char": -0.6579736300877163, "num_chars": 14}, {"sum_logits": -13.329050064086914, "num_tokens": 3, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -17.585811614990234, "logits_per_token": -4.443016688028972, "logits_per_char": -1.110754172007243, "num_chars": 12}, {"sum_logits": -13.338751792907715, "num_tokens": 2, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -21.24898338317871, "logits_per_token": -6.669375896453857, "logits_per_char": -1.3338751792907715, "num_chars": 10}, {"sum_logits": -14.774078369140625, "num_tokens": 2, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -21.658096313476562, "logits_per_token": -7.3870391845703125, "logits_per_char": -1.0552913120814733, "num_chars": 14}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 634, "native_id": "68c41ec8415eab50620eb9ecf6f35a6a", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.818113327026367, "incorrect_loss_raw": 10.440004348754883, "correct_loss_per_char": 1.9696855545043945, "incorrect_loss_per_char": 0.9540953302716875, "correct_loss_per_token": 11.818113327026367, "incorrect_loss_per_token": 4.9584706624348955, "correct_loss_uncond": -2.085219383239746, "incorrect_loss_uncond": -7.230620384216309}, "model_output": [{"sum_logits": -16.356544494628906, "num_tokens": 4, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.308395385742188, "logits_per_token": -4.089136123657227, "logits_per_char": -1.4869585904208096, "num_chars": 11}, {"sum_logits": -11.818113327026367, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.903332710266113, "logits_per_token": -11.818113327026367, "logits_per_char": -1.9696855545043945, "num_chars": 6}, {"sum_logits": -5.692193031311035, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.442961692810059, "logits_per_token": -5.692193031311035, "logits_per_char": -0.8131704330444336, "num_chars": 7}, {"sum_logits": -5.2231903076171875, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.202109336853027, "logits_per_token": -5.2231903076171875, "logits_per_char": -0.4017838698167067, "num_chars": 13}, {"sum_logits": -14.488089561462402, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -22.729032516479492, "logits_per_token": -4.829363187154134, "logits_per_char": -1.1144684278048003, "num_chars": 13}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 635, "native_id": "6c4b2c93a4bdafb6cbf2b2ef2439b06f", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.653451919555664, "incorrect_loss_raw": 6.512903392314911, "correct_loss_per_char": 0.5544543266296387, "incorrect_loss_per_char": 0.6659192478329933, "correct_loss_per_token": 3.326725959777832, "incorrect_loss_per_token": 5.812167823314667, "correct_loss_uncond": -9.284088134765625, "incorrect_loss_uncond": -7.884080708026886}, "model_output": [{"sum_logits": -11.8499174118042, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.804515838623047, "logits_per_token": -11.8499174118042, "logits_per_char": -1.0772652192549272, "num_chars": 11}, {"sum_logits": -6.367717742919922, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.864840507507324, "logits_per_token": -6.367717742919922, "logits_per_char": -0.7075241936577691, "num_chars": 9}, {"sum_logits": -6.653451919555664, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.937540054321289, "logits_per_token": -3.326725959777832, "logits_per_char": -0.5544543266296387, "num_chars": 12}, {"sum_logits": -5.605884552001953, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.308795928955078, "logits_per_token": -2.8029422760009766, "logits_per_char": -0.5605884552001953, "num_chars": 10}, {"sum_logits": -2.2280938625335693, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": true, "sum_logits_uncond": -13.609784126281738, "logits_per_token": -2.2280938625335693, "logits_per_char": -0.3182991232190813, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 636, "native_id": "51e2da7396ab7045533e885dbb98a424", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 4.009660243988037, "incorrect_loss_raw": 9.956640064716339, "correct_loss_per_char": 0.4009660243988037, "incorrect_loss_per_char": 0.8850650857795368, "correct_loss_per_token": 2.0048301219940186, "incorrect_loss_per_token": 6.9606680274009705, "correct_loss_uncond": -12.037189960479736, "incorrect_loss_uncond": -5.584412038326263}, "model_output": [{"sum_logits": -11.007210731506348, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.166208267211914, "logits_per_token": -5.503605365753174, "logits_per_char": -1.0006555210460315, "num_chars": 11}, {"sum_logits": -4.009660243988037, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.046850204467773, "logits_per_token": -2.0048301219940186, "logits_per_char": -0.4009660243988037, "num_chars": 10}, {"sum_logits": -13.47040843963623, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.666552543640137, "logits_per_token": -13.47040843963623, "logits_per_char": -1.1225340366363525, "num_chars": 12}, {"sum_logits": -2.3883755207061768, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": true, "sum_logits_uncond": -14.065229415893555, "logits_per_token": -2.3883755207061768, "logits_per_char": -0.2388375520706177, "num_chars": 10}, {"sum_logits": -12.960565567016602, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.266218185424805, "logits_per_token": -6.480282783508301, "logits_per_char": -1.1782332333651455, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 637, "native_id": "3f6157968fcf50d257ec3d8c729b7443", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.471772193908691, "incorrect_loss_raw": 9.304112672805786, "correct_loss_per_char": 1.0524191326565213, "incorrect_loss_per_char": 1.17072395934343, "correct_loss_per_token": 9.471772193908691, "incorrect_loss_per_token": 7.196782509485881, "correct_loss_uncond": -3.8124561309814453, "incorrect_loss_uncond": -5.378025054931641}, "model_output": [{"sum_logits": -9.471772193908691, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.284228324890137, "logits_per_token": -9.471772193908691, "logits_per_char": -1.0524191326565213, "num_chars": 9}, {"sum_logits": -11.443906784057617, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.53042984008789, "logits_per_token": -11.443906784057617, "logits_per_char": -1.271545198228624, "num_chars": 9}, {"sum_logits": -6.761804580688477, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.669677734375, "logits_per_token": -6.761804580688477, "logits_per_char": -1.3523609161376953, "num_chars": 5}, {"sum_logits": -12.643980979919434, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.796329498291016, "logits_per_token": -4.2146603266398115, "logits_per_char": -1.1494528163563122, "num_chars": 11}, {"sum_logits": -6.366758346557617, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.7321138381958, "logits_per_token": -6.366758346557617, "logits_per_char": -0.9095369066510882, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 638, "native_id": "4768aa28fa14569d830f8947565296c1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 4, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 3.481844425201416, "incorrect_loss_raw": 8.515768766403198, "correct_loss_per_char": 0.435230553150177, "incorrect_loss_per_char": 1.1181633869515815, "correct_loss_per_token": 3.481844425201416, "incorrect_loss_per_token": 8.515768766403198, "correct_loss_uncond": -9.359814167022705, "incorrect_loss_uncond": -5.705806255340576}, "model_output": [{"sum_logits": -5.952542304992676, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.912116050720215, "logits_per_token": -5.952542304992676, "logits_per_char": -1.1905084609985352, "num_chars": 5}, {"sum_logits": -10.394542694091797, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.443976402282715, "logits_per_token": -10.394542694091797, "logits_per_char": -1.4849346705845423, "num_chars": 7}, {"sum_logits": -12.707427024841309, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.991591453552246, "logits_per_token": -12.707427024841309, "logits_per_char": -1.4119363360934787, "num_chars": 9}, {"sum_logits": -3.481844425201416, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.841658592224121, "logits_per_token": -3.481844425201416, "logits_per_char": -0.435230553150177, "num_chars": 8}, {"sum_logits": -5.008563041687012, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.538616180419922, "logits_per_token": -5.008563041687012, "logits_per_char": -0.38527408012977016, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 639, "native_id": "5516b1c93f94aaa0bf9a4c7b124788d4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.192253112792969, "incorrect_loss_raw": 16.362217664718628, "correct_loss_per_char": 1.0192253112792968, "incorrect_loss_per_char": 1.3749602677938821, "correct_loss_per_token": 5.096126556396484, "incorrect_loss_per_token": 7.301135420799255, "correct_loss_uncond": -10.8565673828125, "incorrect_loss_uncond": -2.0977249145507812}, "model_output": [{"sum_logits": -10.192253112792969, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.04882049560547, "logits_per_token": -5.096126556396484, "logits_per_char": -1.0192253112792968, "num_chars": 10}, {"sum_logits": -15.013245582580566, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.888322830200195, "logits_per_token": -7.506622791290283, "logits_per_char": -1.1548650448138897, "num_chars": 13}, {"sum_logits": -10.469375610351562, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.882476806640625, "logits_per_token": -5.234687805175781, "logits_per_char": -0.9517614191228693, "num_chars": 11}, {"sum_logits": -18.846887588500977, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.598116874694824, "logits_per_token": -9.423443794250488, "logits_per_char": -1.8846887588500976, "num_chars": 10}, {"sum_logits": -21.119361877441406, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.470853805541992, "logits_per_token": -7.039787292480469, "logits_per_char": -1.5085258483886719, "num_chars": 14}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 640, "native_id": "96ea2c3174229c4a6a0e2ffaed2df378", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 14.48939037322998, "incorrect_loss_raw": 11.625851392745972, "correct_loss_per_char": 2.897878074645996, "incorrect_loss_per_char": 1.6018344665330553, "correct_loss_per_token": 7.24469518661499, "incorrect_loss_per_token": 7.75308624903361, "correct_loss_uncond": -0.20114994049072266, "incorrect_loss_uncond": -2.241896867752075}, "model_output": [{"sum_logits": -11.73838996887207, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.22107219696045, "logits_per_token": -5.869194984436035, "logits_per_char": -1.4672987461090088, "num_chars": 8}, {"sum_logits": -7.556366920471191, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.506343841552734, "logits_per_token": -7.556366920471191, "logits_per_char": -1.5112733840942383, "num_chars": 5}, {"sum_logits": -12.775850296020508, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.858729362487793, "logits_per_token": -12.775850296020508, "logits_per_char": -1.8251214708600725, "num_chars": 7}, {"sum_logits": -14.432798385620117, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.884847640991211, "logits_per_token": -4.810932795206706, "logits_per_char": -1.603644265068902, "num_chars": 9}, {"sum_logits": -14.48939037322998, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.690540313720703, "logits_per_token": -7.24469518661499, "logits_per_char": -2.897878074645996, "num_chars": 5}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 641, "native_id": "7905b9f4ba503b0ce13b576808e99c42", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.238919258117676, "incorrect_loss_raw": 9.90717327594757, "correct_loss_per_char": 0.6337630198552058, "incorrect_loss_per_char": 1.06164923338663, "correct_loss_per_token": 2.7463064193725586, "incorrect_loss_per_token": 5.3691949645678205, "correct_loss_uncond": -12.800292015075684, "incorrect_loss_uncond": -8.313881278038025}, "model_output": [{"sum_logits": -8.238919258117676, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.03921127319336, "logits_per_token": -2.7463064193725586, "logits_per_char": -0.6337630198552058, "num_chars": 13}, {"sum_logits": -13.99754524230957, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -23.599605560302734, "logits_per_token": -4.66584841410319, "logits_per_char": -0.9998246601649693, "num_chars": 14}, {"sum_logits": -13.584883689880371, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.204669952392578, "logits_per_token": -6.7924418449401855, "logits_per_char": -1.509431521097819, "num_chars": 9}, {"sum_logits": -4.055549144744873, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.30141258239746, "logits_per_token": -2.0277745723724365, "logits_per_char": -0.4055549144744873, "num_chars": 10}, {"sum_logits": -7.990715026855469, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.77853012084961, "logits_per_token": -7.990715026855469, "logits_per_char": -1.3317858378092449, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 642, "native_id": "e0a7d1df3ce14b27888e785e6636d5f0", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.793929100036621, "incorrect_loss_raw": 8.703724026679993, "correct_loss_per_char": 0.5995330076951247, "incorrect_loss_per_char": 0.913002828189305, "correct_loss_per_token": 3.8969645500183105, "incorrect_loss_per_token": 5.848138292630513, "correct_loss_uncond": -13.058613777160645, "incorrect_loss_uncond": -8.159415125846863}, "model_output": [{"sum_logits": -9.041385650634766, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.37601089477539, "logits_per_token": -4.520692825317383, "logits_per_char": -0.602759043375651, "num_chars": 15}, {"sum_logits": -9.238771438598633, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.935400009155273, "logits_per_token": -9.238771438598633, "logits_per_char": -1.319824491228376, "num_chars": 7}, {"sum_logits": -7.793929100036621, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.852542877197266, "logits_per_token": -3.8969645500183105, "logits_per_char": -0.5995330076951247, "num_chars": 13}, {"sum_logits": -6.1822638511657715, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.21864128112793, "logits_per_token": -6.1822638511657715, "logits_per_char": -1.2364527702331543, "num_chars": 5}, {"sum_logits": -10.3524751663208, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.922504425048828, "logits_per_token": -3.450825055440267, "logits_per_char": -0.4929750079200381, "num_chars": 21}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 643, "native_id": "3eb397b96b6c3a245c81ab30205943f1", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.257715702056885, "incorrect_loss_raw": 10.13598620891571, "correct_loss_per_char": 0.35480964183807373, "incorrect_loss_per_char": 1.6143130318986045, "correct_loss_per_token": 2.1288578510284424, "incorrect_loss_per_token": 8.931532263755798, "correct_loss_uncond": -12.893203258514404, "incorrect_loss_uncond": -4.720128178596497}, "model_output": [{"sum_logits": -13.321372985839844, "num_tokens": 1, "num_tokens_all": 158, "is_greedy": false, "sum_logits_uncond": -15.676286697387695, "logits_per_token": -13.321372985839844, "logits_per_char": -1.480152553982205, "num_chars": 9}, {"sum_logits": -6.232197284698486, "num_tokens": 1, "num_tokens_all": 158, "is_greedy": false, "sum_logits_uncond": -13.84918212890625, "logits_per_token": -6.232197284698486, "logits_per_char": -0.7790246605873108, "num_chars": 8}, {"sum_logits": -4.257715702056885, "num_tokens": 2, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -17.15091896057129, "logits_per_token": -2.1288578510284424, "logits_per_char": -0.35480964183807373, "num_chars": 12}, {"sum_logits": -9.635631561279297, "num_tokens": 2, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -16.895427703857422, "logits_per_token": -4.817815780639648, "logits_per_char": -1.9271263122558593, "num_chars": 5}, {"sum_logits": -11.354743003845215, "num_tokens": 1, "num_tokens_all": 158, "is_greedy": false, "sum_logits_uncond": -13.003561019897461, "logits_per_token": -11.354743003845215, "logits_per_char": -2.270948600769043, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 644, "native_id": "536c9af0fae0aa75b32874dfcac66353", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.114603519439697, "incorrect_loss_raw": 12.988011598587036, "correct_loss_per_char": 0.4703541168799767, "incorrect_loss_per_char": 1.250827670097351, "correct_loss_per_token": 3.0573017597198486, "incorrect_loss_per_token": 7.791668653488159, "correct_loss_uncond": -9.895848751068115, "incorrect_loss_uncond": -2.8562862873077393}, "model_output": [{"sum_logits": -6.114603519439697, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.010452270507812, "logits_per_token": -3.0573017597198486, "logits_per_char": -0.4703541168799767, "num_chars": 13}, {"sum_logits": -16.492372512817383, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.041133880615234, "logits_per_token": -8.246186256408691, "logits_per_char": -1.1780266080583846, "num_chars": 14}, {"sum_logits": -15.992815017700195, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.09847640991211, "logits_per_token": -7.996407508850098, "logits_per_char": -1.3327345848083496, "num_chars": 12}, {"sum_logits": -9.085556030273438, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.565593719482422, "logits_per_token": -4.542778015136719, "logits_per_char": -1.0095062255859375, "num_chars": 9}, {"sum_logits": -10.381302833557129, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.671987533569336, "logits_per_token": -10.381302833557129, "logits_per_char": -1.4830432619367326, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 645, "native_id": "dc36293f603cf230f8059fc6f2e5660d", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.224957466125488, "incorrect_loss_raw": 7.230081558227539, "correct_loss_per_char": 0.653119683265686, "incorrect_loss_per_char": 1.2277390979585194, "correct_loss_per_token": 5.224957466125488, "incorrect_loss_per_token": 7.230081558227539, "correct_loss_uncond": -9.082804679870605, "incorrect_loss_uncond": -6.513395547866821}, "model_output": [{"sum_logits": -8.217131614685059, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.181896209716797, "logits_per_token": -8.217131614685059, "logits_per_char": -1.1738759449550085, "num_chars": 7}, {"sum_logits": -6.200963973999023, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.776235580444336, "logits_per_token": -6.200963973999023, "logits_per_char": -0.6200963973999023, "num_chars": 10}, {"sum_logits": -5.224957466125488, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.307762145996094, "logits_per_token": -5.224957466125488, "logits_per_char": -0.653119683265686, "num_chars": 8}, {"sum_logits": -8.399347305297852, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.32465648651123, "logits_per_token": -8.399347305297852, "logits_per_char": -2.099836826324463, "num_chars": 4}, {"sum_logits": -6.102883338928223, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.691120147705078, "logits_per_token": -6.102883338928223, "logits_per_char": -1.0171472231547039, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 646, "native_id": "1510f5183095466e4fe41b82501a9dd0", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.899561405181885, "incorrect_loss_raw": 17.208139181137085, "correct_loss_per_char": 0.7666179339090983, "incorrect_loss_per_char": 2.078404235839844, "correct_loss_per_token": 6.899561405181885, "incorrect_loss_per_token": 12.351034998893738, "correct_loss_uncond": -8.896273136138916, "incorrect_loss_uncond": 1.6668004989624023}, "model_output": [{"sum_logits": -12.63930892944336, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.16828441619873, "logits_per_token": -12.63930892944336, "logits_per_char": -2.527861785888672, "num_chars": 5}, {"sum_logits": -23.87809944152832, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.68854331970215, "logits_per_token": -11.93904972076416, "logits_per_char": -2.387809944152832, "num_chars": 10}, {"sum_logits": -6.899561405181885, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.7958345413208, "logits_per_token": -6.899561405181885, "logits_per_char": -0.7666179339090983, "num_chars": 9}, {"sum_logits": -14.978734016418457, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.655503273010254, "logits_per_token": -7.4893670082092285, "logits_per_char": -1.6643037796020508, "num_chars": 9}, {"sum_logits": -17.336414337158203, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.653023719787598, "logits_per_token": -17.336414337158203, "logits_per_char": -1.7336414337158204, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 647, "native_id": "1fcc547e4e6813afc1a66717248d6c62", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.865146636962891, "incorrect_loss_raw": 6.793027400970459, "correct_loss_per_char": 0.6950209481375558, "incorrect_loss_per_char": 0.8216441287880853, "correct_loss_per_token": 4.865146636962891, "incorrect_loss_per_token": 5.951251864433289, "correct_loss_uncond": -10.013453483581543, "incorrect_loss_uncond": -7.764545679092407}, "model_output": [{"sum_logits": -8.427518844604492, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.39526081085205, "logits_per_token": -8.427518844604492, "logits_per_char": -0.5267199277877808, "num_chars": 16}, {"sum_logits": -4.893688201904297, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.995492935180664, "logits_per_token": -4.893688201904297, "logits_per_char": -0.6117110252380371, "num_chars": 8}, {"sum_logits": -4.865146636962891, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.878600120544434, "logits_per_token": -4.865146636962891, "logits_per_char": -0.6950209481375558, "num_chars": 7}, {"sum_logits": -6.734204292297363, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.975027084350586, "logits_per_token": -3.3671021461486816, "logits_per_char": -0.9620291846139091, "num_chars": 7}, {"sum_logits": -7.116698265075684, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.864511489868164, "logits_per_token": -7.116698265075684, "logits_per_char": -1.186116377512614, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 648, "native_id": "68a911b64dc943b5f81c0f8dec7faed7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.672453880310059, "incorrect_loss_raw": 8.47430944442749, "correct_loss_per_char": 0.5840567350387573, "incorrect_loss_per_char": 0.9050014327554142, "correct_loss_per_token": 4.672453880310059, "incorrect_loss_per_token": 5.684957067171732, "correct_loss_uncond": -8.35110092163086, "incorrect_loss_uncond": -7.349097728729248}, "model_output": [{"sum_logits": -5.555885314941406, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -11.10049819946289, "logits_per_token": -5.555885314941406, "logits_per_char": -1.1111770629882813, "num_chars": 5}, {"sum_logits": -4.672453880310059, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.023554801940918, "logits_per_token": -4.672453880310059, "logits_per_char": -0.5840567350387573, "num_chars": 8}, {"sum_logits": -9.797297477722168, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -18.789663314819336, "logits_per_token": -3.265765825907389, "logits_per_char": -0.5763116163365981, "num_chars": 17}, {"sum_logits": -9.292299270629883, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.307762145996094, "logits_per_token": -9.292299270629883, "logits_per_char": -1.1615374088287354, "num_chars": 8}, {"sum_logits": -9.251755714416504, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -19.095705032348633, "logits_per_token": -4.625877857208252, "logits_per_char": -0.770979642868042, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 649, "native_id": "92f423de9a556a66c3eb73e9ddf9399a", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.8109130859375, "incorrect_loss_raw": 11.688695907592773, "correct_loss_per_char": 0.7100830078125, "incorrect_loss_per_char": 1.1352416773637135, "correct_loss_per_token": 3.90545654296875, "incorrect_loss_per_token": 8.098166465759277, "correct_loss_uncond": -12.42525863647461, "incorrect_loss_uncond": -4.726377964019775}, "model_output": [{"sum_logits": -14.541120529174805, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.01914405822754, "logits_per_token": -7.270560264587402, "logits_per_char": -0.9088200330734253, "num_chars": 16}, {"sum_logits": -7.8109130859375, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.23617172241211, "logits_per_token": -3.90545654296875, "logits_per_char": -0.7100830078125, "num_chars": 11}, {"sum_logits": -11.805203437805176, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.06981372833252, "logits_per_token": -11.805203437805176, "logits_per_char": -1.9675339063008626, "num_chars": 6}, {"sum_logits": -14.183115005493164, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.54778289794922, "logits_per_token": -7.091557502746582, "logits_per_char": -0.8864446878433228, "num_chars": 16}, {"sum_logits": -6.225344657897949, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.023554801940918, "logits_per_token": -6.225344657897949, "logits_per_char": -0.7781680822372437, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 650, "native_id": "1cd94405124031e8681cd12bd25e2d61", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.863635063171387, "incorrect_loss_raw": 12.06688117980957, "correct_loss_per_char": 2.7159087657928467, "incorrect_loss_per_char": 2.055700946705682, "correct_loss_per_token": 10.863635063171387, "incorrect_loss_per_token": 10.337976574897766, "correct_loss_uncond": -2.1032800674438477, "incorrect_loss_uncond": -2.7325336933135986}, "model_output": [{"sum_logits": -13.831236839294434, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.36141586303711, "logits_per_token": -6.915618419647217, "logits_per_char": -1.7289046049118042, "num_chars": 8}, {"sum_logits": -7.867279052734375, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.19228744506836, "logits_per_token": -7.867279052734375, "logits_per_char": -1.9668197631835938, "num_chars": 4}, {"sum_logits": -12.80136775970459, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.079458236694336, "logits_per_token": -12.80136775970459, "logits_per_char": -2.560273551940918, "num_chars": 5}, {"sum_logits": -10.863635063171387, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.966915130615234, "logits_per_token": -10.863635063171387, "logits_per_char": -2.7159087657928467, "num_chars": 4}, {"sum_logits": -13.767641067504883, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.564497947692871, "logits_per_token": -13.767641067504883, "logits_per_char": -1.966805866786412, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 651, "native_id": "64ab884bd870f6f68146636b4cce921c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.524868011474609, "incorrect_loss_raw": 10.447566866874695, "correct_loss_per_char": 0.6840789101340554, "incorrect_loss_per_char": 1.273398934848725, "correct_loss_per_token": 7.524868011474609, "incorrect_loss_per_token": 8.944073557853699, "correct_loss_uncond": -7.68193244934082, "incorrect_loss_uncond": -4.787430167198181}, "model_output": [{"sum_logits": -7.524868011474609, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.20680046081543, "logits_per_token": -7.524868011474609, "logits_per_char": -0.6840789101340554, "num_chars": 11}, {"sum_logits": -12.027946472167969, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.665264129638672, "logits_per_token": -6.013973236083984, "logits_per_char": -0.8591390337262835, "num_chars": 14}, {"sum_logits": -7.924839496612549, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.743748664855957, "logits_per_token": -7.924839496612549, "logits_per_char": -0.8805377218458388, "num_chars": 9}, {"sum_logits": -9.83970832824707, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.49835205078125, "logits_per_token": -9.83970832824707, "logits_per_char": -1.6399513880411785, "num_chars": 6}, {"sum_logits": -11.997773170471191, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.032623291015625, "logits_per_token": -11.997773170471191, "logits_per_char": -1.7139675957815987, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 652, "native_id": "66275550d64d16339c944e6a6d63eb5b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.4892101287841797, "incorrect_loss_raw": 11.28075623512268, "correct_loss_per_char": 0.09928067525227864, "incorrect_loss_per_char": 1.137785464103776, "correct_loss_per_token": 0.7446050643920898, "incorrect_loss_per_token": 8.232517302036285, "correct_loss_uncond": -14.299110412597656, "incorrect_loss_uncond": -4.536302328109741}, "model_output": [{"sum_logits": -1.4892101287841797, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": true, "sum_logits_uncond": -15.788320541381836, "logits_per_token": -0.7446050643920898, "logits_per_char": -0.09928067525227864, "num_chars": 15}, {"sum_logits": -10.602251052856445, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.135272979736328, "logits_per_token": -5.301125526428223, "logits_per_char": -0.9638410048051314, "num_chars": 11}, {"sum_logits": -9.189106941223145, "num_tokens": 4, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.734067916870117, "logits_per_token": -2.297276735305786, "logits_per_char": -0.9189106941223144, "num_chars": 10}, {"sum_logits": -12.170111656188965, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.986479759216309, "logits_per_token": -12.170111656188965, "logits_per_char": -1.3522346284654405, "num_chars": 9}, {"sum_logits": -13.161555290222168, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.412413597106934, "logits_per_token": -13.161555290222168, "logits_per_char": -1.3161555290222169, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 653, "native_id": "9b26329d74a6159ab9af4f899303de39", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.893432140350342, "incorrect_loss_raw": 10.378578901290894, "correct_loss_per_char": 0.48934321403503417, "incorrect_loss_per_char": 1.4651614866473457, "correct_loss_per_token": 4.893432140350342, "incorrect_loss_per_token": 9.565783858299255, "correct_loss_uncond": -10.428317546844482, "incorrect_loss_uncond": -3.9725887775421143}, "model_output": [{"sum_logits": -6.5023603439331055, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.906187057495117, "logits_per_token": -3.2511801719665527, "logits_per_char": -0.8127950429916382, "num_chars": 8}, {"sum_logits": -4.893432140350342, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.321749687194824, "logits_per_token": -4.893432140350342, "logits_per_char": -0.48934321403503417, "num_chars": 10}, {"sum_logits": -10.838579177856445, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.339823722839355, "logits_per_token": -10.838579177856445, "logits_per_char": -1.2042865753173828, "num_chars": 9}, {"sum_logits": -9.085183143615723, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.330537796020508, "logits_per_token": -9.085183143615723, "logits_per_char": -0.825925740328702, "num_chars": 11}, {"sum_logits": -15.0881929397583, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.82812213897705, "logits_per_token": -15.0881929397583, "logits_per_char": -3.01763858795166, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 654, "native_id": "f74b7f268d3c190a13f99ede6d2359e1", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.69905424118042, "incorrect_loss_raw": 11.97092056274414, "correct_loss_per_char": 0.6332282490200467, "incorrect_loss_per_char": 1.8254377113448248, "correct_loss_per_token": 2.84952712059021, "incorrect_loss_per_token": 8.79204249382019, "correct_loss_uncond": -10.459137439727783, "incorrect_loss_uncond": -3.1714398860931396}, "model_output": [{"sum_logits": -5.69905424118042, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.158191680908203, "logits_per_token": -2.84952712059021, "logits_per_char": -0.6332282490200467, "num_chars": 9}, {"sum_logits": -8.787822723388672, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.691120147705078, "logits_per_token": -8.787822723388672, "logits_per_char": -1.4646371205647786, "num_chars": 6}, {"sum_logits": -25.4310245513916, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -22.835153579711914, "logits_per_token": -12.7155122756958, "logits_per_char": -2.8256693945990667, "num_chars": 9}, {"sum_logits": -8.807662010192871, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.813925743103027, "logits_per_token": -8.807662010192871, "logits_per_char": -2.2019155025482178, "num_chars": 4}, {"sum_logits": -4.857172966003418, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -11.229242324829102, "logits_per_token": -4.857172966003418, "logits_per_char": -0.8095288276672363, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 655, "native_id": "22458fdcead20e2def0df0d92d5806f6", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.082883834838867, "incorrect_loss_raw": 15.55373990535736, "correct_loss_per_char": 0.7695814020493451, "incorrect_loss_per_char": 1.45738900011661, "correct_loss_per_token": 4.360961278279622, "incorrect_loss_per_token": 7.909680684407552, "correct_loss_uncond": -6.570426940917969, "incorrect_loss_uncond": -2.3759855031967163}, "model_output": [{"sum_logits": -6.917595386505127, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -14.455964088439941, "logits_per_token": -6.917595386505127, "logits_per_char": -0.6917595386505127, "num_chars": 10}, {"sum_logits": -22.21812629699707, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -18.789621353149414, "logits_per_token": -11.109063148498535, "logits_per_char": -2.4686806996663413, "num_chars": 9}, {"sum_logits": -17.56532859802246, "num_tokens": 3, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -19.47163200378418, "logits_per_token": -5.855109532674153, "logits_per_char": -1.756532859802246, "num_chars": 10}, {"sum_logits": -15.513909339904785, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -19.001684188842773, "logits_per_token": -7.756954669952393, "logits_per_char": -0.9125829023473403, "num_chars": 17}, {"sum_logits": -13.082883834838867, "num_tokens": 3, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -19.653310775756836, "logits_per_token": -4.360961278279622, "logits_per_char": -0.7695814020493451, "num_chars": 17}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 656, "native_id": "f7b96f195a7adfe0c74924a165cfd055", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.368436813354492, "incorrect_loss_raw": 8.074706077575684, "correct_loss_per_char": 0.6710546016693115, "incorrect_loss_per_char": 1.1913675830477761, "correct_loss_per_token": 5.368436813354492, "incorrect_loss_per_token": 8.074706077575684, "correct_loss_uncond": -8.070232391357422, "incorrect_loss_uncond": -4.364657402038574}, "model_output": [{"sum_logits": -10.77688217163086, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.349469184875488, "logits_per_token": -10.77688217163086, "logits_per_char": -1.7961470286051433, "num_chars": 6}, {"sum_logits": -5.368436813354492, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.438669204711914, "logits_per_token": -5.368436813354492, "logits_per_char": -0.6710546016693115, "num_chars": 8}, {"sum_logits": -6.8951005935668945, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.234085083007812, "logits_per_token": -6.8951005935668945, "logits_per_char": -1.1491834322611492, "num_chars": 6}, {"sum_logits": -8.340633392333984, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.715743064880371, "logits_per_token": -8.340633392333984, "logits_per_char": -1.191519056047712, "num_chars": 7}, {"sum_logits": -6.286208152770996, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.45815658569336, "logits_per_token": -6.286208152770996, "logits_per_char": -0.6286208152770996, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 657, "native_id": "9b631734e72a0e559da153492c1e7894", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.326172113418579, "incorrect_loss_raw": 9.935802340507507, "correct_loss_per_char": 0.2907715141773224, "incorrect_loss_per_char": 0.7337209290557808, "correct_loss_per_token": 2.326172113418579, "incorrect_loss_per_token": 4.540952404340108, "correct_loss_uncond": -15.004497289657593, "incorrect_loss_uncond": -6.322293162345886}, "model_output": [{"sum_logits": -6.534863471984863, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -17.50223159790039, "logits_per_token": -3.2674317359924316, "logits_per_char": -0.5940784974531694, "num_chars": 11}, {"sum_logits": -2.326172113418579, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -17.330669403076172, "logits_per_token": -2.326172113418579, "logits_per_char": -0.2907715141773224, "num_chars": 8}, {"sum_logits": -20.036582946777344, "num_tokens": 3, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -17.817493438720703, "logits_per_token": -6.678860982259114, "logits_per_char": -1.252286434173584, "num_chars": 16}, {"sum_logits": -9.908492088317871, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -16.418981552124023, "logits_per_token": -4.9542460441589355, "logits_per_char": -0.7621916991013747, "num_chars": 13}, {"sum_logits": -3.263270854949951, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -13.293675422668457, "logits_per_token": -3.263270854949951, "logits_per_char": -0.3263270854949951, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 658, "native_id": "caccaa51ee960a92d44e5b949fc35a66", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.8230690956115723, "incorrect_loss_raw": 8.75688898563385, "correct_loss_per_char": 0.31858909130096436, "incorrect_loss_per_char": 1.116446988923209, "correct_loss_per_token": 1.9115345478057861, "incorrect_loss_per_token": 5.265893757343292, "correct_loss_uncond": -14.944833278656006, "incorrect_loss_uncond": -6.960521578788757}, "model_output": [{"sum_logits": -8.79068374633789, "num_tokens": 4, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.26136589050293, "logits_per_token": -2.1976709365844727, "logits_per_char": -0.5860455830891927, "num_chars": 15}, {"sum_logits": -11.494935989379883, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.080846786499023, "logits_per_token": -11.494935989379883, "logits_per_char": -1.9158226648966472, "num_chars": 6}, {"sum_logits": -10.266694068908691, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.29053020477295, "logits_per_token": -5.133347034454346, "logits_per_char": -1.4666705812726701, "num_chars": 7}, {"sum_logits": -4.4752421379089355, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.236899375915527, "logits_per_token": -2.2376210689544678, "logits_per_char": -0.49724912643432617, "num_chars": 9}, {"sum_logits": -3.8230690956115723, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.767902374267578, "logits_per_token": -1.9115345478057861, "logits_per_char": -0.31858909130096436, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 659, "native_id": "def936fda9f6ccee01f57c0f804fabd0", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.212313175201416, "incorrect_loss_raw": 13.461560249328613, "correct_loss_per_char": 0.901539146900177, "incorrect_loss_per_char": 1.7846896648406985, "correct_loss_per_token": 3.606156587600708, "incorrect_loss_per_token": 8.923448324203491, "correct_loss_uncond": -14.697370052337646, "incorrect_loss_uncond": -3.376418113708496}, "model_output": [{"sum_logits": -7.402823448181152, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.089496612548828, "logits_per_token": -7.402823448181152, "logits_per_char": -1.4805646896362306, "num_chars": 5}, {"sum_logits": -12.109189987182617, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -21.863616943359375, "logits_per_token": -6.054594993591309, "logits_per_char": -1.2109189987182618, "num_chars": 10}, {"sum_logits": -24.19570541381836, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.722658157348633, "logits_per_token": -12.09785270690918, "logits_per_char": -2.419570541381836, "num_chars": 10}, {"sum_logits": -7.212313175201416, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -21.909683227539062, "logits_per_token": -3.606156587600708, "logits_per_char": -0.901539146900177, "num_chars": 8}, {"sum_logits": -10.138522148132324, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -11.676141738891602, "logits_per_token": -10.138522148132324, "logits_per_char": -2.027704429626465, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 660, "native_id": "761b0f6c68b1540949b70f76a9e67c78", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.616671562194824, "incorrect_loss_raw": 15.81916880607605, "correct_loss_per_char": 0.510606505654075, "incorrect_loss_per_char": 1.5638420581817627, "correct_loss_per_token": 5.616671562194824, "incorrect_loss_per_token": 9.616804003715515, "correct_loss_uncond": -5.26920223236084, "incorrect_loss_uncond": -2.041069984436035}, "model_output": [{"sum_logits": -13.657756805419922, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.412413597106934, "logits_per_token": -13.657756805419922, "logits_per_char": -1.3657756805419923, "num_chars": 10}, {"sum_logits": -17.15142059326172, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.15744972229004, "logits_per_token": -8.57571029663086, "logits_per_char": -1.2251014709472656, "num_chars": 14}, {"sum_logits": -5.616671562194824, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -10.885873794555664, "logits_per_token": -5.616671562194824, "logits_per_char": -0.510606505654075, "num_chars": 11}, {"sum_logits": -15.757845878601074, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.535737991333008, "logits_per_token": -7.878922939300537, "logits_per_char": -1.5757845878601073, "num_chars": 10}, {"sum_logits": -16.709651947021484, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.33535385131836, "logits_per_token": -8.354825973510742, "logits_per_char": -2.0887064933776855, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 661, "native_id": "8c11546468a2595b29a1297e73334fc4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.07242202758789, "incorrect_loss_raw": 10.48294734954834, "correct_loss_per_char": 1.5120703379313152, "incorrect_loss_per_char": 1.4747222933504316, "correct_loss_per_token": 9.07242202758789, "incorrect_loss_per_token": 8.218486785888672, "correct_loss_uncond": -5.862730979919434, "incorrect_loss_uncond": -4.733319282531738}, "model_output": [{"sum_logits": -7.080061912536621, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -11.413346290588379, "logits_per_token": -7.080061912536621, "logits_per_char": -1.4160123825073243, "num_chars": 5}, {"sum_logits": -9.07242202758789, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.935153007507324, "logits_per_token": -9.07242202758789, "logits_per_char": -1.5120703379313152, "num_chars": 6}, {"sum_logits": -7.663620948791504, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.64645767211914, "logits_per_token": -7.663620948791504, "logits_per_char": -0.957952618598938, "num_chars": 8}, {"sum_logits": -18.115684509277344, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -20.87010955810547, "logits_per_token": -9.057842254638672, "logits_per_char": -2.012853834364149, "num_chars": 9}, {"sum_logits": -9.07242202758789, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.935153007507324, "logits_per_token": -9.07242202758789, "logits_per_char": -1.5120703379313152, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 662, "native_id": "a5dcac512870e79f5aa2b22dbd662404", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 2.5516364574432373, "incorrect_loss_raw": 8.74136996269226, "correct_loss_per_char": 0.5103272914886474, "incorrect_loss_per_char": 1.1942361794599967, "correct_loss_per_token": 2.5516364574432373, "incorrect_loss_per_token": 8.242428958415985, "correct_loss_uncond": -11.836513757705688, "incorrect_loss_uncond": -6.090932607650757}, "model_output": [{"sum_logits": -5.586498737335205, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.715117454528809, "logits_per_token": -5.586498737335205, "logits_per_char": -1.117299747467041, "num_chars": 5}, {"sum_logits": -2.5516364574432373, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.388150215148926, "logits_per_token": -2.5516364574432373, "logits_per_char": -0.5103272914886474, "num_chars": 5}, {"sum_logits": -3.991528034210205, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.58877944946289, "logits_per_token": -1.9957640171051025, "logits_per_char": -0.23479576671824737, "num_chars": 17}, {"sum_logits": -14.079383850097656, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.001758575439453, "logits_per_token": -14.079383850097656, "logits_per_char": -2.011340550013951, "num_chars": 7}, {"sum_logits": -11.308069229125977, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.023554801940918, "logits_per_token": -11.308069229125977, "logits_per_char": -1.413508653640747, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 663, "native_id": "870b07a1c5af2e956673a9680da99852", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 10.049339294433594, "incorrect_loss_raw": 14.011824607849121, "correct_loss_per_char": 0.7178099496023995, "incorrect_loss_per_char": 1.0053760296420047, "correct_loss_per_token": 5.024669647216797, "incorrect_loss_per_token": 4.758511304855347, "correct_loss_uncond": -7.932991027832031, "incorrect_loss_uncond": -5.5326714515686035}, "model_output": [{"sum_logits": -12.676626205444336, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.469581604003906, "logits_per_token": -4.225542068481445, "logits_per_char": -0.8451084136962891, "num_chars": 15}, {"sum_logits": -21.043498992919922, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.92280387878418, "logits_per_token": -7.014499664306641, "logits_per_char": -1.1075525785747327, "num_chars": 19}, {"sum_logits": -13.47833251953125, "num_tokens": 4, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.910259246826172, "logits_per_token": -3.3695831298828125, "logits_per_char": -0.962738037109375, "num_chars": 14}, {"sum_logits": -8.848840713500977, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.87533950805664, "logits_per_token": -4.424420356750488, "logits_per_char": -1.106105089187622, "num_chars": 8}, {"sum_logits": -10.049339294433594, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.982330322265625, "logits_per_token": -5.024669647216797, "logits_per_char": -0.7178099496023995, "num_chars": 14}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 664, "native_id": "f48528156632b9c5b18af9ce2095509b", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.0978689193725586, "incorrect_loss_raw": 17.639275550842285, "correct_loss_per_char": 0.30978689193725584, "incorrect_loss_per_char": 1.875646040037081, "correct_loss_per_token": 3.0978689193725586, "incorrect_loss_per_token": 13.259935855865479, "correct_loss_uncond": -11.794055938720703, "incorrect_loss_uncond": 1.7718431949615479}, "model_output": [{"sum_logits": -17.2690486907959, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.590031623840332, "logits_per_token": -17.2690486907959, "logits_per_char": -1.5699135173450818, "num_chars": 11}, {"sum_logits": -18.25333595275879, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.218299865722656, "logits_per_token": -18.25333595275879, "logits_per_char": -2.607619421822684, "num_chars": 7}, {"sum_logits": -3.0978689193725586, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.891924858093262, "logits_per_token": -3.0978689193725586, "logits_per_char": -0.30978689193725584, "num_chars": 10}, {"sum_logits": -15.408458709716797, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.68854331970215, "logits_per_token": -7.704229354858398, "logits_per_char": -1.5408458709716797, "num_chars": 10}, {"sum_logits": -19.626258850097656, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.972854614257812, "logits_per_token": -9.813129425048828, "logits_per_char": -1.784205350008878, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 665, "native_id": "5496c7293f653120e5a5213db2d7b103", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.984260082244873, "incorrect_loss_raw": 8.088134706020355, "correct_loss_per_char": 0.24868834018707275, "incorrect_loss_per_char": 1.095413631954036, "correct_loss_per_token": 1.4921300411224365, "incorrect_loss_per_token": 8.088134706020355, "correct_loss_uncond": -16.42554521560669, "incorrect_loss_uncond": -5.063133537769318}, "model_output": [{"sum_logits": -8.597600936889648, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.193785667419434, "logits_per_token": -8.597600936889648, "logits_per_char": -1.2282287052699499, "num_chars": 7}, {"sum_logits": -9.932802200317383, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.202109336853027, "logits_per_token": -9.932802200317383, "logits_per_char": -0.7640617077167218, "num_chars": 13}, {"sum_logits": -2.984260082244873, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": true, "sum_logits_uncond": -19.409805297851562, "logits_per_token": -1.4921300411224365, "logits_per_char": -0.24868834018707275, "num_chars": 12}, {"sum_logits": -9.950918197631836, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.657292366027832, "logits_per_token": -9.950918197631836, "logits_per_char": -1.4215597425188338, "num_chars": 7}, {"sum_logits": -3.8712174892425537, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.551885604858398, "logits_per_token": -3.8712174892425537, "logits_per_char": -0.9678043723106384, "num_chars": 4}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 666, "native_id": "9d97e2bb458d93a8bafe4380b08727e3", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.953243255615234, "incorrect_loss_raw": 6.8449355363845825, "correct_loss_per_char": 1.1059159172905817, "incorrect_loss_per_char": 0.9861063886256444, "correct_loss_per_token": 4.976621627807617, "incorrect_loss_per_token": 5.234991431236267, "correct_loss_uncond": -9.586723327636719, "incorrect_loss_uncond": -8.302473664283752}, "model_output": [{"sum_logits": -9.953243255615234, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.539966583251953, "logits_per_token": -4.976621627807617, "logits_per_char": -1.1059159172905817, "num_chars": 9}, {"sum_logits": -4.533045768737793, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.77853012084961, "logits_per_token": -4.533045768737793, "logits_per_char": -0.7555076281229655, "num_chars": 6}, {"sum_logits": -4.194167137145996, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.023554801940918, "logits_per_token": -4.194167137145996, "logits_per_char": -0.5242708921432495, "num_chars": 8}, {"sum_logits": -12.879552841186523, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.716047286987305, "logits_per_token": -6.439776420593262, "logits_per_char": -1.8399361201695033, "num_chars": 7}, {"sum_logits": -5.772976398468018, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.071504592895508, "logits_per_token": -5.772976398468018, "logits_per_char": -0.8247109140668597, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 667, "native_id": "26d7d59ef7b9f2e0c2d47419fa5bca91", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.7726457118988037, "incorrect_loss_raw": 6.471511781215668, "correct_loss_per_char": 0.31438714265823364, "incorrect_loss_per_char": 0.8245567046374642, "correct_loss_per_token": 3.7726457118988037, "incorrect_loss_per_token": 3.235755890607834, "correct_loss_uncond": -11.384889841079712, "incorrect_loss_uncond": -10.362745106220245}, "model_output": [{"sum_logits": -12.235471725463867, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.676098823547363, "logits_per_token": -6.117735862731934, "logits_per_char": -1.747924532209124, "num_chars": 7}, {"sum_logits": -4.866119384765625, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.68304443359375, "logits_per_token": -2.4330596923828125, "logits_per_char": -0.695159912109375, "num_chars": 7}, {"sum_logits": -2.7994954586029053, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": true, "sum_logits_uncond": -16.15967559814453, "logits_per_token": -1.3997477293014526, "logits_per_char": -0.3110550509558784, "num_chars": 9}, {"sum_logits": -3.7726457118988037, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.157535552978516, "logits_per_token": -3.7726457118988037, "logits_per_char": -0.31438714265823364, "num_chars": 12}, {"sum_logits": -5.984960556030273, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.818208694458008, "logits_per_token": -2.9924802780151367, "logits_per_char": -0.5440873232754794, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 668, "native_id": "c6f10fd07348bf2cf5488b0d9f38d806", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.566014289855957, "incorrect_loss_raw": 18.63210916519165, "correct_loss_per_char": 0.6377342859903972, "incorrect_loss_per_char": 1.3169974699998515, "correct_loss_per_token": 4.7830071449279785, "incorrect_loss_per_token": 7.742793639500936, "correct_loss_uncond": -10.193991661071777, "incorrect_loss_uncond": -1.2794160842895508}, "model_output": [{"sum_logits": -22.01389503479004, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.54024314880371, "logits_per_token": -11.00694751739502, "logits_per_char": -1.83449125289917, "num_chars": 12}, {"sum_logits": -9.566014289855957, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.760005950927734, "logits_per_token": -4.7830071449279785, "logits_per_char": -0.6377342859903972, "num_chars": 15}, {"sum_logits": -14.756278991699219, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.312360763549805, "logits_per_token": -7.378139495849609, "logits_per_char": -1.135098383976863, "num_chars": 13}, {"sum_logits": -19.693553924560547, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -23.88312530517578, "logits_per_token": -6.564517974853516, "logits_per_char": -1.0940863291422527, "num_chars": 18}, {"sum_logits": -18.064708709716797, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.910371780395508, "logits_per_token": -6.021569569905599, "logits_per_char": -1.2043139139811199, "num_chars": 15}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 669, "native_id": "8ebf9d24719649a0b041aea02a6e46af", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.5720043182373047, "incorrect_loss_raw": 8.60377025604248, "correct_loss_per_char": 0.5102863311767578, "incorrect_loss_per_char": 0.9495726141750727, "correct_loss_per_token": 3.5720043182373047, "incorrect_loss_per_token": 7.713100910186768, "correct_loss_uncond": -9.036532402038574, "incorrect_loss_uncond": -6.636253833770752}, "model_output": [{"sum_logits": -9.918293952941895, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.520729064941406, "logits_per_token": -9.918293952941895, "logits_per_char": -1.4168991361345564, "num_chars": 7}, {"sum_logits": -10.29644775390625, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.463623046875, "logits_per_token": -10.29644775390625, "logits_per_char": -1.1440497504340277, "num_chars": 9}, {"sum_logits": -3.5720043182373047, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.608536720275879, "logits_per_token": -3.5720043182373047, "logits_per_char": -0.5102863311767578, "num_chars": 7}, {"sum_logits": -7.074984550476074, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.157535552978516, "logits_per_token": -7.074984550476074, "logits_per_char": -0.5895820458730062, "num_chars": 12}, {"sum_logits": -7.125354766845703, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.818208694458008, "logits_per_token": -3.5626773834228516, "logits_per_char": -0.6477595242587003, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 670, "native_id": "c961578f4c5768b67b843e5d2ce18452", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.532564163208008, "incorrect_loss_raw": 12.88118314743042, "correct_loss_per_char": 0.5532564163208008, "incorrect_loss_per_char": 1.4345973219190324, "correct_loss_per_token": 2.766282081604004, "incorrect_loss_per_token": 9.32126744588216, "correct_loss_uncond": -9.768239974975586, "incorrect_loss_uncond": -2.903289556503296}, "model_output": [{"sum_logits": -5.532564163208008, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.300804138183594, "logits_per_token": -2.766282081604004, "logits_per_char": -0.5532564163208008, "num_chars": 10}, {"sum_logits": -12.842308044433594, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.218299865722656, "logits_per_token": -12.842308044433594, "logits_per_char": -1.8346154349190849, "num_chars": 7}, {"sum_logits": -16.23025131225586, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.767902374267578, "logits_per_token": -8.11512565612793, "logits_per_char": -1.3525209426879883, "num_chars": 12}, {"sum_logits": -9.186805725097656, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.52644157409668, "logits_per_token": -3.0622685750325522, "logits_per_char": -0.6562004089355469, "num_chars": 14}, {"sum_logits": -13.26536750793457, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.62524700164795, "logits_per_token": -13.26536750793457, "logits_per_char": -1.89505250113351, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 671, "native_id": "cce1b59f7c4f540a84a1a7d6d88548c4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.698883056640625, "incorrect_loss_raw": 6.534291505813599, "correct_loss_per_char": 0.7831471761067709, "incorrect_loss_per_char": 0.646521609453928, "correct_loss_per_token": 4.698883056640625, "incorrect_loss_per_token": 4.7216784954071045, "correct_loss_uncond": -6.986571311950684, "incorrect_loss_uncond": -9.213427305221558}, "model_output": [{"sum_logits": -4.4596662521362305, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.14277172088623, "logits_per_token": -4.4596662521362305, "logits_per_char": -0.6370951788766044, "num_chars": 7}, {"sum_logits": -4.698883056640625, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.685454368591309, "logits_per_token": -4.698883056640625, "logits_per_char": -0.7831471761067709, "num_chars": 6}, {"sum_logits": -7.176595687866211, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.45752239227295, "logits_per_token": -7.176595687866211, "logits_per_char": -0.8970744609832764, "num_chars": 8}, {"sum_logits": -7.51161003112793, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.629486083984375, "logits_per_token": -3.755805015563965, "logits_per_char": -0.4694756269454956, "num_chars": 16}, {"sum_logits": -6.989294052124023, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.76109504699707, "logits_per_token": -3.4946470260620117, "logits_per_char": -0.5824411710103353, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 672, "native_id": "60848ce50295fc745756fbe960e78b88", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.32215690612793, "incorrect_loss_raw": 8.420982360839844, "correct_loss_per_char": 0.7701348066329956, "incorrect_loss_per_char": 0.8493678405429377, "correct_loss_per_token": 4.107385635375977, "incorrect_loss_per_token": 5.284050107002258, "correct_loss_uncond": -6.540533065795898, "incorrect_loss_uncond": -6.870139837265015}, "model_output": [{"sum_logits": -12.32215690612793, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.862689971923828, "logits_per_token": -4.107385635375977, "logits_per_char": -0.7701348066329956, "num_chars": 16}, {"sum_logits": -12.38310432434082, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.683568954467773, "logits_per_token": -6.19155216217041, "logits_per_char": -0.825540288289388, "num_chars": 15}, {"sum_logits": -4.019089698791504, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.474529266357422, "logits_per_token": -4.019089698791504, "logits_per_char": -0.502386212348938, "num_chars": 8}, {"sum_logits": -4.5693817138671875, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.55066967010498, "logits_per_token": -4.5693817138671875, "logits_per_char": -0.9138763427734375, "num_chars": 5}, {"sum_logits": -12.712353706359863, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.455720901489258, "logits_per_token": -6.356176853179932, "logits_per_char": -1.1556685187599876, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 673, "native_id": "3fdc0c422c524c994b9911a17f1f1834", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.794771194458008, "incorrect_loss_raw": 8.475969195365906, "correct_loss_per_char": 0.4871731996536255, "incorrect_loss_per_char": 0.9148482534620497, "correct_loss_per_token": 3.897385597229004, "incorrect_loss_per_token": 4.861657083034515, "correct_loss_uncond": -12.664752960205078, "incorrect_loss_uncond": -8.228704333305359}, "model_output": [{"sum_logits": -7.794771194458008, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.459524154663086, "logits_per_token": -3.897385597229004, "logits_per_char": -0.4871731996536255, "num_chars": 16}, {"sum_logits": -12.118738174438477, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.067399978637695, "logits_per_token": -6.059369087219238, "logits_per_char": -2.019789695739746, "num_chars": 6}, {"sum_logits": -9.340777397155762, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.003332138061523, "logits_per_token": -4.670388698577881, "logits_per_char": -0.5189320776197646, "num_chars": 18}, {"sum_logits": -7.454981327056885, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.17111587524414, "logits_per_token": -3.7274906635284424, "logits_per_char": -0.49699875513712566, "num_chars": 15}, {"sum_logits": -4.9893798828125, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.5768461227417, "logits_per_token": -4.9893798828125, "logits_per_char": -0.6236724853515625, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 674, "native_id": "cc8eac9956f645533b8d7b99702e3507", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.037732124328613, "incorrect_loss_raw": 9.878372430801392, "correct_loss_per_char": 0.7196760177612305, "incorrect_loss_per_char": 1.4903360866364979, "correct_loss_per_token": 2.5188660621643066, "incorrect_loss_per_token": 5.1690216064453125, "correct_loss_uncond": -11.841734886169434, "incorrect_loss_uncond": -7.0287864208221436}, "model_output": [{"sum_logits": -8.839277267456055, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.254728317260742, "logits_per_token": -8.839277267456055, "logits_per_char": -1.767855453491211, "num_chars": 5}, {"sum_logits": -11.228909492492676, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -21.504716873168945, "logits_per_token": -3.742969830830892, "logits_per_char": -1.6041299274989538, "num_chars": 7}, {"sum_logits": -9.772872924804688, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -18.24956512451172, "logits_per_token": -3.257624308268229, "logits_per_char": -0.9772872924804688, "num_chars": 10}, {"sum_logits": -9.672430038452148, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.619625091552734, "logits_per_token": -4.836215019226074, "logits_per_char": -1.6120716730753581, "num_chars": 6}, {"sum_logits": -5.037732124328613, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.879467010498047, "logits_per_token": -2.5188660621643066, "logits_per_char": -0.7196760177612305, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 675, "native_id": "c0e7fa3e39a2d9af2c323416015729dc", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 3.504556655883789, "incorrect_loss_raw": 13.966684818267822, "correct_loss_per_char": 0.43806958198547363, "incorrect_loss_per_char": 1.4671299947632683, "correct_loss_per_token": 1.168185551961263, "incorrect_loss_per_token": 6.649976968765259, "correct_loss_uncond": -12.838459014892578, "incorrect_loss_uncond": -5.969884634017944}, "model_output": [{"sum_logits": -25.578399658203125, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -21.77301025390625, "logits_per_token": -8.526133219401041, "logits_per_char": -1.7052266438802084, "num_chars": 15}, {"sum_logits": -3.504556655883789, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.343015670776367, "logits_per_token": -1.168185551961263, "logits_per_char": -0.43806958198547363, "num_chars": 8}, {"sum_logits": -6.850162506103516, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.7302303314209, "logits_per_token": -2.2833875020345054, "logits_per_char": -0.4281351566314697, "num_chars": 16}, {"sum_logits": -8.142597198486328, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.32465648651123, "logits_per_token": -8.142597198486328, "logits_per_char": -2.035649299621582, "num_chars": 4}, {"sum_logits": -15.29557991027832, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -25.918380737304688, "logits_per_token": -7.64778995513916, "logits_per_char": -1.6995088789198134, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 676, "native_id": "335b51bd3a8ada014bbe6754dcbd425f", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.265870094299316, "incorrect_loss_raw": 8.540730714797974, "correct_loss_per_char": 0.8531740188598633, "incorrect_loss_per_char": 0.9226462730718803, "correct_loss_per_token": 4.265870094299316, "incorrect_loss_per_token": 4.901880145072937, "correct_loss_uncond": -7.988858222961426, "incorrect_loss_uncond": -7.0809266567230225}, "model_output": [{"sum_logits": -9.046690940856934, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.549774169921875, "logits_per_token": -4.523345470428467, "logits_per_char": -1.292384420122419, "num_chars": 7}, {"sum_logits": -6.406206130981445, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.474275588989258, "logits_per_token": -3.2031030654907227, "logits_per_char": -0.33716874373586553, "num_chars": 19}, {"sum_logits": -4.265870094299316, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.254728317260742, "logits_per_token": -4.265870094299316, "logits_per_char": -0.8531740188598633, "num_chars": 5}, {"sum_logits": -13.657907485961914, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.956235885620117, "logits_per_token": -6.828953742980957, "logits_per_char": -1.0506082681509166, "num_chars": 13}, {"sum_logits": -5.052118301391602, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.506343841552734, "logits_per_token": -5.052118301391602, "logits_per_char": -1.0104236602783203, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 677, "native_id": "c7327a1a7d12b6cc0740fc9446270e02", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.443670272827148, "incorrect_loss_raw": 13.22242021560669, "correct_loss_per_char": 0.6031193052019391, "incorrect_loss_per_char": 1.210750209542858, "correct_loss_per_token": 2.8145567576090493, "incorrect_loss_per_token": 5.380477110544841, "correct_loss_uncond": -13.030702590942383, "incorrect_loss_uncond": -4.704833745956421}, "model_output": [{"sum_logits": -5.838022232055664, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -11.596142768859863, "logits_per_token": -5.838022232055664, "logits_per_char": -1.1676044464111328, "num_chars": 5}, {"sum_logits": -14.490957260131836, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -19.23810577392578, "logits_per_token": -4.830319086710612, "logits_per_char": -1.0350683757237025, "num_chars": 14}, {"sum_logits": -11.397947311401367, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -19.390138626098633, "logits_per_token": -3.7993157704671225, "logits_per_char": -0.8767651778001052, "num_chars": 13}, {"sum_logits": -21.16275405883789, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -21.484628677368164, "logits_per_token": -7.054251352945964, "logits_per_char": -1.763562838236491, "num_chars": 12}, {"sum_logits": -8.443670272827148, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -21.47437286376953, "logits_per_token": -2.8145567576090493, "logits_per_char": -0.6031193052019391, "num_chars": 14}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 678, "native_id": "2729d8502208c25d8e9293cd4e8ecbb5", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.8014180660247803, "incorrect_loss_raw": 11.8158438205719, "correct_loss_per_char": 0.3455834605477073, "incorrect_loss_per_char": 1.592724741829766, "correct_loss_per_token": 1.9007090330123901, "incorrect_loss_per_token": 9.147580067316692, "correct_loss_uncond": -11.36291241645813, "incorrect_loss_uncond": -3.3194515705108643}, "model_output": [{"sum_logits": -15.103988647460938, "num_tokens": 1, "num_tokens_all": 130, "is_greedy": false, "sum_logits_uncond": -14.413416862487793, "logits_per_token": -15.103988647460938, "logits_per_char": -3.7759971618652344, "num_chars": 4}, {"sum_logits": -16.00958251953125, "num_tokens": 3, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -19.212238311767578, "logits_per_token": -5.336527506510417, "logits_per_char": -0.8004791259765625, "num_chars": 20}, {"sum_logits": -3.8014180660247803, "num_tokens": 2, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -15.16433048248291, "logits_per_token": -1.9007090330123901, "logits_per_char": -0.3455834605477073, "num_chars": 11}, {"sum_logits": -8.440953254699707, "num_tokens": 1, "num_tokens_all": 130, "is_greedy": false, "sum_logits_uncond": -14.022502899169922, "logits_per_token": -8.440953254699707, "logits_per_char": -0.9378836949666342, "num_chars": 9}, {"sum_logits": -7.708850860595703, "num_tokens": 1, "num_tokens_all": 130, "is_greedy": false, "sum_logits_uncond": -12.893023490905762, "logits_per_token": -7.708850860595703, "logits_per_char": -0.8565389845106337, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 679, "native_id": "7ea57ee4580042b0a6a40479c8ace3e4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 14.161443710327148, "incorrect_loss_raw": 16.033984899520874, "correct_loss_per_char": 1.0115316935947962, "incorrect_loss_per_char": 1.7282398983963534, "correct_loss_per_token": 7.080721855163574, "incorrect_loss_per_token": 11.87491261959076, "correct_loss_uncond": -8.335821151733398, "incorrect_loss_uncond": 0.04603981971740723}, "model_output": [{"sum_logits": -13.351419448852539, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.132861137390137, "logits_per_token": -13.351419448852539, "logits_per_char": -2.670283889770508, "num_chars": 5}, {"sum_logits": -18.20171356201172, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.39590072631836, "logits_per_token": -9.10085678100586, "logits_per_char": -1.1376070976257324, "num_chars": 16}, {"sum_logits": -14.161443710327148, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -22.497264862060547, "logits_per_token": -7.080721855163574, "logits_per_char": -1.0115316935947962, "num_chars": 14}, {"sum_logits": -15.0708646774292, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.083194732666016, "logits_per_token": -7.5354323387146, "logits_per_char": -1.1592972828791692, "num_chars": 13}, {"sum_logits": -17.51194190979004, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.339823722839355, "logits_per_token": -17.51194190979004, "logits_per_char": -1.9457713233100042, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 680, "native_id": "65432eb6e617514d863a465f38865fde", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.430063247680664, "incorrect_loss_raw": 14.054633378982544, "correct_loss_per_char": 0.3393789529800415, "incorrect_loss_per_char": 1.0481577502356636, "correct_loss_per_token": 2.715031623840332, "incorrect_loss_per_token": 5.671841263771057, "correct_loss_uncond": -15.029460906982422, "incorrect_loss_uncond": -7.69903826713562}, "model_output": [{"sum_logits": -5.430063247680664, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.459524154663086, "logits_per_token": -2.715031623840332, "logits_per_char": -0.3393789529800415, "num_chars": 16}, {"sum_logits": -11.259682655334473, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.553945541381836, "logits_per_token": -5.629841327667236, "logits_per_char": -0.7506455103556315, "num_chars": 15}, {"sum_logits": -21.687606811523438, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -27.170166015625, "logits_per_token": -5.421901702880859, "logits_per_char": -1.4458404541015626, "num_chars": 15}, {"sum_logits": -11.996124267578125, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.288890838623047, "logits_per_token": -5.9980621337890625, "logits_per_char": -1.3329026963975694, "num_chars": 9}, {"sum_logits": -11.27511978149414, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.001684188842773, "logits_per_token": -5.63755989074707, "logits_per_char": -0.6632423400878906, "num_chars": 17}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 681, "native_id": "316a8dee8a4dde7d95cf503a715104be", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.76745891571045, "incorrect_loss_raw": 13.96159291267395, "correct_loss_per_char": 1.2209323644638062, "incorrect_loss_per_char": 1.3610604672859876, "correct_loss_per_token": 9.76745891571045, "incorrect_loss_per_token": 7.455453157424927, "correct_loss_uncond": -4.667527198791504, "incorrect_loss_uncond": -2.68841552734375}, "model_output": [{"sum_logits": -17.341569900512695, "num_tokens": 3, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -16.18882179260254, "logits_per_token": -5.780523300170898, "logits_per_char": -1.926841100056966, "num_chars": 9}, {"sum_logits": -9.577776908874512, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -14.197783470153809, "logits_per_token": -9.577776908874512, "logits_per_char": -1.197222113609314, "num_chars": 8}, {"sum_logits": -9.76745891571045, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -14.434986114501953, "logits_per_token": -9.76745891571045, "logits_per_char": -1.2209323644638062, "num_chars": 8}, {"sum_logits": -14.103452682495117, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -20.85931396484375, "logits_per_token": -7.051726341247559, "logits_per_char": -1.0848809755765474, "num_chars": 13}, {"sum_logits": -14.823572158813477, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -15.354114532470703, "logits_per_token": -7.411786079406738, "logits_per_char": -1.235297679901123, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 682, "native_id": "520972425aed0e532fa28a91c9b55b30", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.406035900115967, "incorrect_loss_raw": 10.76041042804718, "correct_loss_per_char": 0.7117817666795518, "incorrect_loss_per_char": 1.014722438296701, "correct_loss_per_token": 3.2030179500579834, "incorrect_loss_per_token": 5.38020521402359, "correct_loss_uncond": -9.26539659500122, "incorrect_loss_uncond": -5.717314600944519}, "model_output": [{"sum_logits": -10.78262710571289, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.83720588684082, "logits_per_token": -5.391313552856445, "logits_per_char": -0.829432854285607, "num_chars": 13}, {"sum_logits": -6.406035900115967, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.671432495117188, "logits_per_token": -3.2030179500579834, "logits_per_char": -0.7117817666795518, "num_chars": 9}, {"sum_logits": -10.958008766174316, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.137508392333984, "logits_per_token": -5.479004383087158, "logits_per_char": -1.217556529574924, "num_chars": 9}, {"sum_logits": -7.092012882232666, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.938217163085938, "logits_per_token": -3.546006441116333, "logits_per_char": -0.5910010735193888, "num_chars": 12}, {"sum_logits": -14.208992958068848, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.997968673706055, "logits_per_token": -7.104496479034424, "logits_per_char": -1.4208992958068847, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 683, "native_id": "4d67cdb4ba1b0058e383c212303a9f4e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 13.95971965789795, "incorrect_loss_raw": 10.253825902938843, "correct_loss_per_char": 0.8211599798763499, "incorrect_loss_per_char": 1.247170653418889, "correct_loss_per_token": 4.653239885965983, "incorrect_loss_per_token": 6.278894662857056, "correct_loss_uncond": -11.0820951461792, "incorrect_loss_uncond": -4.8582422733306885}, "model_output": [{"sum_logits": -7.913845062255859, "num_tokens": 4, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.13313102722168, "logits_per_token": -1.9784612655639648, "logits_per_char": -0.5652746473039899, "num_chars": 14}, {"sum_logits": -10.721221923828125, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.943564414978027, "logits_per_token": -5.3606109619140625, "logits_per_char": -0.7658015659877232, "num_chars": 14}, {"sum_logits": -13.95971965789795, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -25.04181480407715, "logits_per_token": -4.653239885965983, "logits_per_char": -0.8211599798763499, "num_chars": 17}, {"sum_logits": -9.207460403442383, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.838539123535156, "logits_per_token": -4.603730201721191, "logits_per_char": -1.0230511559380426, "num_chars": 9}, {"sum_logits": -13.172776222229004, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.533038139343262, "logits_per_token": -13.172776222229004, "logits_per_char": -2.6345552444458007, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 684, "native_id": "95d1d968ee66b6054cbb16b58a7c6455", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.801076889038086, "incorrect_loss_raw": 11.27376914024353, "correct_loss_per_char": 1.6001346111297607, "incorrect_loss_per_char": 1.1325773346991768, "correct_loss_per_token": 12.801076889038086, "incorrect_loss_per_token": 8.297151684761047, "correct_loss_uncond": -3.1658525466918945, "incorrect_loss_uncond": -4.001253843307495}, "model_output": [{"sum_logits": -11.536176681518555, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.004643440246582, "logits_per_token": -11.536176681518555, "logits_per_char": -1.1536176681518555, "num_chars": 10}, {"sum_logits": -15.637855529785156, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.926929473876953, "logits_per_token": -7.818927764892578, "logits_per_char": -1.3031546274820964, "num_chars": 12}, {"sum_logits": -12.801076889038086, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.96692943572998, "logits_per_token": -12.801076889038086, "logits_per_char": -1.6001346111297607, "num_chars": 8}, {"sum_logits": -8.175084114074707, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.76109504699707, "logits_per_token": -4.0875420570373535, "logits_per_char": -0.6812570095062256, "num_chars": 12}, {"sum_logits": -9.745960235595703, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.407423973083496, "logits_per_token": -9.745960235595703, "logits_per_char": -1.392280033656529, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 685, "native_id": "c43b60be106662de1863097ee3ddb4d2", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.102447509765625, "incorrect_loss_raw": 8.885807991027832, "correct_loss_per_char": 1.1574925013950892, "incorrect_loss_per_char": 1.0175086657206218, "correct_loss_per_token": 8.102447509765625, "incorrect_loss_per_token": 6.327374219894409, "correct_loss_uncond": -4.811014175415039, "incorrect_loss_uncond": -5.968591928482056}, "model_output": [{"sum_logits": -8.102447509765625, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.913461685180664, "logits_per_token": -8.102447509765625, "logits_per_char": -1.1574925013950892, "num_chars": 7}, {"sum_logits": -11.497848510742188, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.714458465576172, "logits_per_token": -5.748924255371094, "logits_per_char": -0.821274893624442, "num_chars": 14}, {"sum_logits": -8.969621658325195, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.465145111083984, "logits_per_token": -4.484810829162598, "logits_per_char": -0.8969621658325195, "num_chars": 10}, {"sum_logits": -6.754833221435547, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.348342895507812, "logits_per_token": -6.754833221435547, "logits_per_char": -0.9649761744907924, "num_chars": 7}, {"sum_logits": -8.320928573608398, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.889653205871582, "logits_per_token": -8.320928573608398, "logits_per_char": -1.3868214289347331, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 686, "native_id": "456f2fb41cac8c028dcfe2f48637e473", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.112666130065918, "incorrect_loss_raw": 7.702296614646912, "correct_loss_per_char": 1.0112666130065917, "incorrect_loss_per_char": 0.8169972176353136, "correct_loss_per_token": 5.056333065032959, "incorrect_loss_per_token": 4.60471647977829, "correct_loss_uncond": -6.593762397766113, "incorrect_loss_uncond": -9.996982216835022}, "model_output": [{"sum_logits": -10.112666130065918, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.70642852783203, "logits_per_token": -5.056333065032959, "logits_per_char": -1.0112666130065917, "num_chars": 10}, {"sum_logits": -6.028545379638672, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.916854858398438, "logits_per_token": -6.028545379638672, "logits_per_char": -1.004757563273112, "num_chars": 6}, {"sum_logits": -9.932461738586426, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.59100914001465, "logits_per_token": -4.966230869293213, "logits_per_char": -0.9932461738586426, "num_chars": 10}, {"sum_logits": -5.728874683380127, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.075977325439453, "logits_per_token": -2.8644373416900635, "logits_per_char": -0.35805466771125793, "num_chars": 16}, {"sum_logits": -9.119304656982422, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.213274002075195, "logits_per_token": -4.559652328491211, "logits_per_char": -0.9119304656982422, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 687, "native_id": "a5d853d1c2fb3ef160218fb91110fbe5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 8.53209114074707, "incorrect_loss_raw": 11.590807676315308, "correct_loss_per_char": 0.853209114074707, "incorrect_loss_per_char": 1.5115184258933019, "correct_loss_per_token": 4.266045570373535, "incorrect_loss_per_token": 6.830633997917175, "correct_loss_uncond": -8.232120513916016, "incorrect_loss_uncond": -4.664512395858765}, "model_output": [{"sum_logits": -8.281841278076172, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -14.296630859375, "logits_per_token": -8.281841278076172, "logits_per_char": -0.7528946616432883, "num_chars": 11}, {"sum_logits": -8.53209114074707, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -16.764211654663086, "logits_per_token": -4.266045570373535, "logits_per_char": -0.853209114074707, "num_chars": 10}, {"sum_logits": -14.343923568725586, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -15.793710708618164, "logits_per_token": -7.171961784362793, "logits_per_char": -2.868784713745117, "num_chars": 5}, {"sum_logits": -13.188922882080078, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -18.732418060302734, "logits_per_token": -6.594461441040039, "logits_per_char": -1.4654358757866754, "num_chars": 9}, {"sum_logits": -10.548542976379395, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -16.19852066040039, "logits_per_token": -5.274271488189697, "logits_per_char": -0.9589584523981268, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 688, "native_id": "3df1b88da6a90c9526be2c8a6cc736dc", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.886094093322754, "incorrect_loss_raw": 8.93140959739685, "correct_loss_per_char": 1.8143490155537922, "incorrect_loss_per_char": 1.376010677171132, "correct_loss_per_token": 10.886094093322754, "incorrect_loss_per_token": 7.873913645744324, "correct_loss_uncond": -1.0035591125488281, "incorrect_loss_uncond": -6.156320571899414}, "model_output": [{"sum_logits": -8.459967613220215, "num_tokens": 2, "num_tokens_all": 167, "is_greedy": false, "sum_logits_uncond": -16.11893081665039, "logits_per_token": -4.229983806610107, "logits_per_char": -1.2085668018886022, "num_chars": 7}, {"sum_logits": -10.886094093322754, "num_tokens": 1, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -11.889653205871582, "logits_per_token": -10.886094093322754, "logits_per_char": -1.8143490155537922, "num_chars": 6}, {"sum_logits": -9.22133731842041, "num_tokens": 1, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -15.179370880126953, "logits_per_token": -9.22133731842041, "logits_per_char": -1.5368895530700684, "num_chars": 6}, {"sum_logits": -9.565653800964355, "num_tokens": 1, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -15.224496841430664, "logits_per_token": -9.565653800964355, "logits_per_char": -1.0628504223293729, "num_chars": 9}, {"sum_logits": -8.478679656982422, "num_tokens": 1, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -13.82812213897705, "logits_per_token": -8.478679656982422, "logits_per_char": -1.6957359313964844, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 689, "native_id": "f912bcd7479b76db9b1c57a612b90f00", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.3413920402526855, "incorrect_loss_raw": 10.372238636016846, "correct_loss_per_char": 0.4894261360168457, "incorrect_loss_per_char": 1.0999985920358029, "correct_loss_per_token": 3.6706960201263428, "incorrect_loss_per_token": 6.350100517272949, "correct_loss_uncond": -13.85365343093872, "incorrect_loss_uncond": -9.147875785827637}, "model_output": [{"sum_logits": -7.355434417724609, "num_tokens": 2, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -23.914087295532227, "logits_per_token": -3.6777172088623047, "logits_per_char": -0.43267261280732994, "num_chars": 17}, {"sum_logits": -7.3413920402526855, "num_tokens": 2, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -21.195045471191406, "logits_per_token": -3.6706960201263428, "logits_per_char": -0.4894261360168457, "num_chars": 15}, {"sum_logits": -14.793102264404297, "num_tokens": 2, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -18.62615203857422, "logits_per_token": -7.396551132202148, "logits_per_char": -1.1379309434157152, "num_chars": 13}, {"sum_logits": -9.311849594116211, "num_tokens": 1, "num_tokens_all": 165, "is_greedy": false, "sum_logits_uncond": -13.619281768798828, "logits_per_token": -9.311849594116211, "logits_per_char": -2.3279623985290527, "num_chars": 4}, {"sum_logits": -10.028568267822266, "num_tokens": 2, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -21.920936584472656, "logits_per_token": -5.014284133911133, "logits_per_char": -0.5014284133911133, "num_chars": 20}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 690, "native_id": "94f34cc1e6aa9eefe06563cce8225658", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.020397186279297, "incorrect_loss_raw": 9.71368944644928, "correct_loss_per_char": 0.813774824142456, "incorrect_loss_per_char": 1.1798968548726554, "correct_loss_per_token": 4.340132395426433, "incorrect_loss_per_token": 7.650396645069122, "correct_loss_uncond": -8.287490844726562, "incorrect_loss_uncond": -6.951210379600525}, "model_output": [{"sum_logits": -13.020397186279297, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -21.30788803100586, "logits_per_token": -4.340132395426433, "logits_per_char": -0.813774824142456, "num_chars": 16}, {"sum_logits": -7.655050754547119, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.821346282958984, "logits_per_token": -3.8275253772735596, "logits_per_char": -0.695913704958829, "num_chars": 11}, {"sum_logits": -11.798752784729004, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.66738510131836, "logits_per_token": -11.798752784729004, "logits_per_char": -2.3597505569458006, "num_chars": 5}, {"sum_logits": -8.85129165649414, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.78818130493164, "logits_per_token": -4.42564582824707, "logits_per_char": -0.4917384253607856, "num_chars": 18}, {"sum_logits": -10.549662590026855, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.382686614990234, "logits_per_token": -10.549662590026855, "logits_per_char": -1.1721847322252061, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 691, "native_id": "bb503ece4eac41dfe608a1dcb654e6bf", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.99736213684082, "incorrect_loss_raw": 13.161339521408081, "correct_loss_per_char": 0.6663735707600912, "incorrect_loss_per_char": 1.452952005336811, "correct_loss_per_token": 2.99868106842041, "incorrect_loss_per_token": 11.111348867416382, "correct_loss_uncond": -9.188750267028809, "incorrect_loss_uncond": -1.9521584510803223}, "model_output": [{"sum_logits": -15.600975036621094, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.428391456604004, "logits_per_token": -15.600975036621094, "logits_per_char": -2.228710719517299, "num_chars": 7}, {"sum_logits": -13.34751033782959, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -15.28304672241211, "logits_per_token": -13.34751033782959, "logits_per_char": -1.213410030711781, "num_chars": 11}, {"sum_logits": -5.99736213684082, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -15.186112403869629, "logits_per_token": -2.99868106842041, "logits_per_char": -0.6663735707600912, "num_chars": 9}, {"sum_logits": -7.296947479248047, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -13.699138641357422, "logits_per_token": -7.296947479248047, "logits_per_char": -0.7296947479248047, "num_chars": 10}, {"sum_logits": -16.399925231933594, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -17.043415069580078, "logits_per_token": -8.199962615966797, "logits_per_char": -1.6399925231933594, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 692, "native_id": "5502dc807d4921679ae1abd0dc9570d6", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.90000057220459, "incorrect_loss_raw": 11.362154006958008, "correct_loss_per_char": 0.38333336512247723, "incorrect_loss_per_char": 1.2798019117779202, "correct_loss_per_token": 3.450000286102295, "incorrect_loss_per_token": 6.859203934669495, "correct_loss_uncond": -11.304156303405762, "incorrect_loss_uncond": -6.380256652832031}, "model_output": [{"sum_logits": -9.959185600280762, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.926565170288086, "logits_per_token": -4.979592800140381, "logits_per_char": -1.1065761778089735, "num_chars": 9}, {"sum_logits": -9.711395263671875, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.88397789001465, "logits_per_token": -4.8556976318359375, "logits_per_char": -1.0790439181857638, "num_chars": 9}, {"sum_logits": -16.35301971435547, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -23.13559913635254, "logits_per_token": -8.176509857177734, "logits_per_char": -1.3627516428629558, "num_chars": 12}, {"sum_logits": -6.90000057220459, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.20415687561035, "logits_per_token": -3.450000286102295, "logits_per_char": -0.38333336512247723, "num_chars": 18}, {"sum_logits": -9.425015449523926, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.023500442504883, "logits_per_token": -9.425015449523926, "logits_per_char": -1.5708359082539876, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 693, "native_id": "a7e3de0719fe30e7048f67426e29fdd1", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.295839309692383, "incorrect_loss_raw": 9.695797204971313, "correct_loss_per_char": 1.1619799137115479, "incorrect_loss_per_char": 1.3714831499826343, "correct_loss_per_token": 4.647919654846191, "incorrect_loss_per_token": 8.257099866867065, "correct_loss_uncond": -4.925232887268066, "incorrect_loss_uncond": -3.2323484420776367}, "model_output": [{"sum_logits": -8.823458671569824, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -12.753384590148926, "logits_per_token": -8.823458671569824, "logits_per_char": -1.4705764452616374, "num_chars": 6}, {"sum_logits": -10.368875503540039, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -12.349780082702637, "logits_per_token": -10.368875503540039, "logits_per_char": -0.9426250457763672, "num_chars": 11}, {"sum_logits": -11.509578704833984, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -12.367711067199707, "logits_per_token": -5.754789352416992, "logits_per_char": -1.9182631174723308, "num_chars": 6}, {"sum_logits": -9.295839309692383, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -14.22107219696045, "logits_per_token": -4.647919654846191, "logits_per_char": -1.1619799137115479, "num_chars": 8}, {"sum_logits": -8.081275939941406, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -14.241706848144531, "logits_per_token": -8.081275939941406, "logits_per_char": -1.154467991420201, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 694, "native_id": "d6107d454181b701ddcaa449a1e422a3", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.038888931274414, "incorrect_loss_raw": 8.119369983673096, "correct_loss_per_char": 0.926068379328801, "incorrect_loss_per_char": 0.824526546695201, "correct_loss_per_token": 6.019444465637207, "incorrect_loss_per_token": 4.900813817977905, "correct_loss_uncond": -8.636037826538086, "incorrect_loss_uncond": -9.717127084732056}, "model_output": [{"sum_logits": -12.038888931274414, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.6749267578125, "logits_per_token": -6.019444465637207, "logits_per_char": -0.926068379328801, "num_chars": 13}, {"sum_logits": -6.866097450256348, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.716712951660156, "logits_per_token": -3.433048725128174, "logits_per_char": -0.6241906772960316, "num_chars": 11}, {"sum_logits": -9.028209686279297, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -24.259963989257812, "logits_per_token": -4.514104843139648, "logits_per_char": -0.6944776681753305, "num_chars": 13}, {"sum_logits": -9.854142189025879, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.105693817138672, "logits_per_token": -4.9270710945129395, "logits_per_char": -1.2317677736282349, "num_chars": 8}, {"sum_logits": -6.729030609130859, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.263617515563965, "logits_per_token": -6.729030609130859, "logits_per_char": -0.7476700676812066, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 695, "native_id": "ab2eb930b29bb6d5e94a6cd3b04ba01e", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.0111448764801025, "incorrect_loss_raw": 10.852248311042786, "correct_loss_per_char": 0.28730641092572895, "incorrect_loss_per_char": 1.3639793292784588, "correct_loss_per_token": 2.0111448764801025, "incorrect_loss_per_token": 7.716543078422546, "correct_loss_uncond": -10.553574323654175, "incorrect_loss_uncond": -4.785816073417664}, "model_output": [{"sum_logits": -14.530570030212402, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.234086990356445, "logits_per_token": -7.265285015106201, "logits_per_char": -1.3209609118374912, "num_chars": 11}, {"sum_logits": -3.852292537689209, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.306157112121582, "logits_per_token": -3.852292537689209, "logits_per_char": -0.5503275053841727, "num_chars": 7}, {"sum_logits": -10.555071830749512, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.940622329711914, "logits_per_token": -5.277535915374756, "logits_per_char": -1.172785758972168, "num_chars": 9}, {"sum_logits": -14.47105884552002, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.071391105651855, "logits_per_token": -14.47105884552002, "logits_per_char": -2.4118431409200034, "num_chars": 6}, {"sum_logits": -2.0111448764801025, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": true, "sum_logits_uncond": -12.564719200134277, "logits_per_token": -2.0111448764801025, "logits_per_char": -0.28730641092572895, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 696, "native_id": "92869fc0be5dc45f407700692ffd80a0", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.6121312379837036, "incorrect_loss_raw": 13.627781391143799, "correct_loss_per_char": 0.12242624759674073, "incorrect_loss_per_char": 1.0697062214215598, "correct_loss_per_token": 0.6121312379837036, "incorrect_loss_per_token": 8.592811266581219, "correct_loss_uncond": -12.462590098381042, "incorrect_loss_uncond": -4.140306234359741}, "model_output": [{"sum_logits": -14.13937759399414, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.317642211914062, "logits_per_token": -4.713125864664714, "logits_per_char": -0.9426251729329427, "num_chars": 15}, {"sum_logits": -14.996390342712402, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.157535552978516, "logits_per_token": -14.996390342712402, "logits_per_char": -1.2496991952260335, "num_chars": 12}, {"sum_logits": -21.427257537841797, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.537311553955078, "logits_per_token": -10.713628768920898, "logits_per_char": -1.4284838358561198, "num_chars": 15}, {"sum_logits": -3.9481000900268555, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.059861183166504, "logits_per_token": -3.9481000900268555, "logits_per_char": -0.6580166816711426, "num_chars": 6}, {"sum_logits": -0.6121312379837036, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": true, "sum_logits_uncond": -13.074721336364746, "logits_per_token": -0.6121312379837036, "logits_per_char": -0.12242624759674073, "num_chars": 5}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 697, "native_id": "6a0177586d506cb7b741f4207b428e42", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.900039196014404, "incorrect_loss_raw": 11.414549112319946, "correct_loss_per_char": 0.34500195980072024, "incorrect_loss_per_char": 1.1982324063777925, "correct_loss_per_token": 3.450019598007202, "incorrect_loss_per_token": 7.399554173151651, "correct_loss_uncond": -14.043762683868408, "incorrect_loss_uncond": -5.090178489685059}, "model_output": [{"sum_logits": -7.740015983581543, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.709348678588867, "logits_per_token": -7.740015983581543, "logits_per_char": -0.9675019979476929, "num_chars": 8}, {"sum_logits": -6.900039196014404, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -20.943801879882812, "logits_per_token": -3.450019598007202, "logits_per_char": -0.34500195980072024, "num_chars": 20}, {"sum_logits": -11.800188064575195, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -20.45903778076172, "logits_per_token": -3.933396021525065, "logits_per_char": -0.786679204305013, "num_chars": 15}, {"sum_logits": -9.731616973876953, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.509245872497559, "logits_per_token": -9.731616973876953, "logits_per_char": -1.9463233947753906, "num_chars": 5}, {"sum_logits": -16.386375427246094, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -19.341278076171875, "logits_per_token": -8.193187713623047, "logits_per_char": -1.092425028483073, "num_chars": 15}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 698, "native_id": "584188da9a429f1bc319abda5e5c7a76", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.132332801818848, "incorrect_loss_raw": 8.80657148361206, "correct_loss_per_char": 0.8760475431169782, "incorrect_loss_per_char": 1.5220319562488132, "correct_loss_per_token": 6.132332801818848, "incorrect_loss_per_token": 7.739040851593018, "correct_loss_uncond": -9.869425773620605, "incorrect_loss_uncond": -5.489461660385132}, "model_output": [{"sum_logits": -8.540245056152344, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.253192901611328, "logits_per_token": -4.270122528076172, "logits_per_char": -0.8540245056152344, "num_chars": 10}, {"sum_logits": -11.770750999450684, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.915053367614746, "logits_per_token": -11.770750999450684, "logits_per_char": -2.3541501998901366, "num_chars": 5}, {"sum_logits": -8.803430557250977, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.587641716003418, "logits_per_token": -8.803430557250977, "logits_per_char": -2.200857639312744, "num_chars": 4}, {"sum_logits": -6.111859321594238, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.428244590759277, "logits_per_token": -6.111859321594238, "logits_per_char": -0.6790954801771376, "num_chars": 9}, {"sum_logits": -6.132332801818848, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.001758575439453, "logits_per_token": -6.132332801818848, "logits_per_char": -0.8760475431169782, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 699, "native_id": "e480d4a672af0194e0a6ccdb8c37499b", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.8451056480407715, "incorrect_loss_raw": 11.019209384918213, "correct_loss_per_char": 0.7845105648040771, "incorrect_loss_per_char": 0.9946306149164835, "correct_loss_per_token": 3.9225528240203857, "incorrect_loss_per_token": 9.53385865688324, "correct_loss_uncond": -8.463690280914307, "incorrect_loss_uncond": -4.428221702575684}, "model_output": [{"sum_logits": -12.32772445678711, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.6738920211792, "logits_per_token": -12.32772445678711, "logits_per_char": -1.3697471618652344, "num_chars": 9}, {"sum_logits": -10.551377296447754, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.317315101623535, "logits_per_token": -10.551377296447754, "logits_per_char": -1.1723752551608615, "num_chars": 9}, {"sum_logits": -9.314929962158203, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.098270416259766, "logits_per_token": -9.314929962158203, "logits_per_char": -0.7762441635131836, "num_chars": 12}, {"sum_logits": -11.882805824279785, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.700246810913086, "logits_per_token": -5.941402912139893, "logits_per_char": -0.6601558791266547, "num_chars": 18}, {"sum_logits": -7.8451056480407715, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.308795928955078, "logits_per_token": -3.9225528240203857, "logits_per_char": -0.7845105648040771, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 700, "native_id": "275c859994f7d3acd3c8863be591ab2c", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.153858184814453, "incorrect_loss_raw": 6.893291711807251, "correct_loss_per_char": 0.7041429372934195, "incorrect_loss_per_char": 0.8724299972019498, "correct_loss_per_token": 4.576929092407227, "incorrect_loss_per_token": 5.561504006385803, "correct_loss_uncond": -10.12347412109375, "incorrect_loss_uncond": -6.8327367305755615}, "model_output": [{"sum_logits": -9.118717193603516, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.96494197845459, "logits_per_token": -9.118717193603516, "logits_per_char": -1.3026738848005022, "num_chars": 7}, {"sum_logits": -4.5887908935546875, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.495595932006836, "logits_per_token": -4.5887908935546875, "logits_per_char": -0.7647984822591146, "num_chars": 6}, {"sum_logits": -3.2113571166992188, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.413294792175293, "logits_per_token": -3.2113571166992188, "logits_per_char": -0.35681745741102433, "num_chars": 9}, {"sum_logits": -10.654301643371582, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.03028106689453, "logits_per_token": -5.327150821685791, "logits_per_char": -1.0654301643371582, "num_chars": 10}, {"sum_logits": -9.153858184814453, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.277332305908203, "logits_per_token": -4.576929092407227, "logits_per_char": -0.7041429372934195, "num_chars": 13}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 701, "native_id": "32758ab86d888be680845b0dfe7de35e", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.419831275939941, "incorrect_loss_raw": 15.091386318206787, "correct_loss_per_char": 0.647679328918457, "incorrect_loss_per_char": 1.6181192977259857, "correct_loss_per_token": 4.209915637969971, "incorrect_loss_per_token": 7.5456931591033936, "correct_loss_uncond": -12.783904075622559, "incorrect_loss_uncond": -3.250730514526367}, "model_output": [{"sum_logits": -14.230291366577148, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -18.39535140991211, "logits_per_token": -7.115145683288574, "logits_per_char": -1.5811434851752386, "num_chars": 9}, {"sum_logits": -14.970457077026367, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -18.67377471923828, "logits_per_token": -7.485228538513184, "logits_per_char": -1.3609506433660334, "num_chars": 11}, {"sum_logits": -14.608661651611328, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -17.494699478149414, "logits_per_token": -7.304330825805664, "logits_per_char": -1.4608661651611328, "num_chars": 10}, {"sum_logits": -16.556135177612305, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -18.804641723632812, "logits_per_token": -8.278067588806152, "logits_per_char": -2.069516897201538, "num_chars": 8}, {"sum_logits": -8.419831275939941, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -21.2037353515625, "logits_per_token": -4.209915637969971, "logits_per_char": -0.647679328918457, "num_chars": 13}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 702, "native_id": "69335eb9bc5b7b5df840c38a086bf8b2", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.1492477655410767, "incorrect_loss_raw": 9.738489866256714, "correct_loss_per_char": 0.22984955310821534, "incorrect_loss_per_char": 1.2624837868743473, "correct_loss_per_token": 1.1492477655410767, "incorrect_loss_per_token": 9.738489866256714, "correct_loss_uncond": -10.248908400535583, "incorrect_loss_uncond": -3.306563377380371}, "model_output": [{"sum_logits": -12.46928596496582, "num_tokens": 1, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -14.184654235839844, "logits_per_token": -12.46928596496582, "logits_per_char": -1.0391071637471516, "num_chars": 12}, {"sum_logits": -6.555083274841309, "num_tokens": 1, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -12.215597152709961, "logits_per_token": -6.555083274841309, "logits_per_char": -0.8193854093551636, "num_chars": 8}, {"sum_logits": -10.991741180419922, "num_tokens": 1, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -12.987201690673828, "logits_per_token": -10.991741180419922, "logits_per_char": -2.1983482360839846, "num_chars": 5}, {"sum_logits": -1.1492477655410767, "num_tokens": 1, "num_tokens_all": 151, "is_greedy": true, "sum_logits_uncond": -11.39815616607666, "logits_per_token": -1.1492477655410767, "logits_per_char": -0.22984955310821534, "num_chars": 5}, {"sum_logits": -8.937849044799805, "num_tokens": 1, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -12.792759895324707, "logits_per_token": -8.937849044799805, "logits_per_char": -0.9930943383110894, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 703, "native_id": "4396cb65629672723c7b184424e139bb", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.166855812072754, "incorrect_loss_raw": 9.155195593833923, "correct_loss_per_char": 0.29365980057489305, "incorrect_loss_per_char": 0.7990089742546408, "correct_loss_per_token": 2.0556186040242515, "incorrect_loss_per_token": 4.577597796916962, "correct_loss_uncond": -12.702858924865723, "incorrect_loss_uncond": -9.135403275489807}, "model_output": [{"sum_logits": -4.590762615203857, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.944881439208984, "logits_per_token": -2.2953813076019287, "logits_per_char": -0.3060508410135905, "num_chars": 15}, {"sum_logits": -6.166855812072754, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.869714736938477, "logits_per_token": -2.0556186040242515, "logits_per_char": -0.29365980057489305, "num_chars": 21}, {"sum_logits": -9.097105026245117, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.956235885620117, "logits_per_token": -4.548552513122559, "logits_per_char": -0.6997773097111628, "num_chars": 13}, {"sum_logits": -12.884180068969727, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.228553771972656, "logits_per_token": -6.442090034484863, "logits_per_char": -1.0736816724141438, "num_chars": 12}, {"sum_logits": -10.048734664916992, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.032724380493164, "logits_per_token": -5.024367332458496, "logits_per_char": -1.1165260738796658, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 704, "native_id": "2a58e81a9c4ce095d099e0d785fc2da4", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.695746421813965, "incorrect_loss_raw": 9.158884525299072, "correct_loss_per_char": 2.339149284362793, "incorrect_loss_per_char": 1.048944052722719, "correct_loss_per_token": 11.695746421813965, "incorrect_loss_per_token": 6.988455772399902, "correct_loss_uncond": -2.4915761947631836, "incorrect_loss_uncond": -7.532508373260498}, "model_output": [{"sum_logits": -10.729984283447266, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -14.695972442626953, "logits_per_token": -10.729984283447266, "logits_per_char": -1.192220475938585, "num_chars": 9}, {"sum_logits": -8.542123794555664, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -13.978975296020508, "logits_per_token": -8.542123794555664, "logits_per_char": -0.9491248660617404, "num_chars": 9}, {"sum_logits": -8.351847648620605, "num_tokens": 2, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -20.22335433959961, "logits_per_token": -4.175923824310303, "logits_per_char": -0.9279830720689561, "num_chars": 9}, {"sum_logits": -9.011582374572754, "num_tokens": 2, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -17.86726951599121, "logits_per_token": -4.505791187286377, "logits_per_char": -1.1264477968215942, "num_chars": 8}, {"sum_logits": -11.695746421813965, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -14.187322616577148, "logits_per_token": -11.695746421813965, "logits_per_char": -2.339149284362793, "num_chars": 5}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 705, "native_id": "07f108d5321a66f460685f5c7499ecb2", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.800858497619629, "incorrect_loss_raw": 9.355696022510529, "correct_loss_per_char": 0.43338102764553493, "incorrect_loss_per_char": 1.5017369472554754, "correct_loss_per_token": 3.9004292488098145, "incorrect_loss_per_token": 9.355696022510529, "correct_loss_uncond": -12.235751152038574, "incorrect_loss_uncond": -5.1482409834861755}, "model_output": [{"sum_logits": -14.944951057434082, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.641561508178711, "logits_per_token": -14.944951057434082, "logits_per_char": -2.134993008204869, "num_chars": 7}, {"sum_logits": -10.24841594696045, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.709084510803223, "logits_per_token": -10.24841594696045, "logits_per_char": -1.7080693244934082, "num_chars": 6}, {"sum_logits": -7.800858497619629, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.036609649658203, "logits_per_token": -3.9004292488098145, "logits_per_char": -0.43338102764553493, "num_chars": 18}, {"sum_logits": -3.7599728107452393, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.836979866027832, "logits_per_token": -3.7599728107452393, "logits_per_char": -0.4699966013431549, "num_chars": 8}, {"sum_logits": -8.469444274902344, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.82812213897705, "logits_per_token": -8.469444274902344, "logits_per_char": -1.6938888549804687, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 706, "native_id": "69bef3eb55463d040bdf98e2c97bfe1f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.2579803466796875, "incorrect_loss_raw": 12.606519937515259, "correct_loss_per_char": 0.21719868977864584, "incorrect_loss_per_char": 1.2428776483343105, "correct_loss_per_token": 1.6289901733398438, "incorrect_loss_per_token": 6.833340128262837, "correct_loss_uncond": -13.066143035888672, "incorrect_loss_uncond": -3.8119428157806396}, "model_output": [{"sum_logits": -3.2579803466796875, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.32412338256836, "logits_per_token": -1.6289901733398438, "logits_per_char": -0.21719868977864584, "num_chars": 15}, {"sum_logits": -8.276946067810059, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -17.698516845703125, "logits_per_token": -4.138473033905029, "logits_per_char": -0.7524496425281871, "num_chars": 11}, {"sum_logits": -15.62803840637207, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.23089599609375, "logits_per_token": -7.814019203186035, "logits_per_char": -1.736448711819119, "num_chars": 9}, {"sum_logits": -16.71034049987793, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -19.71957015991211, "logits_per_token": -5.57011349995931, "logits_per_char": -1.3925283749898274, "num_chars": 12}, {"sum_logits": -9.810754776000977, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.02486801147461, "logits_per_token": -9.810754776000977, "logits_per_char": -1.0900838640001085, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 707, "native_id": "912676495cceefadccbbf8c604486f97", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 8.013666152954102, "incorrect_loss_raw": 14.818054914474487, "correct_loss_per_char": 0.5724047252110073, "incorrect_loss_per_char": 1.0504964564448698, "correct_loss_per_token": 4.006833076477051, "incorrect_loss_per_token": 5.896745324134827, "correct_loss_uncond": -7.929898262023926, "incorrect_loss_uncond": -3.524338483810425}, "model_output": [{"sum_logits": -8.013666152954102, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.943564414978027, "logits_per_token": -4.006833076477051, "logits_per_char": -0.5724047252110073, "num_chars": 14}, {"sum_logits": -15.600696563720703, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.107362747192383, "logits_per_token": -7.800348281860352, "logits_per_char": -1.1143354688371931, "num_chars": 14}, {"sum_logits": -14.843561172485352, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.096107482910156, "logits_per_token": -7.421780586242676, "logits_per_char": -1.4843561172485351, "num_chars": 10}, {"sum_logits": -13.894343376159668, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -22.853904724121094, "logits_per_token": -4.631447792053223, "logits_per_char": -0.8173143162446863, "num_chars": 17}, {"sum_logits": -14.933618545532227, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.312198638916016, "logits_per_token": -3.7334046363830566, "logits_per_char": -0.7859799234490645, "num_chars": 19}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 708, "native_id": "bdf92566f14599f1606109677206001f", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.16976547241211, "incorrect_loss_raw": 7.1638206243515015, "correct_loss_per_char": 0.7641471227010092, "incorrect_loss_per_char": 0.7939519320215498, "correct_loss_per_token": 4.584882736206055, "incorrect_loss_per_token": 6.030021548271179, "correct_loss_uncond": -9.925939559936523, "incorrect_loss_uncond": -7.918059706687927}, "model_output": [{"sum_logits": -9.16976547241211, "num_tokens": 2, "num_tokens_all": 162, "is_greedy": false, "sum_logits_uncond": -19.095705032348633, "logits_per_token": -4.584882736206055, "logits_per_char": -0.7641471227010092, "num_chars": 12}, {"sum_logits": -9.070392608642578, "num_tokens": 2, "num_tokens_all": 162, "is_greedy": false, "sum_logits_uncond": -19.26300621032715, "logits_per_token": -4.535196304321289, "logits_per_char": -0.6046928405761719, "num_chars": 15}, {"sum_logits": -5.290285587310791, "num_tokens": 1, "num_tokens_all": 161, "is_greedy": false, "sum_logits_uncond": -14.412413597106934, "logits_per_token": -5.290285587310791, "logits_per_char": -0.5290285587310791, "num_chars": 10}, {"sum_logits": -6.637447357177734, "num_tokens": 1, "num_tokens_all": 161, "is_greedy": false, "sum_logits_uncond": -13.181896209716797, "logits_per_token": -6.637447357177734, "logits_per_char": -0.9482067653111049, "num_chars": 7}, {"sum_logits": -7.657156944274902, "num_tokens": 1, "num_tokens_all": 161, "is_greedy": false, "sum_logits_uncond": -13.470205307006836, "logits_per_token": -7.657156944274902, "logits_per_char": -1.093879563467843, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 709, "native_id": "0df042743128b57e874bd5d79b7aae7a", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.335039138793945, "incorrect_loss_raw": 11.110023736953735, "correct_loss_per_char": 0.4816710154215495, "incorrect_loss_per_char": 1.135959893854615, "correct_loss_per_token": 2.1675195693969727, "incorrect_loss_per_token": 5.37394920984904, "correct_loss_uncond": -11.59152603149414, "incorrect_loss_uncond": -5.024206638336182}, "model_output": [{"sum_logits": -10.723580360412598, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -16.578357696533203, "logits_per_token": -5.361790180206299, "logits_per_char": -0.6307988447301528, "num_chars": 17}, {"sum_logits": -4.335039138793945, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.926565170288086, "logits_per_token": -2.1675195693969727, "logits_per_char": -0.4816710154215495, "num_chars": 9}, {"sum_logits": -8.962512016296387, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.079458236694336, "logits_per_token": -8.962512016296387, "logits_per_char": -1.7925024032592773, "num_chars": 5}, {"sum_logits": -11.795928001403809, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.290799140930176, "logits_per_token": -3.931976000467936, "logits_per_char": -1.3106586668226454, "num_chars": 9}, {"sum_logits": -12.958074569702148, "num_tokens": 4, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.588306427001953, "logits_per_token": -3.239518642425537, "logits_per_char": -0.8098796606063843, "num_chars": 16}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 710, "native_id": "866ef7266d34c11e5a1b3df49fab96a4", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.456531524658203, "incorrect_loss_raw": 8.900931119918823, "correct_loss_per_char": 0.8285035027398003, "incorrect_loss_per_char": 0.7357344366801091, "correct_loss_per_token": 3.7282657623291016, "incorrect_loss_per_token": 4.761718253294627, "correct_loss_uncond": -8.942638397216797, "incorrect_loss_uncond": -9.40883755683899}, "model_output": [{"sum_logits": -6.0018134117126465, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -20.719802856445312, "logits_per_token": -3.0009067058563232, "logits_per_char": -0.5001511176427206, "num_chars": 12}, {"sum_logits": -16.479860305786133, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -19.798038482666016, "logits_per_token": -8.239930152893066, "logits_per_char": -1.2676815619835486, "num_chars": 13}, {"sum_logits": -5.14802885055542, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -12.16602897644043, "logits_per_token": -5.14802885055542, "logits_per_char": -0.6435036063194275, "num_chars": 8}, {"sum_logits": -7.456531524658203, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -16.399169921875, "logits_per_token": -3.7282657623291016, "logits_per_char": -0.8285035027398003, "num_chars": 9}, {"sum_logits": -7.974021911621094, "num_tokens": 3, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -20.555204391479492, "logits_per_token": -2.6580073038736978, "logits_per_char": -0.5316014607747396, "num_chars": 15}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 711, "native_id": "67ffcb4c3f2c6a1155e356f8a15ed250", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 3.3796114921569824, "incorrect_loss_raw": 6.265174865722656, "correct_loss_per_char": 0.8449028730392456, "incorrect_loss_per_char": 0.7918040990829468, "correct_loss_per_token": 3.3796114921569824, "incorrect_loss_per_token": 4.547161340713501, "correct_loss_uncond": -10.945044994354248, "incorrect_loss_uncond": -7.858124017715454}, "model_output": [{"sum_logits": -3.3796114921569824, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.32465648651123, "logits_per_token": -3.3796114921569824, "logits_per_char": -0.8449028730392456, "num_chars": 4}, {"sum_logits": -13.744108200073242, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -20.417407989501953, "logits_per_token": -6.872054100036621, "logits_per_char": -0.9162738800048829, "num_chars": 15}, {"sum_logits": -2.844546318054199, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": true, "sum_logits_uncond": -12.424250602722168, "logits_per_token": -2.844546318054199, "logits_per_char": -0.7111365795135498, "num_chars": 4}, {"sum_logits": -3.8547019958496094, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -13.776235580444336, "logits_per_token": -3.8547019958496094, "logits_per_char": -0.3854701995849609, "num_chars": 10}, {"sum_logits": -4.617342948913574, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -9.875301361083984, "logits_per_token": -4.617342948913574, "logits_per_char": -1.1543357372283936, "num_chars": 4}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 712, "native_id": "87a133afae5d9d29d634f3384f28ef24", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.207969665527344, "incorrect_loss_raw": 10.227715134620667, "correct_loss_per_char": 0.700498104095459, "incorrect_loss_per_char": 1.0615167082130135, "correct_loss_per_token": 5.603984832763672, "incorrect_loss_per_token": 5.572796901067099, "correct_loss_uncond": -6.268362045288086, "incorrect_loss_uncond": -5.817891955375671}, "model_output": [{"sum_logits": -10.65811538696289, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.379087448120117, "logits_per_token": -3.5527051289876304, "logits_per_char": -0.9689195806329901, "num_chars": 11}, {"sum_logits": -7.056206703186035, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.99239158630371, "logits_per_token": -3.5281033515930176, "logits_per_char": -0.4704137802124023, "num_chars": 15}, {"sum_logits": -15.972318649291992, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.90981101989746, "logits_per_token": -7.986159324645996, "logits_per_char": -1.7747020721435547, "num_chars": 9}, {"sum_logits": -7.224219799041748, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.901138305664062, "logits_per_token": -7.224219799041748, "logits_per_char": -1.0320313998631068, "num_chars": 7}, {"sum_logits": -11.207969665527344, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.47633171081543, "logits_per_token": -5.603984832763672, "logits_per_char": -0.700498104095459, "num_chars": 16}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 713, "native_id": "4779be55f47a301debfc472e4fc2c7b6", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.5138487815856934, "incorrect_loss_raw": 11.238159656524658, "correct_loss_per_char": 0.1376226165077903, "incorrect_loss_per_char": 1.0553522380915554, "correct_loss_per_token": 1.5138487815856934, "incorrect_loss_per_token": 7.790512561798096, "correct_loss_uncond": -12.559381008148193, "incorrect_loss_uncond": -4.35912561416626}, "model_output": [{"sum_logits": -18.387451171875, "num_tokens": 4, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.075702667236328, "logits_per_token": -4.59686279296875, "logits_per_char": -1.225830078125, "num_chars": 15}, {"sum_logits": -1.5138487815856934, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.073229789733887, "logits_per_token": -1.5138487815856934, "logits_per_char": -0.1376226165077903, "num_chars": 11}, {"sum_logits": -9.120777130126953, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.75567626953125, "logits_per_token": -9.120777130126953, "logits_per_char": -1.1400971412658691, "num_chars": 8}, {"sum_logits": -9.53537368774414, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.359978675842285, "logits_per_token": -9.53537368774414, "logits_per_char": -0.8668521534312855, "num_chars": 11}, {"sum_logits": -7.909036636352539, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.197783470153809, "logits_per_token": -7.909036636352539, "logits_per_char": -0.9886295795440674, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 714, "native_id": "7a28d31e66d870370642de3be47b9ef9", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 15.245220184326172, "incorrect_loss_raw": 11.54922866821289, "correct_loss_per_char": 0.8967776579015395, "incorrect_loss_per_char": 1.0544768698486215, "correct_loss_per_token": 7.622610092163086, "incorrect_loss_per_token": 5.774614334106445, "correct_loss_uncond": -8.195060729980469, "incorrect_loss_uncond": -6.816522598266602}, "model_output": [{"sum_logits": -9.842061996459961, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.093673706054688, "logits_per_token": -4.9210309982299805, "logits_per_char": -1.0935624440511067, "num_chars": 9}, {"sum_logits": -12.578638076782227, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.06609344482422, "logits_per_token": -6.289319038391113, "logits_per_char": -0.9675875443678635, "num_chars": 13}, {"sum_logits": -8.557853698730469, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.98387908935547, "logits_per_token": -4.278926849365234, "logits_per_char": -1.0697317123413086, "num_chars": 8}, {"sum_logits": -15.245220184326172, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -23.44028091430664, "logits_per_token": -7.622610092163086, "logits_per_char": -0.8967776579015395, "num_chars": 17}, {"sum_logits": -15.218360900878906, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.319358825683594, "logits_per_token": -7.609180450439453, "logits_per_char": -1.0870257786342077, "num_chars": 14}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 715, "native_id": "042898e0c71adac5d123aaa6221c9754", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.150141716003418, "incorrect_loss_raw": 11.771057367324829, "correct_loss_per_char": 0.5107244082859584, "incorrect_loss_per_char": 1.1251486483074369, "correct_loss_per_token": 1.7875354290008545, "incorrect_loss_per_token": 6.687111934026082, "correct_loss_uncond": -10.58215618133545, "incorrect_loss_uncond": -7.6579766273498535}, "model_output": [{"sum_logits": -9.620362281799316, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.316246032714844, "logits_per_token": -4.810181140899658, "logits_per_char": -0.9620362281799316, "num_chars": 10}, {"sum_logits": -12.669743537902832, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -26.6953067779541, "logits_per_token": -4.22324784596761, "logits_per_char": -0.6033211208525158, "num_chars": 21}, {"sum_logits": -14.158209800720215, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.645164489746094, "logits_per_token": -7.079104900360107, "logits_per_char": -1.4158209800720214, "num_chars": 10}, {"sum_logits": -7.150141716003418, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.732297897338867, "logits_per_token": -1.7875354290008545, "logits_per_char": -0.5107244082859584, "num_chars": 14}, {"sum_logits": -10.635913848876953, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.059418678283691, "logits_per_token": -10.635913848876953, "logits_per_char": -1.519416264125279, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 716, "native_id": "93bbaccb1c46d22124a846b8514de5bc", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.796298027038574, "incorrect_loss_raw": 10.716413974761963, "correct_loss_per_char": 0.4586057662963867, "incorrect_loss_per_char": 1.0730475523255087, "correct_loss_per_token": 2.598766009012858, "incorrect_loss_per_token": 4.432625810305277, "correct_loss_uncond": -10.718165397644043, "incorrect_loss_uncond": -7.185460090637207}, "model_output": [{"sum_logits": -7.796298027038574, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -18.514463424682617, "logits_per_token": -2.598766009012858, "logits_per_char": -0.4586057662963867, "num_chars": 17}, {"sum_logits": -11.391122817993164, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.74608039855957, "logits_per_token": -5.695561408996582, "logits_per_char": -2.278224563598633, "num_chars": 5}, {"sum_logits": -11.05143928527832, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -20.366369247436523, "logits_per_token": -3.6838130950927734, "logits_per_char": -0.6139688491821289, "num_chars": 18}, {"sum_logits": -11.162508964538574, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -19.52379035949707, "logits_per_token": -3.720836321512858, "logits_per_char": -0.5581254482269287, "num_chars": 20}, {"sum_logits": -9.260584831237793, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.971256256103516, "logits_per_token": -4.6302924156188965, "logits_per_char": -0.8418713482943448, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 717, "native_id": "ef889edd1b57d8d0c81e43f73c98c8e9", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.2118663787841797, "incorrect_loss_raw": 10.682107329368591, "correct_loss_per_char": 0.40148329734802246, "incorrect_loss_per_char": 0.9201657427681817, "correct_loss_per_token": 3.2118663787841797, "incorrect_loss_per_token": 5.741278608640036, "correct_loss_uncond": -10.36497974395752, "incorrect_loss_uncond": -7.731822371482849}, "model_output": [{"sum_logits": -8.31422233581543, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.922504425048828, "logits_per_token": -2.77140744527181, "logits_per_char": -0.39591534932454425, "num_chars": 21}, {"sum_logits": -5.973206996917725, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.986479759216309, "logits_per_token": -5.973206996917725, "logits_per_char": -0.6636896663241916, "num_chars": 9}, {"sum_logits": -3.2118663787841797, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.5768461227417, "logits_per_token": -3.2118663787841797, "logits_per_char": -0.40148329734802246, "num_chars": 8}, {"sum_logits": -13.382522583007812, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.790130615234375, "logits_per_token": -6.691261291503906, "logits_per_char": -1.1152102152506511, "num_chars": 12}, {"sum_logits": -15.058477401733398, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.95660400390625, "logits_per_token": -7.529238700866699, "logits_per_char": -1.5058477401733399, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 718, "native_id": "f4bb8ecacb9ce89e040f5f76bc79afb3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.546175003051758, "incorrect_loss_raw": 15.833347797393799, "correct_loss_per_char": 0.47163593769073486, "incorrect_loss_per_char": 1.016865631748951, "correct_loss_per_token": 3.773087501525879, "incorrect_loss_per_token": 5.423043429851532, "correct_loss_uncond": -15.284826278686523, "incorrect_loss_uncond": -3.500579833984375}, "model_output": [{"sum_logits": -15.389360427856445, "num_tokens": 4, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.227394104003906, "logits_per_token": -3.8473401069641113, "logits_per_char": -0.6412233511606852, "num_chars": 24}, {"sum_logits": -7.546175003051758, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -22.83100128173828, "logits_per_token": -3.773087501525879, "logits_per_char": -0.47163593769073486, "num_chars": 16}, {"sum_logits": -19.087371826171875, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.437105178833008, "logits_per_token": -9.543685913085938, "logits_per_char": -1.5906143188476562, "num_chars": 12}, {"sum_logits": -15.81286334991455, "num_tokens": 4, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.955930709838867, "logits_per_token": -3.9532158374786377, "logits_per_char": -0.8322559657849764, "num_chars": 19}, {"sum_logits": -13.043795585632324, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.715280532836914, "logits_per_token": -4.347931861877441, "logits_per_char": -1.0033688912024865, "num_chars": 13}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 719, "native_id": "ec2e18fd8c18a4ebe5a091e0c8b94462", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.487356185913086, "incorrect_loss_raw": 16.839667320251465, "correct_loss_per_char": 1.3487356185913086, "incorrect_loss_per_char": 1.4247084801847283, "correct_loss_per_token": 6.743678092956543, "incorrect_loss_per_token": 7.796967506408691, "correct_loss_uncond": -4.514936447143555, "incorrect_loss_uncond": -3.942460060119629}, "model_output": [{"sum_logits": -17.04364776611328, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.50368309020996, "logits_per_token": -8.52182388305664, "logits_per_char": -1.5494225241921165, "num_chars": 11}, {"sum_logits": -13.487356185913086, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.00229263305664, "logits_per_token": -6.743678092956543, "logits_per_char": -1.3487356185913086, "num_chars": 10}, {"sum_logits": -14.948787689208984, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.358720779418945, "logits_per_token": -4.982929229736328, "logits_per_char": -0.9342992305755615, "num_chars": 16}, {"sum_logits": -18.273303985595703, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -23.081195831298828, "logits_per_token": -9.136651992797852, "logits_per_char": -1.661209453235973, "num_chars": 11}, {"sum_logits": -17.09292984008789, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.18490982055664, "logits_per_token": -8.546464920043945, "logits_per_char": -1.5539027127352627, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 720, "native_id": "07b51b231a9d6a143d8a73e69121e1b1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.129706859588623, "incorrect_loss_raw": 11.368908166885376, "correct_loss_per_char": 0.5941422382990519, "incorrect_loss_per_char": 0.8734783147062574, "correct_loss_per_token": 3.5648534297943115, "incorrect_loss_per_token": 5.98264753818512, "correct_loss_uncond": -11.15564775466919, "incorrect_loss_uncond": -4.650629758834839}, "model_output": [{"sum_logits": -12.6079683303833, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.65354347229004, "logits_per_token": -6.30398416519165, "logits_per_char": -0.6003794443039667, "num_chars": 21}, {"sum_logits": -7.129706859588623, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.285354614257812, "logits_per_token": -3.5648534297943115, "logits_per_char": -0.5941422382990519, "num_chars": 12}, {"sum_logits": -13.965116500854492, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.30598258972168, "logits_per_token": -4.655038833618164, "logits_per_char": -0.8728197813034058, "num_chars": 16}, {"sum_logits": -7.040586471557617, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.495595932006836, "logits_per_token": -7.040586471557617, "logits_per_char": -1.1734310785929363, "num_chars": 6}, {"sum_logits": -11.861961364746094, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.623029708862305, "logits_per_token": -5.930980682373047, "logits_per_char": -0.847282954624721, "num_chars": 14}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 721, "native_id": "e1744fc698cffb574e5cf4b29a81ce76", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.653838157653809, "incorrect_loss_raw": 7.835948944091797, "correct_loss_per_char": 0.35336488485336304, "incorrect_loss_per_char": 0.8412362535794575, "correct_loss_per_token": 2.8269190788269043, "incorrect_loss_per_token": 5.6344945430755615, "correct_loss_uncond": -12.89394474029541, "incorrect_loss_uncond": -7.400284051895142}, "model_output": [{"sum_logits": -5.653838157653809, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.54778289794922, "logits_per_token": -2.8269190788269043, "logits_per_char": -0.35336488485336304, "num_chars": 16}, {"sum_logits": -8.805817604064941, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.801706314086914, "logits_per_token": -4.402908802032471, "logits_per_char": -0.6289869717189244, "num_chars": 14}, {"sum_logits": -6.0998215675354, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.77853012084961, "logits_per_token": -6.0998215675354, "logits_per_char": -1.0166369279225667, "num_chars": 6}, {"sum_logits": -7.632339000701904, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.562989234924316, "logits_per_token": -7.632339000701904, "logits_per_char": -1.090334142957415, "num_chars": 7}, {"sum_logits": -8.805817604064941, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.801706314086914, "logits_per_token": -4.402908802032471, "logits_per_char": -0.6289869717189244, "num_chars": 14}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 722, "native_id": "27604394ccee83e089f9ffae1883cf07", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.168046951293945, "incorrect_loss_raw": 11.187929630279541, "correct_loss_per_char": 0.6853385501437717, "incorrect_loss_per_char": 1.3076135823220918, "correct_loss_per_token": 3.0840234756469727, "incorrect_loss_per_token": 8.208338260650635, "correct_loss_uncond": -10.365583419799805, "incorrect_loss_uncond": -5.937240123748779}, "model_output": [{"sum_logits": -6.168046951293945, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.53363037109375, "logits_per_token": -3.0840234756469727, "logits_per_char": -0.6853385501437717, "num_chars": 9}, {"sum_logits": -10.316617965698242, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.018508911132812, "logits_per_token": -5.158308982849121, "logits_per_char": -0.937874360518022, "num_chars": 11}, {"sum_logits": -10.136459350585938, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.755424499511719, "logits_per_token": -10.136459350585938, "logits_per_char": -1.2670574188232422, "num_chars": 8}, {"sum_logits": -10.778528213500977, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.893854141235352, "logits_per_token": -10.778528213500977, "logits_per_char": -1.7964213689168294, "num_chars": 6}, {"sum_logits": -13.520112991333008, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.8328914642334, "logits_per_token": -6.760056495666504, "logits_per_char": -1.2291011810302734, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 723, "native_id": "1272e693cf9152e7ac71095c643676b5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.4550328254699707, "incorrect_loss_raw": 7.0101436376571655, "correct_loss_per_char": 0.43187910318374634, "incorrect_loss_per_char": 0.8397335758576028, "correct_loss_per_token": 3.4550328254699707, "incorrect_loss_per_token": 6.360225081443787, "correct_loss_uncond": -11.381947040557861, "incorrect_loss_uncond": -7.910786271095276}, "model_output": [{"sum_logits": -3.4550328254699707, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -14.836979866027832, "logits_per_token": -3.4550328254699707, "logits_per_char": -0.43187910318374634, "num_chars": 8}, {"sum_logits": -11.212568283081055, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -16.054000854492188, "logits_per_token": -11.212568283081055, "logits_per_char": -1.6017954690115792, "num_chars": 7}, {"sum_logits": -5.199348449707031, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -18.424861907958984, "logits_per_token": -2.5996742248535156, "logits_per_char": -0.3999498807466947, "num_chars": 13}, {"sum_logits": -7.0944504737854, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -12.641867637634277, "logits_per_token": -7.0944504737854, "logits_per_char": -0.7094450473785401, "num_chars": 10}, {"sum_logits": -4.534207344055176, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -12.562989234924316, "logits_per_token": -4.534207344055176, "logits_per_char": -0.6477439062935966, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 724, "native_id": "7bff23f6c12e9136f0961514bebb8cd3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.272849082946777, "incorrect_loss_raw": 9.544679880142212, "correct_loss_per_char": 0.43940409024556476, "incorrect_loss_per_char": 1.048367977142334, "correct_loss_per_token": 1.757616360982259, "incorrect_loss_per_token": 5.964760661125183, "correct_loss_uncond": -11.868752479553223, "incorrect_loss_uncond": -6.807691335678101}, "model_output": [{"sum_logits": -3.1870241165161133, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.495595932006836, "logits_per_token": -3.1870241165161133, "logits_per_char": -0.5311706860860189, "num_chars": 6}, {"sum_logits": -6.352341651916504, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -10.74312973022461, "logits_per_token": -6.352341651916504, "logits_per_char": -1.0587236086527507, "num_chars": 6}, {"sum_logits": -13.30904769897461, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.208065032958984, "logits_per_token": -6.654523849487305, "logits_per_char": -1.20991342717951, "num_chars": 11}, {"sum_logits": -5.272849082946777, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.1416015625, "logits_per_token": -1.757616360982259, "logits_per_char": -0.43940409024556476, "num_chars": 12}, {"sum_logits": -15.330306053161621, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -22.96269416809082, "logits_per_token": -7.6651530265808105, "logits_per_char": -1.3936641866510564, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 725, "native_id": "20ae70b9b157b298569cd761787833e7", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.7055017948150635, "incorrect_loss_raw": 8.04314512014389, "correct_loss_per_char": 0.37055017948150637, "incorrect_loss_per_char": 1.2146486749251681, "correct_loss_per_token": 3.7055017948150635, "incorrect_loss_per_token": 5.648091644048691, "correct_loss_uncond": -10.750462293624878, "incorrect_loss_uncond": -7.470198929309845}, "model_output": [{"sum_logits": -8.730091094970703, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.73328971862793, "logits_per_token": -8.730091094970703, "logits_per_char": -1.7460182189941407, "num_chars": 5}, {"sum_logits": -8.099422454833984, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -11.587641716003418, "logits_per_token": -8.099422454833984, "logits_per_char": -2.024855613708496, "num_chars": 4}, {"sum_logits": -3.890984296798706, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.658267974853516, "logits_per_token": -1.945492148399353, "logits_per_char": -0.32424869139989215, "num_chars": 12}, {"sum_logits": -11.452082633972168, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -21.074176788330078, "logits_per_token": -3.8173608779907227, "logits_per_char": -0.7634721755981445, "num_chars": 15}, {"sum_logits": -3.7055017948150635, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.455964088439941, "logits_per_token": -3.7055017948150635, "logits_per_char": -0.37055017948150637, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 726, "native_id": "bdd29d7c12e3d795b78ffc048631e7e7", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 2.485065460205078, "incorrect_loss_raw": 10.36760687828064, "correct_loss_per_char": 0.4970130920410156, "incorrect_loss_per_char": 1.0584665173558867, "correct_loss_per_token": 2.485065460205078, "incorrect_loss_per_token": 5.484369119008382, "correct_loss_uncond": -11.903084754943848, "incorrect_loss_uncond": -6.968796253204346}, "model_output": [{"sum_logits": -12.733484268188477, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.835020065307617, "logits_per_token": -4.244494756062825, "logits_per_char": -1.4148315853542752, "num_chars": 9}, {"sum_logits": -10.494232177734375, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.06551170349121, "logits_per_token": -5.2471160888671875, "logits_per_char": -0.8072486290564904, "num_chars": 13}, {"sum_logits": -6.649020195007324, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -11.641695976257324, "logits_per_token": -6.649020195007324, "logits_per_char": -1.329804039001465, "num_chars": 5}, {"sum_logits": -2.485065460205078, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.388150215148926, "logits_per_token": -2.485065460205078, "logits_per_char": -0.4970130920410156, "num_chars": 5}, {"sum_logits": -11.593690872192383, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -23.80338478088379, "logits_per_token": -5.796845436096191, "logits_per_char": -0.6819818160113167, "num_chars": 17}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 727, "native_id": "cc1a547bdfdcc95e4d632453af14bc96", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.083414077758789, "incorrect_loss_raw": 11.253262042999268, "correct_loss_per_char": 1.5139023462931316, "incorrect_loss_per_char": 1.7099071502685548, "correct_loss_per_token": 9.083414077758789, "incorrect_loss_per_token": 11.253262042999268, "correct_loss_uncond": -2.806239128112793, "incorrect_loss_uncond": -3.2738380432128906}, "model_output": [{"sum_logits": -11.775703430175781, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -14.307762145996094, "logits_per_token": -11.775703430175781, "logits_per_char": -1.4719629287719727, "num_chars": 8}, {"sum_logits": -11.565319061279297, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -14.986479759216309, "logits_per_token": -11.565319061279297, "logits_per_char": -1.2850354512532551, "num_chars": 9}, {"sum_logits": -9.083414077758789, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -11.889653205871582, "logits_per_token": -9.083414077758789, "logits_per_char": -1.5139023462931316, "num_chars": 6}, {"sum_logits": -7.553247451782227, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -15.672751426696777, "logits_per_token": -7.553247451782227, "logits_per_char": -1.2588745752970378, "num_chars": 6}, {"sum_logits": -14.118778228759766, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -13.141407012939453, "logits_per_token": -14.118778228759766, "logits_per_char": -2.823755645751953, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 728, "native_id": "896b25dc41f84357add1c798d4a96cd8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.0519819259643555, "incorrect_loss_raw": 8.344554245471954, "correct_loss_per_char": 0.841996987660726, "incorrect_loss_per_char": 0.8363516955803603, "correct_loss_per_token": 5.0519819259643555, "incorrect_loss_per_token": 4.450174470742544, "correct_loss_uncond": -4.837010383605957, "incorrect_loss_uncond": -8.852732121944427}, "model_output": [{"sum_logits": -3.2015836238861084, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.071391105651855, "logits_per_token": -3.2015836238861084, "logits_per_char": -0.5335972706476847, "num_chars": 6}, {"sum_logits": -11.76392650604248, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -22.29747772216797, "logits_per_token": -3.9213088353474936, "logits_per_char": -0.7842617670694987, "num_chars": 15}, {"sum_logits": -5.0519819259643555, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -9.888992309570312, "logits_per_token": -5.0519819259643555, "logits_per_char": -0.841996987660726, "num_chars": 6}, {"sum_logits": -11.602352142333984, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.130722045898438, "logits_per_token": -3.867450714111328, "logits_per_char": -0.8924886263333834, "num_chars": 13}, {"sum_logits": -6.810354709625244, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.289554595947266, "logits_per_token": -6.810354709625244, "logits_per_char": -1.135059118270874, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 729, "native_id": "1ca3cd9475d7e9da2ddb74911ee2bb68", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 12.527987480163574, "incorrect_loss_raw": 14.753291368484497, "correct_loss_per_char": 1.2527987480163574, "incorrect_loss_per_char": 1.398437843952344, "correct_loss_per_token": 6.263993740081787, "incorrect_loss_per_token": 11.095255017280579, "correct_loss_uncond": -3.031765937805176, "incorrect_loss_uncond": -0.5741305351257324}, "model_output": [{"sum_logits": -13.853032112121582, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.561187744140625, "logits_per_token": -13.853032112121582, "logits_per_char": -1.1544193426767986, "num_chars": 12}, {"sum_logits": -19.5095272064209, "num_tokens": 4, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.239635467529297, "logits_per_token": -4.877381801605225, "logits_per_char": -1.7735933824019, "num_chars": 11}, {"sum_logits": -11.669734954833984, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.043295860290527, "logits_per_token": -11.669734954833984, "logits_per_char": -1.667104993547712, "num_chars": 7}, {"sum_logits": -13.980871200561523, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.465568542480469, "logits_per_token": -13.980871200561523, "logits_per_char": -0.9986336571829659, "num_chars": 14}, {"sum_logits": -12.527987480163574, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.55975341796875, "logits_per_token": -6.263993740081787, "logits_per_char": -1.2527987480163574, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 730, "native_id": "129ec46cc2541b73198d774ee632c8d7", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.340161323547363, "incorrect_loss_raw": 7.211829781532288, "correct_loss_per_char": 0.6675201654434204, "incorrect_loss_per_char": 1.040249716667902, "correct_loss_per_token": 2.6700806617736816, "incorrect_loss_per_token": 4.905820965766907, "correct_loss_uncond": -9.03069019317627, "incorrect_loss_uncond": -8.02268922328949}, "model_output": [{"sum_logits": -11.018566131591797, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.102439880371094, "logits_per_token": -5.509283065795898, "logits_per_char": -1.5740808759416853, "num_chars": 7}, {"sum_logits": -7.42950439453125, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.75267791748047, "logits_per_token": -3.714752197265625, "logits_per_char": -1.0613577706473214, "num_chars": 7}, {"sum_logits": -5.340161323547363, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.370851516723633, "logits_per_token": -2.6700806617736816, "logits_per_char": -0.6675201654434204, "num_chars": 8}, {"sum_logits": -4.163491725921631, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -11.66738510131836, "logits_per_token": -4.163491725921631, "logits_per_char": -0.8326983451843262, "num_chars": 5}, {"sum_logits": -6.235756874084473, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.415573120117188, "logits_per_token": -6.235756874084473, "logits_per_char": -0.6928618748982748, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 731, "native_id": "0e5c7c0cec5b693e52f74f5f879d84fb", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.94983434677124, "incorrect_loss_raw": 16.33995223045349, "correct_loss_per_char": 0.4968646466732025, "incorrect_loss_per_char": 1.0203978930688886, "correct_loss_per_token": 3.97491717338562, "incorrect_loss_per_token": 7.0712826490402225, "correct_loss_uncond": -16.90532350540161, "incorrect_loss_uncond": -4.472728490829468}, "model_output": [{"sum_logits": -16.413776397705078, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.268056869506836, "logits_per_token": -8.206888198852539, "logits_per_char": -1.262598184438852, "num_chars": 13}, {"sum_logits": -7.94983434677124, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -24.85515785217285, "logits_per_token": -3.97491717338562, "logits_per_char": -0.4968646466732025, "num_chars": 16}, {"sum_logits": -18.67080307006836, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -21.566604614257812, "logits_per_token": -9.33540153503418, "logits_per_char": -1.098282533533433, "num_chars": 17}, {"sum_logits": -15.625983238220215, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -23.368438720703125, "logits_per_token": -7.812991619110107, "logits_per_char": -0.7440944399152484, "num_chars": 21}, {"sum_logits": -14.649246215820312, "num_tokens": 5, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -20.047622680664062, "logits_per_token": -2.9298492431640626, "logits_per_char": -0.9766164143880208, "num_chars": 15}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 732, "native_id": "af035b75b6f7a1927b1648963f281c5e", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.478407382965088, "incorrect_loss_raw": 10.678839564323425, "correct_loss_per_char": 0.7464012304941813, "incorrect_loss_per_char": 1.1501729759104522, "correct_loss_per_token": 4.478407382965088, "incorrect_loss_per_token": 8.354231715202332, "correct_loss_uncond": -7.411245822906494, "incorrect_loss_uncond": -4.081839442253113}, "model_output": [{"sum_logits": -8.857738494873047, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.434986114501953, "logits_per_token": -8.857738494873047, "logits_per_char": -1.1072173118591309, "num_chars": 8}, {"sum_logits": -4.478407382965088, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.889653205871582, "logits_per_token": -4.478407382965088, "logits_per_char": -0.7464012304941813, "num_chars": 6}, {"sum_logits": -13.947647094726562, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.2445011138916, "logits_per_token": -4.6492156982421875, "logits_per_char": -1.0728959303635817, "num_chars": 13}, {"sum_logits": -13.346649169921875, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.893023490905762, "logits_per_token": -13.346649169921875, "logits_per_char": -1.4829610188802083, "num_chars": 9}, {"sum_logits": -6.563323497772217, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.470205307006836, "logits_per_token": -6.563323497772217, "logits_per_char": -0.9376176425388881, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 733, "native_id": "32d5b7fcae24f0d4871cfb219c5a4b47", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.635340690612793, "incorrect_loss_raw": 11.959036350250244, "correct_loss_per_char": 0.46961172421773273, "incorrect_loss_per_char": 1.4264464839069162, "correct_loss_per_token": 5.635340690612793, "incorrect_loss_per_token": 7.283210039138794, "correct_loss_uncond": -9.548073768615723, "incorrect_loss_uncond": -5.8904478549957275}, "model_output": [{"sum_logits": -12.261201858520508, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -16.289138793945312, "logits_per_token": -6.130600929260254, "logits_per_char": -1.3623557620578342, "num_chars": 9}, {"sum_logits": -8.84988021850586, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.642197608947754, "logits_per_token": -8.84988021850586, "logits_per_char": -2.212470054626465, "num_chars": 4}, {"sum_logits": -5.635340690612793, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -15.183414459228516, "logits_per_token": -5.635340690612793, "logits_per_char": -0.46961172421773273, "num_chars": 12}, {"sum_logits": -18.85905647277832, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -25.28786277770996, "logits_per_token": -6.286352157592773, "logits_per_char": -0.8199589770773182, "num_chars": 23}, {"sum_logits": -7.866006851196289, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -16.17873764038086, "logits_per_token": -7.866006851196289, "logits_per_char": -1.311001141866048, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 734, "native_id": "87505da761eaa5c3c4703d02a12d46bc", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 14.201030731201172, "incorrect_loss_raw": 11.898962020874023, "correct_loss_per_char": 0.7474226700632196, "incorrect_loss_per_char": 1.0650869833700585, "correct_loss_per_token": 4.733676910400391, "incorrect_loss_per_token": 3.692529857158661, "correct_loss_uncond": -13.507226943969727, "incorrect_loss_uncond": -7.467327356338501}, "model_output": [{"sum_logits": -8.038045883178711, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.22107219696045, "logits_per_token": -4.0190229415893555, "logits_per_char": -1.0047557353973389, "num_chars": 8}, {"sum_logits": -19.34083366394043, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -26.61613655090332, "logits_per_token": -4.835208415985107, "logits_per_char": -1.2088021039962769, "num_chars": 16}, {"sum_logits": -9.877217292785645, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.703327178955078, "logits_per_token": -2.469304323196411, "logits_per_char": -0.897928844798695, "num_chars": 11}, {"sum_logits": -14.201030731201172, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -27.7082576751709, "logits_per_token": -4.733676910400391, "logits_per_char": -0.7474226700632196, "num_chars": 19}, {"sum_logits": -10.339751243591309, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.92462158203125, "logits_per_token": -3.4465837478637695, "logits_per_char": -1.148861249287923, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 735, "native_id": "ef3d5d35128678937c36438466e0fc93", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.901956081390381, "incorrect_loss_raw": 7.002247095108032, "correct_loss_per_char": 0.32679707209269204, "incorrect_loss_per_char": 0.8558089724092772, "correct_loss_per_token": 2.4509780406951904, "incorrect_loss_per_token": 5.059603452682495, "correct_loss_uncond": -13.853426456451416, "incorrect_loss_uncond": -8.106826543807983}, "model_output": [{"sum_logits": -6.669859886169434, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.98387908935547, "logits_per_token": -3.334929943084717, "logits_per_char": -0.8337324857711792, "num_chars": 8}, {"sum_logits": -7.078815460205078, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.347005844116211, "logits_per_token": -7.078815460205078, "logits_per_char": -0.8848519325256348, "num_chars": 8}, {"sum_logits": -5.389023780822754, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.262615203857422, "logits_per_token": -5.389023780822754, "logits_per_char": -0.8981706301371256, "num_chars": 6}, {"sum_logits": -8.871289253234863, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.842794418334961, "logits_per_token": -4.435644626617432, "logits_per_char": -0.8064808412031694, "num_chars": 11}, {"sum_logits": -4.901956081390381, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.755382537841797, "logits_per_token": -2.4509780406951904, "logits_per_char": -0.32679707209269204, "num_chars": 15}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 736, "native_id": "4f1d8007b446b0e10f07fd63cbd31b6f", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.3667213916778564, "incorrect_loss_raw": 10.652183771133423, "correct_loss_per_char": 0.4733442783355713, "incorrect_loss_per_char": 1.2754027616410026, "correct_loss_per_token": 2.3667213916778564, "incorrect_loss_per_token": 6.815443476041158, "correct_loss_uncond": -9.356676816940308, "incorrect_loss_uncond": -3.8539950847625732}, "model_output": [{"sum_logits": -12.45888614654541, "num_tokens": 1, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -12.071391105651855, "logits_per_token": -12.45888614654541, "logits_per_char": -2.076481024424235, "num_chars": 6}, {"sum_logits": -2.3667213916778564, "num_tokens": 1, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -11.723398208618164, "logits_per_token": -2.3667213916778564, "logits_per_char": -0.4733442783355713, "num_chars": 5}, {"sum_logits": -13.921120643615723, "num_tokens": 3, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -17.763031005859375, "logits_per_token": -4.640373547871907, "logits_per_char": -0.994365760258266, "num_chars": 14}, {"sum_logits": -12.132428169250488, "num_tokens": 2, "num_tokens_all": 158, "is_greedy": false, "sum_logits_uncond": -16.2939453125, "logits_per_token": -6.066214084625244, "logits_per_char": -1.3480475743611653, "num_chars": 9}, {"sum_logits": -4.09630012512207, "num_tokens": 1, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -11.896347999572754, "logits_per_token": -4.09630012512207, "logits_per_char": -0.6827166875203451, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 737, "native_id": "4c30d5eed4137cba89747510973f37a3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.8572163581848145, "incorrect_loss_raw": 12.183409929275513, "correct_loss_per_char": 0.2857216358184814, "incorrect_loss_per_char": 1.2530333195413863, "correct_loss_per_token": 2.8572163581848145, "incorrect_loss_per_token": 8.246675491333008, "correct_loss_uncond": -12.578901767730713, "incorrect_loss_uncond": -4.296499252319336}, "model_output": [{"sum_logits": -9.745694160461426, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.260344505310059, "logits_per_token": -9.745694160461426, "logits_per_char": -1.9491388320922851, "num_chars": 5}, {"sum_logits": -2.8572163581848145, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.436118125915527, "logits_per_token": -2.8572163581848145, "logits_per_char": -0.2857216358184814, "num_chars": 10}, {"sum_logits": -13.812606811523438, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.54778289794922, "logits_per_token": -6.906303405761719, "logits_per_char": -0.8632879257202148, "num_chars": 16}, {"sum_logits": -17.6812686920166, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.3560848236084, "logits_per_token": -8.8406343460083, "logits_per_char": -1.2629477637154716, "num_chars": 14}, {"sum_logits": -7.494070053100586, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.755424499511719, "logits_per_token": -7.494070053100586, "logits_per_char": -0.9367587566375732, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 738, "native_id": "515834727e23e30ab7c8fe5ba7e9a765", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.355313777923584, "incorrect_loss_raw": 11.304683089256287, "correct_loss_per_char": 0.622187682560512, "incorrect_loss_per_char": 0.9550517225549335, "correct_loss_per_token": 4.355313777923584, "incorrect_loss_per_token": 6.344576358795166, "correct_loss_uncond": -11.276922702789307, "incorrect_loss_uncond": -6.623314261436462}, "model_output": [{"sum_logits": -17.490421295166016, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -19.2803955078125, "logits_per_token": -8.745210647583008, "logits_per_char": -1.5900382995605469, "num_chars": 11}, {"sum_logits": -4.355313777923584, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -15.63223648071289, "logits_per_token": -4.355313777923584, "logits_per_char": -0.622187682560512, "num_chars": 7}, {"sum_logits": -5.537878513336182, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -13.5768461227417, "logits_per_token": -5.537878513336182, "logits_per_char": -0.6922348141670227, "num_chars": 8}, {"sum_logits": -12.300037384033203, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -18.478736877441406, "logits_per_token": -6.150018692016602, "logits_per_char": -0.8785740988595145, "num_chars": 14}, {"sum_logits": -9.890395164489746, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -20.37601089477539, "logits_per_token": -4.945197582244873, "logits_per_char": -0.6593596776326497, "num_chars": 15}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 739, "native_id": "34ec6393db5a01f689c11fac153e31c1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.656700134277344, "incorrect_loss_raw": 23.334474563598633, "correct_loss_per_char": 1.6094500223795574, "incorrect_loss_per_char": 1.6433792051433418, "correct_loss_per_token": 9.656700134277344, "incorrect_loss_per_token": 8.662166754404705, "correct_loss_uncond": -3.9365482330322266, "incorrect_loss_uncond": 1.3753166198730469}, "model_output": [{"sum_logits": -9.656700134277344, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -13.59324836730957, "logits_per_token": -9.656700134277344, "logits_per_char": -1.6094500223795574, "num_chars": 6}, {"sum_logits": -20.442039489746094, "num_tokens": 3, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -19.937788009643555, "logits_per_token": -6.814013163248698, "logits_per_char": -1.1356688605414496, "num_chars": 18}, {"sum_logits": -22.032001495361328, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -18.636964797973633, "logits_per_token": -11.016000747680664, "logits_per_char": -2.4480001661512585, "num_chars": 9}, {"sum_logits": -34.453102111816406, "num_tokens": 4, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -30.248432159423828, "logits_per_token": -8.613275527954102, "logits_per_char": -1.497960961383322, "num_chars": 23}, {"sum_logits": -16.410755157470703, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -19.013446807861328, "logits_per_token": -8.205377578735352, "logits_per_char": -1.4918868324973367, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 740, "native_id": "0f0e339412f719a019bf373e6daf2530", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.910534858703613, "incorrect_loss_raw": 12.390506267547607, "correct_loss_per_char": 0.5315796045156626, "incorrect_loss_per_char": 1.4282408396402997, "correct_loss_per_token": 2.303511619567871, "incorrect_loss_per_token": 7.280019283294678, "correct_loss_uncond": -16.511034965515137, "incorrect_loss_uncond": -5.863046884536743}, "model_output": [{"sum_logits": -6.910534858703613, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -23.42156982421875, "logits_per_token": -2.303511619567871, "logits_per_char": -0.5315796045156626, "num_chars": 13}, {"sum_logits": -12.719645500183105, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -23.389488220214844, "logits_per_token": -6.359822750091553, "logits_per_char": -0.8479763666788737, "num_chars": 15}, {"sum_logits": -8.678129196166992, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.730328559875488, "logits_per_token": -8.678129196166992, "logits_per_char": -1.7356258392333985, "num_chars": 5}, {"sum_logits": -12.790072441101074, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -16.763940811157227, "logits_per_token": -6.395036220550537, "logits_per_char": -1.4211191601223416, "num_chars": 9}, {"sum_logits": -15.374177932739258, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -18.130455017089844, "logits_per_token": -7.687088966369629, "logits_per_char": -1.7082419925265842, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 741, "native_id": "489a082aab43dd1a53f3f1f89c2365ed", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.0057244300842285, "incorrect_loss_raw": 16.182321786880493, "correct_loss_per_char": 0.12571555376052856, "incorrect_loss_per_char": 1.5168017120072335, "correct_loss_per_token": 1.0057244300842285, "incorrect_loss_per_token": 8.091160893440247, "correct_loss_uncond": -12.124990940093994, "incorrect_loss_uncond": -1.6994078159332275}, "model_output": [{"sum_logits": -1.0057244300842285, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": true, "sum_logits_uncond": -13.130715370178223, "logits_per_token": -1.0057244300842285, "logits_per_char": -0.12571555376052856, "num_chars": 8}, {"sum_logits": -15.854571342468262, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.578195571899414, "logits_per_token": -7.927285671234131, "logits_per_char": -1.4413246674971147, "num_chars": 11}, {"sum_logits": -20.41831398010254, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.54186248779297, "logits_per_token": -10.20915699005127, "logits_per_char": -2.041831398010254, "num_chars": 10}, {"sum_logits": -15.695363998413086, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.07761001586914, "logits_per_token": -7.847681999206543, "logits_per_char": -1.307946999867757, "num_chars": 12}, {"sum_logits": -12.761037826538086, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.32925033569336, "logits_per_token": -6.380518913269043, "logits_per_char": -1.2761037826538086, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 742, "native_id": "7c45033e9fd9f1a759923971b14390ed", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.2873845100402832, "incorrect_loss_raw": 23.557411193847656, "correct_loss_per_char": 0.3218461275100708, "incorrect_loss_per_char": 1.5092580456794042, "correct_loss_per_token": 1.2873845100402832, "incorrect_loss_per_token": 9.847424030303957, "correct_loss_uncond": -11.755675792694092, "incorrect_loss_uncond": 1.4837408065795898}, "model_output": [{"sum_logits": -17.896583557128906, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.514240264892578, "logits_per_token": -17.896583557128906, "logits_per_char": -1.6269621415571733, "num_chars": 11}, {"sum_logits": -17.86989974975586, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.48796272277832, "logits_per_token": -5.95663324991862, "logits_per_char": -1.1168687343597412, "num_chars": 16}, {"sum_logits": -17.377857208251953, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.218969345092773, "logits_per_token": -8.688928604125977, "logits_per_char": -1.3367582467886119, "num_chars": 13}, {"sum_logits": -41.085304260253906, "num_tokens": 6, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -37.073509216308594, "logits_per_token": -6.847550710042317, "logits_per_char": -1.9564430600120908, "num_chars": 21}, {"sum_logits": -1.2873845100402832, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": true, "sum_logits_uncond": -13.043060302734375, "logits_per_token": -1.2873845100402832, "logits_per_char": -0.3218461275100708, "num_chars": 4}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 743, "native_id": "061f326d2a87a10da6316b55bd5522bd", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.220664978027344, "incorrect_loss_raw": 9.691166400909424, "correct_loss_per_char": 0.602952139718192, "incorrect_loss_per_char": 0.8746595179191743, "correct_loss_per_token": 4.220664978027344, "incorrect_loss_per_token": 5.814722100893657, "correct_loss_uncond": -11.411571502685547, "incorrect_loss_uncond": -8.564699172973633}, "model_output": [{"sum_logits": -8.04929256439209, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -18.799386978149414, "logits_per_token": -4.024646282196045, "logits_per_char": -0.6707743803660074, "num_chars": 12}, {"sum_logits": -7.929037094116211, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -20.37601089477539, "logits_per_token": -3.9645185470581055, "logits_per_char": -0.5286024729410808, "num_chars": 15}, {"sum_logits": -11.511417388916016, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -13.643386840820312, "logits_per_token": -11.511417388916016, "logits_per_char": -1.046492489901456, "num_chars": 11}, {"sum_logits": -4.220664978027344, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -15.63223648071289, "logits_per_token": -4.220664978027344, "logits_per_char": -0.602952139718192, "num_chars": 7}, {"sum_logits": -11.274918556213379, "num_tokens": 3, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -20.20467758178711, "logits_per_token": -3.7583061854044595, "logits_per_char": -1.2527687284681532, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 744, "native_id": "d747c4e463b80bfcc49b874063f9fae1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.850337982177734, "incorrect_loss_raw": 11.173799514770508, "correct_loss_per_char": 0.41788128444126676, "incorrect_loss_per_char": 1.4821285899446792, "correct_loss_per_token": 2.925168991088867, "incorrect_loss_per_token": 8.379844546318054, "correct_loss_uncond": -11.098247528076172, "incorrect_loss_uncond": -4.946467399597168}, "model_output": [{"sum_logits": -11.155135154724121, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.140838623046875, "logits_per_token": -5.5775675773620605, "logits_per_char": -1.3943918943405151, "num_chars": 8}, {"sum_logits": -10.381124496459961, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.737959861755371, "logits_per_token": -10.381124496459961, "logits_per_char": -1.4830177852085658, "num_chars": 7}, {"sum_logits": -11.962433815002441, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.074721336364746, "logits_per_token": -11.962433815002441, "logits_per_char": -2.3924867630004885, "num_chars": 5}, {"sum_logits": -5.850337982177734, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.948585510253906, "logits_per_token": -2.925168991088867, "logits_per_char": -0.41788128444126676, "num_chars": 14}, {"sum_logits": -11.196504592895508, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.52754783630371, "logits_per_token": -5.598252296447754, "logits_per_char": -0.6586179172291475, "num_chars": 17}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 745, "native_id": "df3d27338bcf86b341b8b02d4309daf5", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.910087585449219, "incorrect_loss_raw": 8.39169991016388, "correct_loss_per_char": 0.8183479309082031, "incorrect_loss_per_char": 1.1269011177799917, "correct_loss_per_token": 4.910087585449219, "incorrect_loss_per_token": 8.39169991016388, "correct_loss_uncond": -7.9721879959106445, "incorrect_loss_uncond": -4.8659409284591675}, "model_output": [{"sum_logits": -8.089138984680176, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.889653205871582, "logits_per_token": -8.089138984680176, "logits_per_char": -1.3481898307800293, "num_chars": 6}, {"sum_logits": -4.910087585449219, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.882275581359863, "logits_per_token": -4.910087585449219, "logits_per_char": -0.8183479309082031, "num_chars": 6}, {"sum_logits": -8.661697387695312, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.953621864318848, "logits_per_token": -8.661697387695312, "logits_per_char": -0.7874270352450284, "num_chars": 11}, {"sum_logits": -3.59989595413208, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.800455093383789, "logits_per_token": -3.59989595413208, "logits_per_char": -0.719979190826416, "num_chars": 5}, {"sum_logits": -13.21606731414795, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.386833190917969, "logits_per_token": -13.21606731414795, "logits_per_char": -1.6520084142684937, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 746, "native_id": "db63bf66a8bfd16e5103cbdd350f5202", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.786960601806641, "incorrect_loss_raw": 13.445556402206421, "correct_loss_per_char": 0.9733700752258301, "incorrect_loss_per_char": 1.4046043369505141, "correct_loss_per_token": 7.786960601806641, "incorrect_loss_per_token": 9.46507215499878, "correct_loss_uncond": -7.050019264221191, "incorrect_loss_uncond": -2.3861021995544434}, "model_output": [{"sum_logits": -7.786960601806641, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.836979866027832, "logits_per_token": -7.786960601806641, "logits_per_char": -0.9733700752258301, "num_chars": 8}, {"sum_logits": -10.576090812683105, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.349469184875488, "logits_per_token": -10.576090812683105, "logits_per_char": -1.7626818021138508, "num_chars": 6}, {"sum_logits": -15.652181625366211, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -18.086496353149414, "logits_per_token": -7.8260908126831055, "logits_per_char": -1.565218162536621, "num_chars": 10}, {"sum_logits": -15.410183906555176, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.015937805175781, "logits_per_token": -15.410183906555176, "logits_per_char": -1.7122426562839084, "num_chars": 9}, {"sum_logits": -12.143769264221191, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -17.874731063842773, "logits_per_token": -4.0479230880737305, "logits_per_char": -0.5782747268676758, "num_chars": 21}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 747, "native_id": "f8a9208665a4f2d64986940456b4b964", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.913251876831055, "incorrect_loss_raw": 18.210493087768555, "correct_loss_per_char": 1.1913251876831055, "incorrect_loss_per_char": 1.1920580227393467, "correct_loss_per_token": 5.956625938415527, "incorrect_loss_per_token": 7.297007918357849, "correct_loss_uncond": -4.575532913208008, "incorrect_loss_uncond": -5.96614933013916}, "model_output": [{"sum_logits": -12.167049407958984, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.204669952392578, "logits_per_token": -6.083524703979492, "logits_per_char": -1.3518943786621094, "num_chars": 9}, {"sum_logits": -28.93181800842285, "num_tokens": 4, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -36.69499588012695, "logits_per_token": -7.232954502105713, "logits_per_char": -1.0715488151267722, "num_chars": 27}, {"sum_logits": -17.65188980102539, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -23.036636352539062, "logits_per_token": -8.825944900512695, "logits_per_char": -1.2608492715018136, "num_chars": 14}, {"sum_logits": -11.913251876831055, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.488784790039062, "logits_per_token": -5.956625938415527, "logits_per_char": -1.1913251876831055, "num_chars": 10}, {"sum_logits": -14.091215133666992, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.770267486572266, "logits_per_token": -7.045607566833496, "logits_per_char": -1.0839396256666918, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 748, "native_id": "1bf4c6b5bd870b1a079106e1e97e5d09", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.4918885231018066, "incorrect_loss_raw": 10.077738523483276, "correct_loss_per_char": 0.43648606538772583, "incorrect_loss_per_char": 1.1294136849316683, "correct_loss_per_token": 3.4918885231018066, "incorrect_loss_per_token": 5.514810383319855, "correct_loss_uncond": -9.349770069122314, "incorrect_loss_uncond": -5.21765398979187}, "model_output": [{"sum_logits": -17.781484603881836, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.688199996948242, "logits_per_token": -8.890742301940918, "logits_per_char": -1.6164986003528943, "num_chars": 11}, {"sum_logits": -4.500406265258789, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.254728317260742, "logits_per_token": -4.500406265258789, "logits_per_char": -0.9000812530517578, "num_chars": 5}, {"sum_logits": -3.4918885231018066, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.841658592224121, "logits_per_token": -3.4918885231018066, "logits_per_char": -0.43648606538772583, "num_chars": 8}, {"sum_logits": -5.547769546508789, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.506343841552734, "logits_per_token": -5.547769546508789, "logits_per_char": -1.109553909301758, "num_chars": 5}, {"sum_logits": -12.481293678283691, "num_tokens": 4, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.732297897338867, "logits_per_token": -3.120323419570923, "logits_per_char": -0.8915209770202637, "num_chars": 14}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 749, "native_id": "c1c73ef0ff662a76cd42c3500240974a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.0867652893066406, "incorrect_loss_raw": 10.185406923294067, "correct_loss_per_char": 0.19292283058166504, "incorrect_loss_per_char": 1.5820294136092778, "correct_loss_per_token": 1.5433826446533203, "incorrect_loss_per_token": 7.825551271438599, "correct_loss_uncond": -15.932378768920898, "incorrect_loss_uncond": -5.010164737701416}, "model_output": [{"sum_logits": -3.0867652893066406, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": true, "sum_logits_uncond": -19.01914405822754, "logits_per_token": -1.5433826446533203, "logits_per_char": -0.19292283058166504, "num_chars": 16}, {"sum_logits": -10.390054702758789, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.109554290771484, "logits_per_token": -5.1950273513793945, "logits_per_char": -1.4842935289655412, "num_chars": 7}, {"sum_logits": -12.406526565551758, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.696253776550293, "logits_per_token": -12.406526565551758, "logits_per_char": -1.7723609379359655, "num_chars": 7}, {"sum_logits": -8.488790512084961, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.658267974853516, "logits_per_token": -4.2443952560424805, "logits_per_char": -0.7073992093404134, "num_chars": 12}, {"sum_logits": -9.456255912780762, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.31821060180664, "logits_per_token": -9.456255912780762, "logits_per_char": -2.3640639781951904, "num_chars": 4}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 750, "native_id": "aefa60233f3c5c4966f8ac22e901a1c7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.399634838104248, "incorrect_loss_raw": 10.30106770992279, "correct_loss_per_char": 0.26151037216186523, "incorrect_loss_per_char": 1.760675338904063, "correct_loss_per_token": 3.399634838104248, "incorrect_loss_per_token": 10.30106770992279, "correct_loss_uncond": -11.396596431732178, "incorrect_loss_uncond": -3.896990180015564}, "model_output": [{"sum_logits": -3.399634838104248, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.796231269836426, "logits_per_token": -3.399634838104248, "logits_per_char": -0.26151037216186523, "num_chars": 13}, {"sum_logits": -6.21854829788208, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.254728317260742, "logits_per_token": -6.21854829788208, "logits_per_char": -1.243709659576416, "num_chars": 5}, {"sum_logits": -10.222867012023926, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.89412784576416, "logits_per_token": -10.222867012023926, "logits_per_char": -2.0445734024047852, "num_chars": 5}, {"sum_logits": -10.269881248474121, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.157535552978516, "logits_per_token": -10.269881248474121, "logits_per_char": -0.8558234373728434, "num_chars": 12}, {"sum_logits": -14.492974281311035, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.48583984375, "logits_per_token": -14.492974281311035, "logits_per_char": -2.898594856262207, "num_chars": 5}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 751, "native_id": "9221962ed3a6094e5c8f33785ce048cd", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 4.655303955078125, "incorrect_loss_raw": 14.001204252243042, "correct_loss_per_char": 0.931060791015625, "incorrect_loss_per_char": 1.3420587954358159, "correct_loss_per_token": 4.655303955078125, "incorrect_loss_per_token": 5.536234418551127, "correct_loss_uncond": -6.8699750900268555, "incorrect_loss_uncond": -3.642043352127075}, "model_output": [{"sum_logits": -15.739358901977539, "num_tokens": 4, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.93042755126953, "logits_per_token": -3.9348397254943848, "logits_per_char": -1.2107199155367339, "num_chars": 13}, {"sum_logits": -16.43947410583496, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.3109130859375, "logits_per_token": -5.479824701944987, "logits_per_char": -0.9133041169908311, "num_chars": 18}, {"sum_logits": -4.655303955078125, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.52527904510498, "logits_per_token": -4.655303955078125, "logits_per_char": -0.931060791015625, "num_chars": 5}, {"sum_logits": -9.031702995300293, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -9.070283889770508, "logits_per_token": -9.031702995300293, "logits_per_char": -2.2579257488250732, "num_chars": 4}, {"sum_logits": -14.794281005859375, "num_tokens": 4, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.26136589050293, "logits_per_token": -3.6985702514648438, "logits_per_char": -0.986285400390625, "num_chars": 15}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 752, "native_id": "8c8052980e357545398d27d1c3c832d8", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.648653984069824, "incorrect_loss_raw": 10.65686285495758, "correct_loss_per_char": 0.4499208225923426, "incorrect_loss_per_char": 1.0378864110607804, "correct_loss_per_token": 1.912163496017456, "incorrect_loss_per_token": 6.417179644107819, "correct_loss_uncond": -9.788578987121582, "incorrect_loss_uncond": -7.242366671562195}, "model_output": [{"sum_logits": -7.648653984069824, "num_tokens": 4, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.437232971191406, "logits_per_token": -1.912163496017456, "logits_per_char": -0.4499208225923426, "num_chars": 17}, {"sum_logits": -8.709985733032227, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.51136589050293, "logits_per_token": -8.709985733032227, "logits_per_char": -1.4516642888387044, "num_chars": 6}, {"sum_logits": -22.356258392333984, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -23.411361694335938, "logits_per_token": -11.178129196166992, "logits_per_char": -1.719712184025691, "num_chars": 13}, {"sum_logits": -3.890089988708496, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.280855178833008, "logits_per_token": -1.945044994354248, "logits_per_char": -0.43223222096761066, "num_chars": 9}, {"sum_logits": -7.671117305755615, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.393335342407227, "logits_per_token": -3.8355586528778076, "logits_per_char": -0.5479369504111153, "num_chars": 14}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 753, "native_id": "418913999c665ae9527fd14a6132da39", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.248815536499023, "incorrect_loss_raw": 11.7412428855896, "correct_loss_per_char": 0.4832543690999349, "incorrect_loss_per_char": 1.4940381598851038, "correct_loss_per_token": 3.6244077682495117, "incorrect_loss_per_token": 6.088704268137614, "correct_loss_uncond": -10.518655776977539, "incorrect_loss_uncond": -5.653600454330444}, "model_output": [{"sum_logits": -10.523773193359375, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.470956802368164, "logits_per_token": -5.2618865966796875, "logits_per_char": -1.1693081325954862, "num_chars": 9}, {"sum_logits": -7.248815536499023, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.767471313476562, "logits_per_token": -3.6244077682495117, "logits_per_char": -0.4832543690999349, "num_chars": 15}, {"sum_logits": -13.921610832214355, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.774185180664062, "logits_per_token": -4.640536944071452, "logits_per_char": -0.9944007737295968, "num_chars": 14}, {"sum_logits": -10.41879653930664, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.541193962097168, "logits_per_token": -10.41879653930664, "logits_per_char": -2.083759307861328, "num_chars": 5}, {"sum_logits": -12.100790977478027, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.79303741455078, "logits_per_token": -4.033596992492676, "logits_per_char": -1.728684425354004, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 754, "native_id": "2634468d21fa33a88cefe28a5d613f59", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.860547065734863, "incorrect_loss_raw": 10.266339540481567, "correct_loss_per_char": 0.6943638665335519, "incorrect_loss_per_char": 1.6060833408719017, "correct_loss_per_token": 4.860547065734863, "incorrect_loss_per_token": 4.583108723163605, "correct_loss_uncond": -8.764699935913086, "incorrect_loss_uncond": -7.314027786254883}, "model_output": [{"sum_logits": -10.850634574890137, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -16.45197296142578, "logits_per_token": -5.425317287445068, "logits_per_char": -2.1701269149780273, "num_chars": 5}, {"sum_logits": -10.646439552307129, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -18.31760025024414, "logits_per_token": -5.3232197761535645, "logits_per_char": -2.129287910461426, "num_chars": 5}, {"sum_logits": -8.800976753234863, "num_tokens": 4, "num_tokens_all": 158, "is_greedy": false, "sum_logits_uncond": -20.26136589050293, "logits_per_token": -2.200244188308716, "logits_per_char": -0.5867317835489909, "num_chars": 15}, {"sum_logits": -10.76730728149414, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -15.29053020477295, "logits_per_token": -5.38365364074707, "logits_per_char": -1.538186754499163, "num_chars": 7}, {"sum_logits": -4.860547065734863, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -13.62524700164795, "logits_per_token": -4.860547065734863, "logits_per_char": -0.6943638665335519, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 755, "native_id": "66bfb6e209c94e6be5b0d04b0c7e2064", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.119348526000977, "incorrect_loss_raw": 7.680880308151245, "correct_loss_per_char": 0.757459282875061, "incorrect_loss_per_char": 1.0750003289694738, "correct_loss_per_token": 6.059674263000488, "incorrect_loss_per_token": 4.345931390921274, "correct_loss_uncond": -6.521766662597656, "incorrect_loss_uncond": -6.627161979675293}, "model_output": [{"sum_logits": -7.266985893249512, "num_tokens": 4, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.882076263427734, "logits_per_token": -1.816746473312378, "logits_per_char": -0.6606350812045011, "num_chars": 11}, {"sum_logits": -11.834334373474121, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.835020065307617, "logits_per_token": -3.9447781244913735, "logits_per_char": -1.3149260414971247, "num_chars": 9}, {"sum_logits": -12.119348526000977, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.641115188598633, "logits_per_token": -6.059674263000488, "logits_per_char": -0.757459282875061, "num_chars": 16}, {"sum_logits": -6.953890800476074, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.254728317260742, "logits_per_token": -6.953890800476074, "logits_per_char": -1.390778160095215, "num_chars": 5}, {"sum_logits": -4.668310165405273, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.260344505310059, "logits_per_token": -4.668310165405273, "logits_per_char": -0.9336620330810547, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 756, "native_id": "3163910d665c139a1f6f07d85803baba", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.7965335845947266, "incorrect_loss_raw": 8.568962216377258, "correct_loss_per_char": 0.2566476549421038, "incorrect_loss_per_char": 1.351348416371779, "correct_loss_per_token": 1.7965335845947266, "incorrect_loss_per_token": 8.568962216377258, "correct_loss_uncond": -11.441722869873047, "incorrect_loss_uncond": -5.819960713386536}, "model_output": [{"sum_logits": -8.282418251037598, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.86915111541748, "logits_per_token": -8.282418251037598, "logits_per_char": -1.6564836502075195, "num_chars": 5}, {"sum_logits": -10.082489967346191, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.953621864318848, "logits_per_token": -10.082489967346191, "logits_per_char": -0.9165899970314719, "num_chars": 11}, {"sum_logits": -7.199752330780029, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.562139511108398, "logits_per_token": -7.199752330780029, "logits_per_char": -0.6545229391618208, "num_chars": 11}, {"sum_logits": -8.711188316345215, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.17077922821045, "logits_per_token": -8.711188316345215, "logits_per_char": -2.1777970790863037, "num_chars": 4}, {"sum_logits": -1.7965335845947266, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": true, "sum_logits_uncond": -13.238256454467773, "logits_per_token": -1.7965335845947266, "logits_per_char": -0.2566476549421038, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 757, "native_id": "0e52659484f2f6d763cf0d38d4c5999d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.099721670150757, "incorrect_loss_raw": 7.484751582145691, "correct_loss_per_char": 0.19088378819552335, "incorrect_loss_per_char": 0.7929163034086104, "correct_loss_per_token": 2.099721670150757, "incorrect_loss_per_token": 5.665921568870544, "correct_loss_uncond": -12.526502847671509, "incorrect_loss_uncond": -7.467881560325623}, "model_output": [{"sum_logits": -2.099721670150757, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.626224517822266, "logits_per_token": -2.099721670150757, "logits_per_char": -0.19088378819552335, "num_chars": 11}, {"sum_logits": -14.550640106201172, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.2833251953125, "logits_per_token": -7.275320053100586, "logits_per_char": -1.3227854642001065, "num_chars": 11}, {"sum_logits": -6.529105186462402, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.722381591796875, "logits_per_token": -6.529105186462402, "logits_per_char": -0.8161381483078003, "num_chars": 8}, {"sum_logits": -3.4256949424743652, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.127396583557129, "logits_per_token": -3.4256949424743652, "logits_per_char": -0.4893849917820522, "num_chars": 7}, {"sum_logits": -5.433566093444824, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.67742919921875, "logits_per_token": -5.433566093444824, "logits_per_char": -0.5433566093444824, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 758, "native_id": "167d2cfa04bfaea0e0b5bac3598d5769", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.7914743423461914, "incorrect_loss_raw": 12.070340156555176, "correct_loss_per_char": 0.17914743423461915, "incorrect_loss_per_char": 2.078466546535492, "correct_loss_per_token": 0.8957371711730957, "incorrect_loss_per_token": 8.87957795461019, "correct_loss_uncond": -17.88282299041748, "incorrect_loss_uncond": -2.6779491901397705}, "model_output": [{"sum_logits": -7.991151809692383, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.641695976257324, "logits_per_token": -7.991151809692383, "logits_per_char": -1.5982303619384766, "num_chars": 5}, {"sum_logits": -9.704607963562012, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.943608283996582, "logits_per_token": -9.704607963562012, "logits_per_char": -1.9409215927124024, "num_chars": 5}, {"sum_logits": -11.441027641296387, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.9362211227417, "logits_per_token": -11.441027641296387, "logits_per_char": -2.8602569103240967, "num_chars": 4}, {"sum_logits": -1.7914743423461914, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": true, "sum_logits_uncond": -19.674297332763672, "logits_per_token": -0.8957371711730957, "logits_per_char": -0.17914743423461915, "num_chars": 10}, {"sum_logits": -19.144573211669922, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.47163200378418, "logits_per_token": -6.381524403889974, "logits_per_char": -1.9144573211669922, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 759, "native_id": "39572e0ba1db51fa74f7fc2d90c5ec7f", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.192632675170898, "incorrect_loss_raw": 12.256901502609253, "correct_loss_per_char": 0.562966606833718, "incorrect_loss_per_char": 1.8049462352480208, "correct_loss_per_token": 3.096316337585449, "incorrect_loss_per_token": 10.315662145614624, "correct_loss_uncond": -12.945821762084961, "incorrect_loss_uncond": -1.7527172565460205}, "model_output": [{"sum_logits": -11.737369537353516, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.889824867248535, "logits_per_token": -11.737369537353516, "logits_per_char": -1.676767076764788, "num_chars": 7}, {"sum_logits": -10.89918327331543, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.782123565673828, "logits_per_token": -10.89918327331543, "logits_per_char": -2.179836654663086, "num_chars": 5}, {"sum_logits": -10.861138343811035, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.753384590148926, "logits_per_token": -10.861138343811035, "logits_per_char": -1.8101897239685059, "num_chars": 6}, {"sum_logits": -6.192632675170898, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.13845443725586, "logits_per_token": -3.096316337585449, "logits_per_char": -0.562966606833718, "num_chars": 11}, {"sum_logits": -15.529914855957031, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.613142013549805, "logits_per_token": -7.764957427978516, "logits_per_char": -1.5529914855957032, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 760, "native_id": "2a32b1e541b1daae04690d0d3a4b3310", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.63123607635498, "incorrect_loss_raw": 13.761392831802368, "correct_loss_per_char": 2.126247215270996, "incorrect_loss_per_char": 1.1529978766828692, "correct_loss_per_token": 10.63123607635498, "incorrect_loss_per_token": 8.784047603607178, "correct_loss_uncond": -2.3865652084350586, "incorrect_loss_uncond": -3.874162197113037}, "model_output": [{"sum_logits": -15.22680950164795, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.858729362487793, "logits_per_token": -15.22680950164795, "logits_per_char": -2.1752585002354214, "num_chars": 7}, {"sum_logits": -12.042744636535645, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.660778045654297, "logits_per_token": -6.021372318267822, "logits_per_char": -0.8028496424357097, "num_chars": 15}, {"sum_logits": -10.63123607635498, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.017801284790039, "logits_per_token": -10.63123607635498, "logits_per_char": -2.126247215270996, "num_chars": 5}, {"sum_logits": -15.081672668457031, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.29230308532715, "logits_per_token": -7.540836334228516, "logits_per_char": -0.8871572157915901, "num_chars": 17}, {"sum_logits": -12.694344520568848, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.730409622192383, "logits_per_token": -6.347172260284424, "logits_per_char": -0.7467261482687557, "num_chars": 17}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 761, "native_id": "71cbfeb995b06b21e890c91040722252", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.804306983947754, "incorrect_loss_raw": 9.17461907863617, "correct_loss_per_char": 0.8505383729934692, "incorrect_loss_per_char": 0.9904851059118907, "correct_loss_per_token": 6.804306983947754, "incorrect_loss_per_token": 6.746392051378886, "correct_loss_uncond": -7.062314987182617, "incorrect_loss_uncond": -6.29741370677948}, "model_output": [{"sum_logits": -10.683602333068848, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.451674461364746, "logits_per_token": -10.683602333068848, "logits_per_char": -1.335450291633606, "num_chars": 8}, {"sum_logits": -14.760627746582031, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.2464542388916, "logits_per_token": -7.380313873291016, "logits_per_char": -0.9840418497721354, "num_chars": 15}, {"sum_logits": -7.755354881286621, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.558703422546387, "logits_per_token": -7.755354881286621, "logits_per_char": -1.2925591468811035, "num_chars": 6}, {"sum_logits": -6.804306983947754, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.866621971130371, "logits_per_token": -6.804306983947754, "logits_per_char": -0.8505383729934692, "num_chars": 8}, {"sum_logits": -3.4988913536071777, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.631299018859863, "logits_per_token": -1.1662971178690593, "logits_per_char": -0.34988913536071775, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 762, "native_id": "a15d564d0be6996251b5d523ac62db2a", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.92667293548584, "incorrect_loss_raw": 10.136268615722656, "correct_loss_per_char": 0.6296975395896218, "incorrect_loss_per_char": 1.4207918643951416, "correct_loss_per_token": 3.46333646774292, "incorrect_loss_per_token": 8.922391414642334, "correct_loss_uncond": -10.31811237335205, "incorrect_loss_uncond": -3.3986449241638184}, "model_output": [{"sum_logits": -9.902334213256836, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.428765296936035, "logits_per_token": -9.902334213256836, "logits_per_char": -0.9902334213256836, "num_chars": 10}, {"sum_logits": -9.079858779907227, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.7246675491333, "logits_per_token": -9.079858779907227, "logits_per_char": -1.5133097966512044, "num_chars": 6}, {"sum_logits": -9.711017608642578, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.365083694458008, "logits_per_token": -4.855508804321289, "logits_per_char": -0.8092514673868815, "num_chars": 12}, {"sum_logits": -11.851863861083984, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.621137619018555, "logits_per_token": -11.851863861083984, "logits_per_char": -2.370372772216797, "num_chars": 5}, {"sum_logits": -6.92667293548584, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.24478530883789, "logits_per_token": -3.46333646774292, "logits_per_char": -0.6296975395896218, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 763, "native_id": "6bd170c8d3d99d3c47b3e96427bacaeb", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.8353943824768066, "incorrect_loss_raw": 11.44439709186554, "correct_loss_per_char": 0.20252817017691477, "incorrect_loss_per_char": 1.3158200666157887, "correct_loss_per_token": 0.9451314608256022, "incorrect_loss_per_token": 6.3753355940183, "correct_loss_uncond": -14.580539226531982, "incorrect_loss_uncond": -8.07205045223236}, "model_output": [{"sum_logits": -10.161418914794922, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.783552169799805, "logits_per_token": -10.161418914794922, "logits_per_char": -2.0322837829589844, "num_chars": 5}, {"sum_logits": -5.214926242828369, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.093673706054688, "logits_per_token": -2.6074631214141846, "logits_per_char": -0.5794362492031522, "num_chars": 9}, {"sum_logits": -15.592275619506836, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -22.522144317626953, "logits_per_token": -7.796137809753418, "logits_per_char": -1.4174796017733486, "num_chars": 11}, {"sum_logits": -14.808967590332031, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -22.666419982910156, "logits_per_token": -4.936322530110677, "logits_per_char": -1.2340806325276692, "num_chars": 12}, {"sum_logits": -2.8353943824768066, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.41593360900879, "logits_per_token": -0.9451314608256022, "logits_per_char": -0.20252817017691477, "num_chars": 14}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 764, "native_id": "7bc1198664b376f79d584725ad7f874b", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.863145351409912, "incorrect_loss_raw": 10.05759084224701, "correct_loss_per_char": 0.6514605946011014, "incorrect_loss_per_char": 0.7646378530396356, "correct_loss_per_token": 1.954381783803304, "incorrect_loss_per_token": 4.380834639072418, "correct_loss_uncond": -11.362867832183838, "incorrect_loss_uncond": -7.834553122520447}, "model_output": [{"sum_logits": -9.571100234985352, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.177635192871094, "logits_per_token": -4.785550117492676, "logits_per_char": -0.9571100234985351, "num_chars": 10}, {"sum_logits": -5.140986442565918, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.695878028869629, "logits_per_token": -5.140986442565918, "logits_per_char": -0.4673624038696289, "num_chars": 11}, {"sum_logits": -20.64934539794922, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -24.45351791381836, "logits_per_token": -5.162336349487305, "logits_per_char": -1.1471858554416232, "num_chars": 18}, {"sum_logits": -4.868931293487549, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.241544723510742, "logits_per_token": -2.4344656467437744, "logits_per_char": -0.4868931293487549, "num_chars": 10}, {"sum_logits": -5.863145351409912, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.22601318359375, "logits_per_token": -1.954381783803304, "logits_per_char": -0.6514605946011014, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 765, "native_id": "d6c002d46d9bfa466637cec4a134f332", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.9358320236206055, "incorrect_loss_raw": 12.135224342346191, "correct_loss_per_char": 0.4946526686350505, "incorrect_loss_per_char": 2.143938616343907, "correct_loss_per_token": 2.9679160118103027, "incorrect_loss_per_token": 12.135224342346191, "correct_loss_uncond": -10.815302848815918, "incorrect_loss_uncond": -1.2320685386657715}, "model_output": [{"sum_logits": -12.145021438598633, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.242291450500488, "logits_per_token": -12.145021438598633, "logits_per_char": -2.4290042877197267, "num_chars": 5}, {"sum_logits": -14.53736686706543, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.828683853149414, "logits_per_token": -14.53736686706543, "logits_per_char": -2.076766695295061, "num_chars": 7}, {"sum_logits": -9.051549911499023, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.81769847869873, "logits_per_token": -9.051549911499023, "logits_per_char": -1.508591651916504, "num_chars": 6}, {"sum_logits": -5.9358320236206055, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.751134872436523, "logits_per_token": -2.9679160118103027, "logits_per_char": -0.4946526686350505, "num_chars": 12}, {"sum_logits": -12.80695915222168, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.580497741699219, "logits_per_token": -12.80695915222168, "logits_per_char": -2.561391830444336, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 766, "native_id": "8cb45b421375243e788cfc64bd77b051", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.523318290710449, "incorrect_loss_raw": 13.661149024963379, "correct_loss_per_char": 0.7523318290710449, "incorrect_loss_per_char": 1.11749687491844, "correct_loss_per_token": 7.523318290710449, "incorrect_loss_per_token": 6.3283452192942296, "correct_loss_uncond": -6.699067115783691, "incorrect_loss_uncond": -3.0502450466156006}, "model_output": [{"sum_logits": -16.796375274658203, "num_tokens": 4, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.18400764465332, "logits_per_token": -4.199093818664551, "logits_per_char": -0.622087973135489, "num_chars": 27}, {"sum_logits": -11.44736099243164, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.540124893188477, "logits_per_token": -3.8157869974772134, "logits_per_char": -1.0406691811301492, "num_chars": 11}, {"sum_logits": -13.653539657592773, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -23.050052642822266, "logits_per_token": -4.551179885864258, "logits_per_char": -0.6826769828796386, "num_chars": 20}, {"sum_logits": -12.747320175170898, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.071391105651855, "logits_per_token": -12.747320175170898, "logits_per_char": -2.124553362528483, "num_chars": 6}, {"sum_logits": -7.523318290710449, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.22238540649414, "logits_per_token": -7.523318290710449, "logits_per_char": -0.7523318290710449, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 767, "native_id": "d6ff2d749494d89e9c7a53f587c519f4", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.8199001550674438, "incorrect_loss_raw": 10.255833983421326, "correct_loss_per_char": 0.11712859358106341, "incorrect_loss_per_char": 1.5335694135181488, "correct_loss_per_token": 0.8199001550674438, "incorrect_loss_per_token": 10.255833983421326, "correct_loss_uncond": -12.867884039878845, "incorrect_loss_uncond": -2.978082537651062}, "model_output": [{"sum_logits": -7.580634593963623, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.276521682739258, "logits_per_token": -7.580634593963623, "logits_per_char": -0.8422927326626248, "num_chars": 9}, {"sum_logits": -0.8199001550674438, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": true, "sum_logits_uncond": -13.687784194946289, "logits_per_token": -0.8199001550674438, "logits_per_char": -0.11712859358106341, "num_chars": 7}, {"sum_logits": -13.021758079528809, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.791247367858887, "logits_per_token": -13.021758079528809, "logits_per_char": -2.1702930132548013, "num_chars": 6}, {"sum_logits": -8.585400581359863, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.31299877166748, "logits_per_token": -8.585400581359863, "logits_per_char": -1.4309000968933105, "num_chars": 6}, {"sum_logits": -11.835542678833008, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.554898262023926, "logits_per_token": -11.835542678833008, "logits_per_char": -1.6907918112618583, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 768, "native_id": "6974d215428a974641c1df18678522f5", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 13.567275047302246, "incorrect_loss_raw": 15.21909499168396, "correct_loss_per_char": 1.507475005255805, "incorrect_loss_per_char": 1.1787271658023755, "correct_loss_per_token": 6.783637523651123, "incorrect_loss_per_token": 7.123865048090616, "correct_loss_uncond": -11.561188697814941, "incorrect_loss_uncond": -4.963939905166626}, "model_output": [{"sum_logits": -17.682518005371094, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -21.405588150024414, "logits_per_token": -8.841259002685547, "logits_per_char": -1.3601936927208533, "num_chars": 13}, {"sum_logits": -11.656378746032715, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.401235580444336, "logits_per_token": -3.885459582010905, "logits_per_char": -0.8966445189255935, "num_chars": 13}, {"sum_logits": -14.395866394042969, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.214157104492188, "logits_per_token": -7.197933197021484, "logits_per_char": -0.8997416496276855, "num_chars": 16}, {"sum_logits": -17.141616821289062, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.711158752441406, "logits_per_token": -8.570808410644531, "logits_per_char": -1.5583288019353694, "num_chars": 11}, {"sum_logits": -13.567275047302246, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -25.128463745117188, "logits_per_token": -6.783637523651123, "logits_per_char": -1.507475005255805, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 769, "native_id": "b94a9764acff078b52a9cbae04661dc9", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 14.025333404541016, "incorrect_loss_raw": 13.40200924873352, "correct_loss_per_char": 1.4025333404541016, "incorrect_loss_per_char": 1.131968760957905, "correct_loss_per_token": 7.012666702270508, "incorrect_loss_per_token": 6.70100462436676, "correct_loss_uncond": -8.869977951049805, "incorrect_loss_uncond": -3.9535672664642334}, "model_output": [{"sum_logits": -14.025333404541016, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -22.89531135559082, "logits_per_token": -7.012666702270508, "logits_per_char": -1.4025333404541016, "num_chars": 10}, {"sum_logits": -14.859511375427246, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.413597106933594, "logits_per_token": -7.429755687713623, "logits_per_char": -1.4859511375427246, "num_chars": 10}, {"sum_logits": -9.017529487609863, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.56960678100586, "logits_per_token": -4.508764743804932, "logits_per_char": -0.9017529487609863, "num_chars": 10}, {"sum_logits": -13.766411781311035, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.279050827026367, "logits_per_token": -6.883205890655518, "logits_per_char": -0.8097889283124138, "num_chars": 17}, {"sum_logits": -15.964584350585938, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.160051345825195, "logits_per_token": -7.982292175292969, "logits_per_char": -1.3303820292154949, "num_chars": 12}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 770, "native_id": "80930e9df9ac4ad752749a54e7fc124f_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.0795392990112305, "incorrect_loss_raw": 7.349201798439026, "correct_loss_per_char": 0.5899616082509359, "incorrect_loss_per_char": 0.8482319990126024, "correct_loss_per_token": 7.0795392990112305, "incorrect_loss_per_token": 6.650323390960693, "correct_loss_uncond": -8.484447479248047, "incorrect_loss_uncond": -7.441829085350037}, "model_output": [{"sum_logits": -4.125543117523193, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.254728317260742, "logits_per_token": -4.125543117523193, "logits_per_char": -0.8251086235046386, "num_chars": 5}, {"sum_logits": -7.0795392990112305, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.563986778259277, "logits_per_token": -7.0795392990112305, "logits_per_char": -0.5899616082509359, "num_chars": 12}, {"sum_logits": -13.358963012695312, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.08862590789795, "logits_per_token": -13.358963012695312, "logits_per_char": -1.3358963012695313, "num_chars": 10}, {"sum_logits": -5.59102725982666, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.928295135498047, "logits_per_token": -2.79551362991333, "logits_per_char": -0.32888395646039176, "num_chars": 17}, {"sum_logits": -6.3212738037109375, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.892474174499512, "logits_per_token": -6.3212738037109375, "logits_per_char": -0.9030391148158482, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 771, "native_id": "3310b5b24f03d67179fababf9ae95144", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.300266265869141, "incorrect_loss_raw": 11.531468868255615, "correct_loss_per_char": 1.4600532531738282, "incorrect_loss_per_char": 1.1412298696351348, "correct_loss_per_token": 7.300266265869141, "incorrect_loss_per_token": 7.634537935256958, "correct_loss_uncond": -3.975531578063965, "incorrect_loss_uncond": -3.5682389736175537}, "model_output": [{"sum_logits": -15.149236679077148, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -15.407781600952148, "logits_per_token": -7.574618339538574, "logits_per_char": -1.683248519897461, "num_chars": 9}, {"sum_logits": -16.02621078491211, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -20.36071014404297, "logits_per_token": -8.013105392456055, "logits_per_char": -0.9427182814654183, "num_chars": 17}, {"sum_logits": -7.300266265869141, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -11.275797843933105, "logits_per_token": -7.300266265869141, "logits_per_char": -1.4600532531738282, "num_chars": 5}, {"sum_logits": -7.653661727905273, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -11.229242324829102, "logits_per_token": -7.653661727905273, "logits_per_char": -1.2756102879842122, "num_chars": 6}, {"sum_logits": -7.29676628112793, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -13.401097297668457, "logits_per_token": -7.29676628112793, "logits_per_char": -0.6633423891934481, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 772, "native_id": "846bc47ced7119ad2ee19a8780d7fe18", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.337273597717285, "incorrect_loss_raw": 12.673466920852661, "correct_loss_per_char": 1.867454719543457, "incorrect_loss_per_char": 0.9167913917855267, "correct_loss_per_token": 9.337273597717285, "incorrect_loss_per_token": 4.952215313911438, "correct_loss_uncond": -5.075187683105469, "incorrect_loss_uncond": -8.382570505142212}, "model_output": [{"sum_logits": -22.15229034423828, "num_tokens": 4, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -28.43642807006836, "logits_per_token": -5.53807258605957, "logits_per_char": -0.8860916137695313, "num_chars": 25}, {"sum_logits": -8.552309036254883, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.980607986450195, "logits_per_token": -4.276154518127441, "logits_per_char": -1.2217584337506975, "num_chars": 7}, {"sum_logits": -10.637990951538086, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.99103546142578, "logits_per_token": -5.318995475769043, "logits_per_char": -0.7091993967692057, "num_chars": 15}, {"sum_logits": -9.337273597717285, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.412461280822754, "logits_per_token": -9.337273597717285, "logits_per_char": -1.867454719543457, "num_chars": 5}, {"sum_logits": -9.351277351379395, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.816078186035156, "logits_per_token": -4.675638675689697, "logits_per_char": -0.8501161228526722, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 773, "native_id": "fd5a34e94303d7fd343de2a8f36943d5", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 8.90119743347168, "incorrect_loss_raw": 9.44672030210495, "correct_loss_per_char": 0.7417664527893066, "incorrect_loss_per_char": 1.1744023892614577, "correct_loss_per_token": 4.45059871673584, "incorrect_loss_per_token": 4.947924792766571, "correct_loss_uncond": -11.340299606323242, "incorrect_loss_uncond": -5.856036007404327}, "model_output": [{"sum_logits": -14.570798873901367, "num_tokens": 3, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -20.626497268676758, "logits_per_token": -4.856932957967122, "logits_per_char": -1.0407713481358118, "num_chars": 14}, {"sum_logits": -12.421974182128906, "num_tokens": 3, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -15.884847640991211, "logits_per_token": -4.140658060709636, "logits_per_char": -1.3802193535698786, "num_chars": 9}, {"sum_logits": -3.937809705734253, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -12.858729362487793, "logits_per_token": -3.937809705734253, "logits_per_char": -0.5625442436763218, "num_chars": 7}, {"sum_logits": -6.856298446655273, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -11.840950965881348, "logits_per_token": -6.856298446655273, "logits_per_char": -1.7140746116638184, "num_chars": 4}, {"sum_logits": -8.90119743347168, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -20.241497039794922, "logits_per_token": -4.45059871673584, "logits_per_char": -0.7417664527893066, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 774, "native_id": "4e87db4771f2d6423034935446e3fff1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.654964447021484, "incorrect_loss_raw": 11.810028910636902, "correct_loss_per_char": 0.6182117462158203, "incorrect_loss_per_char": 0.9154059476322598, "correct_loss_per_token": 4.327482223510742, "incorrect_loss_per_token": 7.913532137870789, "correct_loss_uncond": -9.909099578857422, "incorrect_loss_uncond": -4.496010661125183}, "model_output": [{"sum_logits": -9.590513229370117, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.022502899169922, "logits_per_token": -9.590513229370117, "logits_per_char": -1.065612581041124, "num_chars": 9}, {"sum_logits": -8.654964447021484, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -18.564064025878906, "logits_per_token": -4.327482223510742, "logits_per_char": -0.6182117462158203, "num_chars": 14}, {"sum_logits": -18.34648895263672, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -17.042081832885742, "logits_per_token": -9.17324447631836, "logits_per_char": -1.146655559539795, "num_chars": 16}, {"sum_logits": -6.477628231048584, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.412413597106934, "logits_per_token": -6.477628231048584, "logits_per_char": -0.6477628231048584, "num_chars": 10}, {"sum_logits": -12.825485229492188, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -19.747159957885742, "logits_per_token": -6.412742614746094, "logits_per_char": -0.8015928268432617, "num_chars": 16}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 775, "native_id": "a585df0818180ce3c06f963a4c3c810a", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.714303016662598, "incorrect_loss_raw": 9.154334545135498, "correct_loss_per_char": 1.7142878770828247, "incorrect_loss_per_char": 0.8979574671158423, "correct_loss_per_token": 6.857151508331299, "incorrect_loss_per_token": 8.28471451997757, "correct_loss_uncond": -6.638077735900879, "incorrect_loss_uncond": -4.7756712436676025}, "model_output": [{"sum_logits": -13.714303016662598, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -20.352380752563477, "logits_per_token": -6.857151508331299, "logits_per_char": -1.7142878770828247, "num_chars": 8}, {"sum_logits": -7.873934268951416, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -12.348342895507812, "logits_per_token": -7.873934268951416, "logits_per_char": -1.124847752707345, "num_chars": 7}, {"sum_logits": -6.956960201263428, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -16.393335342407227, "logits_per_token": -3.478480100631714, "logits_per_char": -0.4969257286616734, "num_chars": 14}, {"sum_logits": -9.038812637329102, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -13.202109336853027, "logits_per_token": -9.038812637329102, "logits_per_char": -0.6952932797945462, "num_chars": 13}, {"sum_logits": -12.747631072998047, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -13.776235580444336, "logits_per_token": -12.747631072998047, "logits_per_char": -1.2747631072998047, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 776, "native_id": "c9f7d07e6d363a99f5fadd68a4dfa35a", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.712935447692871, "incorrect_loss_raw": 9.155580997467041, "correct_loss_per_char": 0.4794953891209194, "incorrect_loss_per_char": 1.744749977191289, "correct_loss_per_token": 3.3564677238464355, "incorrect_loss_per_token": 9.155580997467041, "correct_loss_uncond": -9.680399894714355, "incorrect_loss_uncond": -4.0659499168396}, "model_output": [{"sum_logits": -10.283234596252441, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.02552318572998, "logits_per_token": -10.283234596252441, "logits_per_char": -2.5708086490631104, "num_chars": 4}, {"sum_logits": -6.712935447692871, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.393335342407227, "logits_per_token": -3.3564677238464355, "logits_per_char": -0.4794953891209194, "num_chars": 14}, {"sum_logits": -10.195353507995605, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.682783126831055, "logits_per_token": -10.195353507995605, "logits_per_char": -2.039070701599121, "num_chars": 5}, {"sum_logits": -7.716050148010254, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.629175186157227, "logits_per_token": -7.716050148010254, "logits_per_char": -0.9645062685012817, "num_chars": 8}, {"sum_logits": -8.427685737609863, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.5486421585083, "logits_per_token": -8.427685737609863, "logits_per_char": -1.4046142896016438, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 777, "native_id": "c7cb327fa4c0008efaa7741081a365d4", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 18.092777252197266, "incorrect_loss_raw": 12.69645380973816, "correct_loss_per_char": 1.5077314376831055, "incorrect_loss_per_char": 1.2877098586824207, "correct_loss_per_token": 6.030925750732422, "incorrect_loss_per_token": 7.077171325683594, "correct_loss_uncond": -9.50439453125, "incorrect_loss_uncond": -5.347034215927124}, "model_output": [{"sum_logits": -20.101369857788086, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.308223724365234, "logits_per_token": -10.050684928894043, "logits_per_char": -1.3400913238525392, "num_chars": 15}, {"sum_logits": -9.314105033874512, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.431859970092773, "logits_per_token": -9.314105033874512, "logits_per_char": -1.5523508389790852, "num_chars": 6}, {"sum_logits": -10.922691345214844, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.364959716796875, "logits_per_token": -5.461345672607422, "logits_per_char": -1.2136323716905382, "num_chars": 9}, {"sum_logits": -10.447649002075195, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -23.06890869140625, "logits_per_token": -3.4825496673583984, "logits_per_char": -1.0447649002075194, "num_chars": 10}, {"sum_logits": -18.092777252197266, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -27.597171783447266, "logits_per_token": -6.030925750732422, "logits_per_char": -1.5077314376831055, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 778, "native_id": "c54ddc0f9d170ba65d9f4f2e0bb41d1c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.284580945968628, "incorrect_loss_raw": 13.047869324684143, "correct_loss_per_char": 0.3807634909947713, "incorrect_loss_per_char": 1.2247703253691635, "correct_loss_per_token": 2.284580945968628, "incorrect_loss_per_token": 8.264787634213766, "correct_loss_uncond": -13.05739426612854, "incorrect_loss_uncond": -3.830617070198059}, "model_output": [{"sum_logits": -2.284580945968628, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.341975212097168, "logits_per_token": -2.284580945968628, "logits_per_char": -0.3807634909947713, "num_chars": 6}, {"sum_logits": -6.7692694664001465, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -13.453845024108887, "logits_per_token": -6.7692694664001465, "logits_per_char": -1.3538538932800293, "num_chars": 5}, {"sum_logits": -12.260394096374512, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.157535552978516, "logits_per_token": -12.260394096374512, "logits_per_char": -1.0216995080312092, "num_chars": 12}, {"sum_logits": -17.853294372558594, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -16.048660278320312, "logits_per_token": -8.926647186279297, "logits_per_char": -1.6230267611416904, "num_chars": 11}, {"sum_logits": -15.30851936340332, "num_tokens": 3, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -22.853904724121094, "logits_per_token": -5.1028397878011065, "logits_per_char": -0.9005011390237248, "num_chars": 17}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 779, "native_id": "1729c737ff92cf558efecde2c6cafc5e", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 16.052562713623047, "incorrect_loss_raw": 14.279387474060059, "correct_loss_per_char": 1.234812516432542, "incorrect_loss_per_char": 1.1202339622709485, "correct_loss_per_token": 4.013140678405762, "incorrect_loss_per_token": 8.49567683537801, "correct_loss_uncond": -3.001667022705078, "incorrect_loss_uncond": -3.9605724811553955}, "model_output": [{"sum_logits": -22.798076629638672, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -24.732065200805664, "logits_per_token": -7.599358876546224, "logits_per_char": -1.266559812757704, "num_chars": 18}, {"sum_logits": -11.904187202453613, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.521671295166016, "logits_per_token": -3.968062400817871, "logits_per_char": -0.6613437334696451, "num_chars": 18}, {"sum_logits": -16.052562713623047, "num_tokens": 4, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.054229736328125, "logits_per_token": -4.013140678405762, "logits_per_char": -1.234812516432542, "num_chars": 13}, {"sum_logits": -9.955138206481934, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.220978736877441, "logits_per_token": -9.955138206481934, "logits_per_char": -0.9955138206481934, "num_chars": 10}, {"sum_logits": -12.460147857666016, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.485124588012695, "logits_per_token": -12.460147857666016, "logits_per_char": -1.557518482208252, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 780, "native_id": "19dfd55e967dacd6f5700a62c1e14eee", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.555746078491211, "incorrect_loss_raw": 7.609161853790283, "correct_loss_per_char": 0.07408314659481957, "incorrect_loss_per_char": 0.7751050722031366, "correct_loss_per_token": 0.5185820261637369, "incorrect_loss_per_token": 5.020186344782511, "correct_loss_uncond": -19.366758346557617, "incorrect_loss_uncond": -9.056171894073486}, "model_output": [{"sum_logits": -4.592437744140625, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.388150215148926, "logits_per_token": -4.592437744140625, "logits_per_char": -0.918487548828125, "num_chars": 5}, {"sum_logits": -7.7827911376953125, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.82883071899414, "logits_per_token": -2.594263712565104, "logits_per_char": -0.38913955688476565, "num_chars": 20}, {"sum_logits": -7.7266693115234375, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.562989234924316, "logits_per_token": -7.7266693115234375, "logits_per_char": -1.1038099016462053, "num_chars": 7}, {"sum_logits": -1.555746078491211, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": true, "sum_logits_uncond": -20.922504425048828, "logits_per_token": -0.5185820261637369, "logits_per_char": -0.07408314659481957, "num_chars": 21}, {"sum_logits": -10.334749221801758, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.881364822387695, "logits_per_token": -5.167374610900879, "logits_per_char": -0.6889832814534506, "num_chars": 15}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 781, "native_id": "b9bed83138901f4a45041b02c5b242c1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.864626884460449, "incorrect_loss_raw": 11.38461446762085, "correct_loss_per_char": 0.34747334889003206, "incorrect_loss_per_char": 1.615301922040108, "correct_loss_per_token": 2.4323134422302246, "incorrect_loss_per_token": 7.059280037879944, "correct_loss_uncond": -13.614109992980957, "incorrect_loss_uncond": -5.065119028091431}, "model_output": [{"sum_logits": -11.433565139770508, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.090980529785156, "logits_per_token": -11.433565139770508, "logits_per_char": -2.2867130279541015, "num_chars": 5}, {"sum_logits": -4.864626884460449, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.478736877441406, "logits_per_token": -2.4323134422302246, "logits_per_char": -0.34747334889003206, "num_chars": 14}, {"sum_logits": -5.375641822814941, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.526021003723145, "logits_per_token": -5.375641822814941, "logits_per_char": -0.5972935358683268, "num_chars": 9}, {"sum_logits": -17.62027359008789, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -21.469478607177734, "logits_per_token": -5.873424530029297, "logits_per_char": -1.3554056607759917, "num_chars": 13}, {"sum_logits": -11.108977317810059, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.712453842163086, "logits_per_token": -5.554488658905029, "logits_per_char": -2.2217954635620116, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 782, "native_id": "b9d22425a3d5810be9528a55245c8f09", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.466024398803711, "incorrect_loss_raw": 10.152936339378357, "correct_loss_per_char": 1.2932048797607423, "incorrect_loss_per_char": 0.9843458784951105, "correct_loss_per_token": 6.466024398803711, "incorrect_loss_per_token": 7.533696889877319, "correct_loss_uncond": -7.256833076477051, "incorrect_loss_uncond": -5.743436217308044}, "model_output": [{"sum_logits": -7.310190677642822, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.133002281188965, "logits_per_token": -7.310190677642822, "logits_per_char": -1.0443129539489746, "num_chars": 7}, {"sum_logits": -6.466024398803711, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.722857475280762, "logits_per_token": -6.466024398803711, "logits_per_char": -1.2932048797607423, "num_chars": 5}, {"sum_logits": -12.058311462402344, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.98815155029297, "logits_per_token": -6.029155731201172, "logits_per_char": -0.6699061923556857, "num_chars": 18}, {"sum_logits": -8.895604133605957, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.671432495117188, "logits_per_token": -4.4478020668029785, "logits_per_char": -0.9884004592895508, "num_chars": 9}, {"sum_logits": -12.347639083862305, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.792903900146484, "logits_per_token": -12.347639083862305, "logits_per_char": -1.2347639083862305, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 783, "native_id": "2af70107e04e61e3c7884bc743901c02", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 4.52714729309082, "incorrect_loss_raw": 8.277431964874268, "correct_loss_per_char": 0.41155884482643823, "incorrect_loss_per_char": 0.9014835357666016, "correct_loss_per_token": 4.52714729309082, "incorrect_loss_per_token": 6.3307026624679565, "correct_loss_uncond": -8.55500316619873, "incorrect_loss_uncond": -6.8108131885528564}, "model_output": [{"sum_logits": -7.543103218078613, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -17.062847137451172, "logits_per_token": -3.7715516090393066, "logits_per_char": -0.6857366561889648, "num_chars": 11}, {"sum_logits": -4.52714729309082, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -13.08215045928955, "logits_per_token": -4.52714729309082, "logits_per_char": -0.41155884482643823, "num_chars": 11}, {"sum_logits": -12.6265869140625, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -13.98448371887207, "logits_per_token": -12.6265869140625, "logits_per_char": -1.4029541015625, "num_chars": 9}, {"sum_logits": -4.909306526184082, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -13.375931739807129, "logits_per_token": -4.909306526184082, "logits_per_char": -0.9818613052368164, "num_chars": 5}, {"sum_logits": -8.030731201171875, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -15.929718017578125, "logits_per_token": -4.0153656005859375, "logits_per_char": -0.535382080078125, "num_chars": 15}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 784, "native_id": "be2cb9c96069ac355a7ccef262743d14", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.2437164783477783, "incorrect_loss_raw": 11.269195795059204, "correct_loss_per_char": 0.14958109855651855, "incorrect_loss_per_char": 1.1143550017523387, "correct_loss_per_token": 1.1218582391738892, "incorrect_loss_per_token": 8.512481927871704, "correct_loss_uncond": -18.132294416427612, "incorrect_loss_uncond": -4.303882122039795}, "model_output": [{"sum_logits": -10.55769157409668, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.463623046875, "logits_per_token": -10.55769157409668, "logits_per_char": -1.1730768415662978, "num_chars": 9}, {"sum_logits": -2.2437164783477783, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.37601089477539, "logits_per_token": -1.1218582391738892, "logits_per_char": -0.14958109855651855, "num_chars": 15}, {"sum_logits": -15.05501937866211, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.99239158630371, "logits_per_token": -7.527509689331055, "logits_per_char": -1.003667958577474, "num_chars": 15}, {"sum_logits": -6.998691558837891, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.393335342407227, "logits_per_token": -3.4993457794189453, "logits_per_char": -0.4999065399169922, "num_chars": 14}, {"sum_logits": -12.465380668640137, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.442961692810059, "logits_per_token": -12.465380668640137, "logits_per_char": -1.780768666948591, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 785, "native_id": "799e48ec7fb16415c8f82828c5761ed1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.665092468261719, "incorrect_loss_raw": 16.350343465805054, "correct_loss_per_char": 0.5150084062056108, "incorrect_loss_per_char": 1.134377872696435, "correct_loss_per_token": 5.665092468261719, "incorrect_loss_per_token": 7.80989412466685, "correct_loss_uncond": -8.070916175842285, "incorrect_loss_uncond": -2.543605089187622}, "model_output": [{"sum_logits": -13.714651107788086, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.5659122467041, "logits_per_token": -6.857325553894043, "logits_per_char": -0.9796179362705776, "num_chars": 14}, {"sum_logits": -5.665092468261719, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.736008644104004, "logits_per_token": -5.665092468261719, "logits_per_char": -0.5150084062056108, "num_chars": 11}, {"sum_logits": -22.641324996948242, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.544958114624023, "logits_per_token": -7.547108332316081, "logits_per_char": -1.741640384380634, "num_chars": 13}, {"sum_logits": -16.28034019470215, "num_tokens": 4, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.978012084960938, "logits_per_token": -4.070085048675537, "logits_per_char": -0.9044633441501193, "num_chars": 18}, {"sum_logits": -12.765057563781738, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.48691177368164, "logits_per_token": -12.765057563781738, "logits_per_char": -0.9117898259844098, "num_chars": 14}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 786, "native_id": "a5db1e9677af118deb8e4add8bc18db2", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.404190063476562, "incorrect_loss_raw": 17.090235471725464, "correct_loss_per_char": 0.8670158386230469, "incorrect_loss_per_char": 0.988669922498577, "correct_loss_per_token": 3.4680633544921875, "incorrect_loss_per_token": 4.1586841798964, "correct_loss_uncond": -10.176490783691406, "incorrect_loss_uncond": -7.866879940032959}, "model_output": [{"sum_logits": -8.394518852233887, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.380178451538086, "logits_per_token": -4.197259426116943, "logits_per_char": -0.7631380774758079, "num_chars": 11}, {"sum_logits": -45.090850830078125, "num_tokens": 7, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -42.42943572998047, "logits_per_token": -6.441550118582589, "logits_per_char": -1.960471775220788, "num_chars": 23}, {"sum_logits": -6.224418640136719, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.943564414978027, "logits_per_token": -3.1122093200683594, "logits_per_char": -0.4446013314383371, "num_chars": 14}, {"sum_logits": -10.404190063476562, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.58068084716797, "logits_per_token": -3.4680633544921875, "logits_per_char": -0.8670158386230469, "num_chars": 12}, {"sum_logits": -8.651153564453125, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.07528305053711, "logits_per_token": -2.8837178548177085, "logits_per_char": -0.786468505859375, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 787, "native_id": "28357ebf85f8bb82b6a3210c4397e0aa", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.427609443664551, "incorrect_loss_raw": 8.611506700515747, "correct_loss_per_char": 0.5843281312422319, "incorrect_loss_per_char": 0.9152349455683839, "correct_loss_per_token": 2.142536481221517, "incorrect_loss_per_token": 4.904221773147583, "correct_loss_uncond": -7.951478004455566, "incorrect_loss_uncond": -6.801597356796265}, "model_output": [{"sum_logits": -9.23802661895752, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.148038864135742, "logits_per_token": -3.0793422063191733, "logits_per_char": -0.5434133305269129, "num_chars": 17}, {"sum_logits": -7.304165840148926, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.586409568786621, "logits_per_token": -7.304165840148926, "logits_per_char": -1.2173609733581543, "num_chars": 6}, {"sum_logits": -4.898151397705078, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.889653205871582, "logits_per_token": -4.898151397705078, "logits_per_char": -0.8163585662841797, "num_chars": 6}, {"sum_logits": -6.427609443664551, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.379087448120117, "logits_per_token": -2.142536481221517, "logits_per_char": -0.5843281312422319, "num_chars": 11}, {"sum_logits": -13.005682945251465, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.0283145904541, "logits_per_token": -4.335227648417155, "logits_per_char": -1.0838069121042888, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 788, "native_id": "7b95825a19d6930d6aed35c7c57a2d82", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.247386932373047, "incorrect_loss_raw": 10.493792057037354, "correct_loss_per_char": 0.5618467330932617, "incorrect_loss_per_char": 1.6470894813537598, "correct_loss_per_token": 2.247386932373047, "incorrect_loss_per_token": 10.493792057037354, "correct_loss_uncond": -10.688834190368652, "incorrect_loss_uncond": -2.5320348739624023}, "model_output": [{"sum_logits": -9.256111145019531, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.520729064941406, "logits_per_token": -9.256111145019531, "logits_per_char": -1.3223015921456474, "num_chars": 7}, {"sum_logits": -11.26761245727539, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.048408508300781, "logits_per_token": -11.26761245727539, "logits_per_char": -1.609658922467913, "num_chars": 7}, {"sum_logits": -10.358343124389648, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -11.641695976257324, "logits_per_token": -10.358343124389648, "logits_per_char": -2.0716686248779297, "num_chars": 5}, {"sum_logits": -2.247386932373047, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.9362211227417, "logits_per_token": -2.247386932373047, "logits_per_char": -0.5618467330932617, "num_chars": 4}, {"sum_logits": -11.093101501464844, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.892474174499512, "logits_per_token": -11.093101501464844, "logits_per_char": -1.584728785923549, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 789, "native_id": "6b270159bd402ddd498a38153f9d1efe", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.230673789978027, "incorrect_loss_raw": 9.341613292694092, "correct_loss_per_char": 0.8900962557111468, "incorrect_loss_per_char": 1.1404766334599747, "correct_loss_per_token": 6.230673789978027, "incorrect_loss_per_token": 8.493134021759033, "correct_loss_uncond": -7.983884811401367, "incorrect_loss_uncond": -6.555392503738403}, "model_output": [{"sum_logits": -6.787834167480469, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.576881408691406, "logits_per_token": -3.3939170837402344, "logits_per_char": -0.9696905953543526, "num_chars": 7}, {"sum_logits": -11.216650009155273, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.968234062194824, "logits_per_token": -11.216650009155273, "logits_per_char": -1.0196954553777522, "num_chars": 11}, {"sum_logits": -6.094465255737305, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.495186805725098, "logits_per_token": -6.094465255737305, "logits_per_char": -0.6771628061930338, "num_chars": 9}, {"sum_logits": -13.26750373840332, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.547720909118652, "logits_per_token": -13.26750373840332, "logits_per_char": -1.89535767691476, "num_chars": 7}, {"sum_logits": -6.230673789978027, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.214558601379395, "logits_per_token": -6.230673789978027, "logits_per_char": -0.8900962557111468, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 790, "native_id": "eae0e03773365064ce915603c7addc91", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.3058648109436035, "incorrect_loss_raw": 14.043289184570312, "correct_loss_per_char": 0.23613320078168595, "incorrect_loss_per_char": 1.0412792005037006, "correct_loss_per_token": 1.6529324054718018, "incorrect_loss_per_token": 5.892254869143168, "correct_loss_uncond": -11.539684772491455, "incorrect_loss_uncond": -6.087803840637207}, "model_output": [{"sum_logits": -3.3058648109436035, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.845549583435059, "logits_per_token": -1.6529324054718018, "logits_per_char": -0.23613320078168595, "num_chars": 14}, {"sum_logits": -21.097505569458008, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -24.52056121826172, "logits_per_token": -10.548752784729004, "logits_per_char": -1.4065003712972006, "num_chars": 15}, {"sum_logits": -15.012435913085938, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.910371780395508, "logits_per_token": -5.0041453043619795, "logits_per_char": -1.0008290608723958, "num_chars": 15}, {"sum_logits": -8.061944961547852, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.955930709838867, "logits_per_token": -2.015486240386963, "logits_per_char": -0.42431289271304484, "num_chars": 19}, {"sum_logits": -12.001270294189453, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.137508392333984, "logits_per_token": -6.000635147094727, "logits_per_char": -1.3334744771321614, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 791, "native_id": "a5ca7c89196e54938b5827814d0071d4", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 12.799531936645508, "incorrect_loss_raw": 10.253275871276855, "correct_loss_per_char": 0.9845793797419622, "incorrect_loss_per_char": 0.9727628975203543, "correct_loss_per_token": 4.266510645548503, "incorrect_loss_per_token": 4.622441291809082, "correct_loss_uncond": -10.141481399536133, "incorrect_loss_uncond": -8.19644021987915}, "model_output": [{"sum_logits": -12.100719451904297, "num_tokens": 3, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -17.87074089050293, "logits_per_token": -4.033573150634766, "logits_per_char": -1.0083932876586914, "num_chars": 12}, {"sum_logits": -11.38338851928711, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -21.37042236328125, "logits_per_token": -5.691694259643555, "logits_per_char": -1.0348535017533735, "num_chars": 11}, {"sum_logits": -8.541472434997559, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -16.256288528442383, "logits_per_token": -4.270736217498779, "logits_per_char": -0.9490524927775065, "num_chars": 9}, {"sum_logits": -8.987523078918457, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -18.30141258239746, "logits_per_token": -4.4937615394592285, "logits_per_char": -0.8987523078918457, "num_chars": 10}, {"sum_logits": -12.799531936645508, "num_tokens": 3, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -22.94101333618164, "logits_per_token": -4.266510645548503, "logits_per_char": -0.9845793797419622, "num_chars": 13}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 792, "native_id": "ffc3461d437a1c6c22d1c4f6439ebd9c", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.9505574703216553, "incorrect_loss_raw": 15.602559804916382, "correct_loss_per_char": 0.2438196837902069, "incorrect_loss_per_char": 1.8900061193694415, "correct_loss_per_token": 1.9505574703216553, "incorrect_loss_per_token": 10.827805757522583, "correct_loss_uncond": -10.478860139846802, "incorrect_loss_uncond": -0.8985898494720459}, "model_output": [{"sum_logits": -11.190418243408203, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.953031539916992, "logits_per_token": -11.190418243408203, "logits_per_char": -1.8650697072347004, "num_chars": 6}, {"sum_logits": -13.021788597106934, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.375890731811523, "logits_per_token": -13.021788597106934, "logits_per_char": -2.1702980995178223, "num_chars": 6}, {"sum_logits": -17.150634765625, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.789621353149414, "logits_per_token": -8.5753173828125, "logits_per_char": -1.9056260850694444, "num_chars": 9}, {"sum_logits": -1.9505574703216553, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.429417610168457, "logits_per_token": -1.9505574703216553, "logits_per_char": -0.2438196837902069, "num_chars": 8}, {"sum_logits": -21.04739761352539, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.88605499267578, "logits_per_token": -10.523698806762695, "logits_per_char": -1.6190305856557994, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 793, "native_id": "aa2dcd9bcce5e4445bd3bacbf0bb11d3", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.977085590362549, "incorrect_loss_raw": 10.847155928611755, "correct_loss_per_char": 0.5681550843375069, "incorrect_loss_per_char": 1.0511032859484355, "correct_loss_per_token": 3.977085590362549, "incorrect_loss_per_token": 6.366256475448608, "correct_loss_uncond": -10.680206775665283, "incorrect_loss_uncond": -5.475520968437195}, "model_output": [{"sum_logits": -7.541428089141846, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.193785667419434, "logits_per_token": -7.541428089141846, "logits_per_char": -1.0773468698774065, "num_chars": 7}, {"sum_logits": -9.467832565307617, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.393335342407227, "logits_per_token": -4.733916282653809, "logits_per_char": -0.6762737546648298, "num_chars": 14}, {"sum_logits": -3.977085590362549, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.657292366027832, "logits_per_token": -3.977085590362549, "logits_per_char": -0.5681550843375069, "num_chars": 7}, {"sum_logits": -15.150735855102539, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.293781280517578, "logits_per_token": -7.5753679275512695, "logits_per_char": -1.515073585510254, "num_chars": 10}, {"sum_logits": -11.22862720489502, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.409805297851562, "logits_per_token": -5.61431360244751, "logits_per_char": -0.9357189337412516, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 794, "native_id": "6cc797ec148c1fc74592957a55bd0951", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.035923480987549, "incorrect_loss_raw": 8.70481812953949, "correct_loss_per_char": 0.4196602900822957, "incorrect_loss_per_char": 0.8002482308281792, "correct_loss_per_token": 2.5179617404937744, "incorrect_loss_per_token": 7.141958832740784, "correct_loss_uncond": -11.24925947189331, "incorrect_loss_uncond": -5.874590277671814}, "model_output": [{"sum_logits": -5.037693977355957, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.420660018920898, "logits_per_token": -2.5188469886779785, "logits_per_char": -0.559743775261773, "num_chars": 9}, {"sum_logits": -14.879281997680664, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.07396125793457, "logits_per_token": -14.879281997680664, "logits_per_char": -1.4879281997680665, "num_chars": 10}, {"sum_logits": -7.465180397033691, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.65135383605957, "logits_per_token": -3.7325901985168457, "logits_per_char": -0.6220983664194742, "num_chars": 12}, {"sum_logits": -7.4371161460876465, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.171658515930176, "logits_per_token": -7.4371161460876465, "logits_per_char": -0.5312225818634033, "num_chars": 14}, {"sum_logits": -5.035923480987549, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.28518295288086, "logits_per_token": -2.5179617404937744, "logits_per_char": -0.4196602900822957, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 795, "native_id": "64dbe5cb840ef4f1d25f8b68db8d5fed", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.985455513000488, "incorrect_loss_raw": 10.372758507728577, "correct_loss_per_char": 0.6985455513000488, "incorrect_loss_per_char": 1.1797957030510662, "correct_loss_per_token": 3.492727756500244, "incorrect_loss_per_token": 8.25691282749176, "correct_loss_uncond": -11.101040840148926, "incorrect_loss_uncond": -5.088573098182678}, "model_output": [{"sum_logits": -16.92676544189453, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -19.213878631591797, "logits_per_token": -8.463382720947266, "logits_per_char": -1.5387968583540483, "num_chars": 11}, {"sum_logits": -7.33845853805542, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -14.836979866027832, "logits_per_token": -7.33845853805542, "logits_per_char": -0.9173073172569275, "num_chars": 8}, {"sum_logits": -6.283795356750488, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -12.77853012084961, "logits_per_token": -6.283795356750488, "logits_per_char": -1.0472992261250813, "num_chars": 6}, {"sum_logits": -10.942014694213867, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -15.015937805175781, "logits_per_token": -10.942014694213867, "logits_per_char": -1.2157794104682074, "num_chars": 9}, {"sum_logits": -6.985455513000488, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -18.086496353149414, "logits_per_token": -3.492727756500244, "logits_per_char": -0.6985455513000488, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 796, "native_id": "a74753bf249c1cbcff632c5c16b0397b", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 4.025160789489746, "incorrect_loss_raw": 10.43332028388977, "correct_loss_per_char": 0.5031450986862183, "incorrect_loss_per_char": 1.4756231279084178, "correct_loss_per_token": 4.025160789489746, "incorrect_loss_per_token": 8.0673987865448, "correct_loss_uncond": -7.897103309631348, "incorrect_loss_uncond": -6.610657453536987}, "model_output": [{"sum_logits": -10.3222017288208, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.278654098510742, "logits_per_token": -5.1611008644104, "logits_per_char": -0.9383819753473456, "num_chars": 11}, {"sum_logits": -11.38078498840332, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.672751426696777, "logits_per_token": -11.38078498840332, "logits_per_char": -1.89679749806722, "num_chars": 6}, {"sum_logits": -8.605170249938965, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.687454223632812, "logits_per_token": -4.302585124969482, "logits_per_char": -0.7822882045399059, "num_chars": 11}, {"sum_logits": -4.025160789489746, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.922264099121094, "logits_per_token": -4.025160789489746, "logits_per_char": -0.5031450986862183, "num_chars": 8}, {"sum_logits": -11.425124168395996, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.5370512008667, "logits_per_token": -11.425124168395996, "logits_per_char": -2.2850248336791994, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 797, "native_id": "9190efbd77fe10b989fcaae35e208a0f", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.356958389282227, "incorrect_loss_raw": 11.063151717185974, "correct_loss_per_char": 1.1696197986602783, "incorrect_loss_per_char": 0.9959094350859201, "correct_loss_per_token": 3.118986129760742, "incorrect_loss_per_token": 4.538489639759064, "correct_loss_uncond": -4.761652946472168, "incorrect_loss_uncond": -5.5887356996536255}, "model_output": [{"sum_logits": -9.356958389282227, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.118611335754395, "logits_per_token": -3.118986129760742, "logits_per_char": -1.1696197986602783, "num_chars": 8}, {"sum_logits": -6.940622806549072, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.253847122192383, "logits_per_token": -3.470311403274536, "logits_per_char": -0.867577850818634, "num_chars": 8}, {"sum_logits": -14.910629272460938, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.918708801269531, "logits_per_token": -4.9702097574869795, "logits_per_char": -1.3555117520419033, "num_chars": 11}, {"sum_logits": -8.923439979553223, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.089000701904297, "logits_per_token": -2.9744799931844077, "logits_per_char": -0.6373885699680873, "num_chars": 14}, {"sum_logits": -13.477914810180664, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -18.345993041992188, "logits_per_token": -6.738957405090332, "logits_per_char": -1.1231595675150554, "num_chars": 12}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 798, "native_id": "ff0303db294a823d4138fb81a6ee6438", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.102952003479004, "incorrect_loss_raw": 9.72283411026001, "correct_loss_per_char": 0.5548138184980913, "incorrect_loss_per_char": 0.775790274143219, "correct_loss_per_token": 3.051476001739502, "incorrect_loss_per_token": 5.485662221908569, "correct_loss_uncond": -11.328150749206543, "incorrect_loss_uncond": -7.828977108001709}, "model_output": [{"sum_logits": -6.102952003479004, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.431102752685547, "logits_per_token": -3.051476001739502, "logits_per_char": -0.5548138184980913, "num_chars": 11}, {"sum_logits": -6.651209354400635, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.90507698059082, "logits_per_token": -6.651209354400635, "logits_per_char": -1.1085348924001057, "num_chars": 6}, {"sum_logits": -4.971744060516357, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.548974990844727, "logits_per_token": -1.6572480201721191, "logits_per_char": -0.4143120050430298, "num_chars": 12}, {"sum_logits": -17.850202560424805, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -23.20541000366211, "logits_per_token": -8.925101280212402, "logits_per_char": -0.9916779200236002, "num_chars": 18}, {"sum_logits": -9.418180465698242, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.54778289794922, "logits_per_token": -4.709090232849121, "logits_per_char": -0.5886362791061401, "num_chars": 16}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 799, "native_id": "63963c9c15835d451aac2e1e0b116388", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.389993667602539, "incorrect_loss_raw": 10.804122686386108, "correct_loss_per_char": 0.9128562382289341, "incorrect_loss_per_char": 1.105376434919638, "correct_loss_per_token": 6.389993667602539, "incorrect_loss_per_token": 5.6538327534993496, "correct_loss_uncond": -9.26509952545166, "incorrect_loss_uncond": -5.3736231327056885}, "model_output": [{"sum_logits": -10.365301132202148, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.76177406311035, "logits_per_token": -3.4551003774007163, "logits_per_char": -1.4807573046003069, "num_chars": 7}, {"sum_logits": -6.389993667602539, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.6550931930542, "logits_per_token": -6.389993667602539, "logits_per_char": -0.9128562382289341, "num_chars": 7}, {"sum_logits": -6.874107360839844, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.016243934631348, "logits_per_token": -6.874107360839844, "logits_per_char": -1.1456845601399739, "num_chars": 6}, {"sum_logits": -7.722470283508301, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.594550132751465, "logits_per_token": -7.722470283508301, "logits_per_char": -0.9653087854385376, "num_chars": 8}, {"sum_logits": -18.25461196899414, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.338415145874023, "logits_per_token": -4.563652992248535, "logits_per_char": -0.8297550894997336, "num_chars": 22}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 800, "native_id": "cc8324b73ed9625e723ef041dfc77a37", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.871566772460938, "incorrect_loss_raw": 12.94339919090271, "correct_loss_per_char": 1.478594462076823, "incorrect_loss_per_char": 1.2360631527838768, "correct_loss_per_token": 8.871566772460938, "incorrect_loss_per_token": 8.918710231781006, "correct_loss_uncond": -2.813887596130371, "incorrect_loss_uncond": -3.6999309062957764}, "model_output": [{"sum_logits": -11.747499465942383, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.920188903808594, "logits_per_token": -5.873749732971191, "logits_per_char": -1.067954496903853, "num_chars": 11}, {"sum_logits": -10.686070442199707, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.657332420349121, "logits_per_token": -10.686070442199707, "logits_per_char": -1.5265814917428153, "num_chars": 7}, {"sum_logits": -20.45001220703125, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.24371337890625, "logits_per_token": -10.225006103515625, "logits_per_char": -1.4607151576450892, "num_chars": 14}, {"sum_logits": -8.871566772460938, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.685454368591309, "logits_per_token": -8.871566772460938, "logits_per_char": -1.478594462076823, "num_chars": 6}, {"sum_logits": -8.8900146484375, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.75208568572998, "logits_per_token": -8.8900146484375, "logits_per_char": -0.88900146484375, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 801, "native_id": "684dbde19719e8224113433981d6e01e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.158921241760254, "incorrect_loss_raw": 16.51470375061035, "correct_loss_per_char": 0.559901931069114, "incorrect_loss_per_char": 1.3946843418208035, "correct_loss_per_token": 3.079460620880127, "incorrect_loss_per_token": 8.389107386271158, "correct_loss_uncond": -9.474466323852539, "incorrect_loss_uncond": -2.595899820327759}, "model_output": [{"sum_logits": -6.158921241760254, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.633387565612793, "logits_per_token": -3.079460620880127, "logits_per_char": -0.559901931069114, "num_chars": 11}, {"sum_logits": -10.60272216796875, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.71352767944336, "logits_per_token": -10.60272216796875, "logits_per_char": -2.12054443359375, "num_chars": 5}, {"sum_logits": -12.980164527893066, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.934584617614746, "logits_per_token": -6.490082263946533, "logits_per_char": -1.1800149570811878, "num_chars": 11}, {"sum_logits": -28.646034240722656, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -31.683643341064453, "logits_per_token": -9.548678080240885, "logits_per_char": -0.895188570022583, "num_chars": 32}, {"sum_logits": -13.829894065856934, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.110658645629883, "logits_per_token": -6.914947032928467, "logits_per_char": -1.3829894065856934, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 802, "native_id": "21450618657881d8c5af73691f3423a7_1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.242210388183594, "incorrect_loss_raw": 8.686980724334717, "correct_loss_per_char": 1.0403683980305989, "incorrect_loss_per_char": 0.9837766224409874, "correct_loss_per_token": 6.242210388183594, "incorrect_loss_per_token": 7.441269159317017, "correct_loss_uncond": -5.704010963439941, "incorrect_loss_uncond": -5.662595748901367}, "model_output": [{"sum_logits": -8.479475021362305, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -11.46108627319336, "logits_per_token": -8.479475021362305, "logits_per_char": -1.4132458368937175, "num_chars": 6}, {"sum_logits": -6.242210388183594, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -11.946221351623535, "logits_per_token": -6.242210388183594, "logits_per_char": -1.0403683980305989, "num_chars": 6}, {"sum_logits": -8.013874053955078, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.241378784179688, "logits_per_token": -8.013874053955078, "logits_per_char": -1.0017342567443848, "num_chars": 8}, {"sum_logits": -8.288881301879883, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.562139511108398, "logits_per_token": -8.288881301879883, "logits_per_char": -0.7535346638072621, "num_chars": 11}, {"sum_logits": -9.965692520141602, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.13370132446289, "logits_per_token": -4.982846260070801, "logits_per_char": -0.7665917323185847, "num_chars": 13}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 803, "native_id": "8b94b61b604ec0d7508804033eec6d23", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.725470066070557, "incorrect_loss_raw": 10.99890148639679, "correct_loss_per_char": 0.5906837582588196, "incorrect_loss_per_char": 1.0614388547246418, "correct_loss_per_token": 2.3627350330352783, "incorrect_loss_per_token": 4.736189901828766, "correct_loss_uncond": -10.188846111297607, "incorrect_loss_uncond": -5.1150864362716675}, "model_output": [{"sum_logits": -7.7253289222717285, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.167723655700684, "logits_per_token": -3.8626644611358643, "logits_per_char": -0.8583698802524142, "num_chars": 9}, {"sum_logits": -10.038810729980469, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.091103553771973, "logits_per_token": -5.019405364990234, "logits_per_char": -1.4341158185686385, "num_chars": 7}, {"sum_logits": -7.913206100463867, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.996505737304688, "logits_per_token": -3.9566030502319336, "logits_per_char": -0.9891507625579834, "num_chars": 8}, {"sum_logits": -18.318260192871094, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -22.200618743896484, "logits_per_token": -6.106086730957031, "logits_per_char": -0.9641189575195312, "num_chars": 19}, {"sum_logits": -4.725470066070557, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.914316177368164, "logits_per_token": -2.3627350330352783, "logits_per_char": -0.5906837582588196, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 804, "native_id": "52ecf169febc95a7f5ccb048fc85857d", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 11.886358261108398, "incorrect_loss_raw": 11.755109071731567, "correct_loss_per_char": 0.5660170600527809, "incorrect_loss_per_char": 1.2889649531197926, "correct_loss_per_token": 5.943179130554199, "incorrect_loss_per_token": 7.780715147654216, "correct_loss_uncond": -8.656412124633789, "incorrect_loss_uncond": -5.761412620544434}, "model_output": [{"sum_logits": -11.886358261108398, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.542770385742188, "logits_per_token": -5.943179130554199, "logits_per_char": -0.5660170600527809, "num_chars": 21}, {"sum_logits": -15.742719650268555, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.667911529541016, "logits_per_token": -7.871359825134277, "logits_per_char": -1.7491910722520616, "num_chars": 9}, {"sum_logits": -9.89522647857666, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.192317008972168, "logits_per_token": -9.89522647857666, "logits_per_char": -0.989522647857666, "num_chars": 10}, {"sum_logits": -9.34316635131836, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.668022155761719, "logits_per_token": -9.34316635131836, "logits_per_char": -1.5571943918863933, "num_chars": 6}, {"sum_logits": -12.039323806762695, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -24.5378360748291, "logits_per_token": -4.013107935587565, "logits_per_char": -0.8599517004830497, "num_chars": 14}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 805, "native_id": "e408a5a031caec33782cb3b3a005eecc", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.924335479736328, "incorrect_loss_raw": 9.324520111083984, "correct_loss_per_char": 0.740541934967041, "incorrect_loss_per_char": 1.2705976948851632, "correct_loss_per_token": 5.924335479736328, "incorrect_loss_per_token": 7.694472551345825, "correct_loss_uncond": -8.383426666259766, "incorrect_loss_uncond": -5.827897787094116}, "model_output": [{"sum_logits": -6.6491241455078125, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.973722457885742, "logits_per_token": -6.6491241455078125, "logits_per_char": -0.5540936787923177, "num_chars": 12}, {"sum_logits": -6.465048789978027, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.993132591247559, "logits_per_token": -6.465048789978027, "logits_per_char": -0.8081310987472534, "num_chars": 8}, {"sum_logits": -11.143527030944824, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.600893020629883, "logits_per_token": -11.143527030944824, "logits_per_char": -1.8572545051574707, "num_chars": 6}, {"sum_logits": -13.040380477905273, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.04192352294922, "logits_per_token": -6.520190238952637, "logits_per_char": -1.8629114968436105, "num_chars": 7}, {"sum_logits": -5.924335479736328, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.307762145996094, "logits_per_token": -5.924335479736328, "logits_per_char": -0.740541934967041, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 806, "native_id": "31bd05ba62a16ee35217224b98c6baea", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.418340682983398, "incorrect_loss_raw": 7.397370934486389, "correct_loss_per_char": 0.5418340682983398, "incorrect_loss_per_char": 1.070289621466682, "correct_loss_per_token": 5.418340682983398, "incorrect_loss_per_token": 7.397370934486389, "correct_loss_uncond": -7.316166877746582, "incorrect_loss_uncond": -6.526780486106873}, "model_output": [{"sum_logits": -6.965766906738281, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.857012748718262, "logits_per_token": -6.965766906738281, "logits_per_char": -0.6965766906738281, "num_chars": 10}, {"sum_logits": -7.395507335662842, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.017424583435059, "logits_per_token": -7.395507335662842, "logits_per_char": -1.232584555943807, "num_chars": 6}, {"sum_logits": -5.418340682983398, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.73450756072998, "logits_per_token": -5.418340682983398, "logits_per_char": -0.5418340682983398, "num_chars": 10}, {"sum_logits": -7.813582420349121, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.247040748596191, "logits_per_token": -7.813582420349121, "logits_per_char": -1.1162260600498743, "num_chars": 7}, {"sum_logits": -7.4146270751953125, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.575127601623535, "logits_per_token": -7.4146270751953125, "logits_per_char": -1.2357711791992188, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 807, "native_id": "b4043bd1f65a8ad088e62042eca259c2", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.456900596618652, "incorrect_loss_raw": 8.213201522827148, "correct_loss_per_char": 1.050766732957628, "incorrect_loss_per_char": 1.0211088922288682, "correct_loss_per_token": 9.456900596618652, "incorrect_loss_per_token": 5.792208671569824, "correct_loss_uncond": -4.795083999633789, "incorrect_loss_uncond": -6.56459641456604}, "model_output": [{"sum_logits": -11.341270446777344, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.40850067138672, "logits_per_token": -5.670635223388672, "logits_per_char": -0.9451058705647787, "num_chars": 12}, {"sum_logits": -4.310551643371582, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.312787055969238, "logits_per_token": -4.310551643371582, "logits_per_char": -0.7184252738952637, "num_chars": 6}, {"sum_logits": -9.456900596618652, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.251984596252441, "logits_per_token": -9.456900596618652, "logits_per_char": -1.050766732957628, "num_chars": 9}, {"sum_logits": -8.02667236328125, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.767253875732422, "logits_per_token": -4.013336181640625, "logits_per_char": -0.8918524848090278, "num_chars": 9}, {"sum_logits": -9.174311637878418, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.622650146484375, "logits_per_token": -9.174311637878418, "logits_per_char": -1.529051939646403, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 808, "native_id": "4302e727e47f464511d4d04f22bed0d2", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.677471160888672, "incorrect_loss_raw": 11.1565660238266, "correct_loss_per_char": 1.7795785268147786, "incorrect_loss_per_char": 1.289509711757539, "correct_loss_per_token": 10.677471160888672, "incorrect_loss_per_token": 7.57778787612915, "correct_loss_uncond": -5.255011558532715, "incorrect_loss_uncond": -5.135735630989075}, "model_output": [{"sum_logits": -11.107756614685059, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.565593719482422, "logits_per_token": -5.553878307342529, "logits_per_char": -1.234195179409451, "num_chars": 9}, {"sum_logits": -8.499478340148926, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.075064659118652, "logits_per_token": -8.499478340148926, "logits_per_char": -1.2142111914498466, "num_chars": 7}, {"sum_logits": -17.52246856689453, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.409805297851562, "logits_per_token": -8.761234283447266, "logits_per_char": -1.4602057139078777, "num_chars": 12}, {"sum_logits": -10.677471160888672, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.932482719421387, "logits_per_token": -10.677471160888672, "logits_per_char": -1.7795785268147786, "num_chars": 6}, {"sum_logits": -7.496560573577881, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.118742942810059, "logits_per_token": -7.496560573577881, "logits_per_char": -1.2494267622629802, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 809, "native_id": "f0d473701d52125dd055d23042de1b0d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.4664950370788574, "incorrect_loss_raw": 10.27104902267456, "correct_loss_per_char": 0.49521357672555105, "incorrect_loss_per_char": 1.1554961384116829, "correct_loss_per_token": 3.4664950370788574, "incorrect_loss_per_token": 8.196133375167847, "correct_loss_uncond": -11.104864597320557, "incorrect_loss_uncond": -5.3883209228515625}, "model_output": [{"sum_logits": -3.4664950370788574, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.571359634399414, "logits_per_token": -3.4664950370788574, "logits_per_char": -0.49521357672555105, "num_chars": 7}, {"sum_logits": -6.421167373657227, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -19.662477493286133, "logits_per_token": -3.2105836868286133, "logits_per_char": -0.9173096248081752, "num_chars": 7}, {"sum_logits": -11.24884033203125, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.244270324707031, "logits_per_token": -11.24884033203125, "logits_per_char": -1.124884033203125, "num_chars": 10}, {"sum_logits": -10.178157806396484, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.92625904083252, "logits_per_token": -5.089078903198242, "logits_per_char": -0.9252870733087714, "num_chars": 11}, {"sum_logits": -13.236030578613281, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.804472923278809, "logits_per_token": -13.236030578613281, "logits_per_char": -1.6545038223266602, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 810, "native_id": "d35112a99ab3983fb51c3adae80bc2da", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.0272436141967773, "incorrect_loss_raw": 16.16076922416687, "correct_loss_per_char": 0.5045406023661295, "incorrect_loss_per_char": 1.835216551073014, "correct_loss_per_token": 3.0272436141967773, "incorrect_loss_per_token": 11.659713625907898, "correct_loss_uncond": -10.262310981750488, "incorrect_loss_uncond": -0.6307706832885742}, "model_output": [{"sum_logits": -13.150616645812988, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.21500301361084, "logits_per_token": -13.150616645812988, "logits_per_char": -2.1917694409688315, "num_chars": 6}, {"sum_logits": -15.990218162536621, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.027002334594727, "logits_per_token": -7.9951090812683105, "logits_per_char": -1.9987772703170776, "num_chars": 8}, {"sum_logits": -15.484015464782715, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.07509994506836, "logits_per_token": -15.484015464782715, "logits_per_char": -1.720446162753635, "num_chars": 9}, {"sum_logits": -3.0272436141967773, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.289554595947266, "logits_per_token": -3.0272436141967773, "logits_per_char": -0.5045406023661295, "num_chars": 6}, {"sum_logits": -20.018226623535156, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.84905433654785, "logits_per_token": -10.009113311767578, "logits_per_char": -1.429873330252511, "num_chars": 14}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 811, "native_id": "661474a1a0c29dd7a243b284535ac934", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.405750274658203, "incorrect_loss_raw": 10.100483179092407, "correct_loss_per_char": 0.6004107339041573, "incorrect_loss_per_char": 1.230739916746433, "correct_loss_per_token": 4.202875137329102, "incorrect_loss_per_token": 7.852081298828125, "correct_loss_uncond": -10.386798858642578, "incorrect_loss_uncond": -4.178330898284912}, "model_output": [{"sum_logits": -17.987215042114258, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.973587036132812, "logits_per_token": -8.993607521057129, "logits_per_char": -1.3836319263164814, "num_chars": 13}, {"sum_logits": -9.353707313537598, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.404218673706055, "logits_per_token": -9.353707313537598, "logits_per_char": -1.1692134141921997, "num_chars": 8}, {"sum_logits": -5.798377990722656, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.841102600097656, "logits_per_token": -5.798377990722656, "logits_per_char": -1.1596755981445312, "num_chars": 5}, {"sum_logits": -8.405750274658203, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.79254913330078, "logits_per_token": -4.202875137329102, "logits_per_char": -0.6004107339041573, "num_chars": 14}, {"sum_logits": -7.262632369995117, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.896347999572754, "logits_per_token": -7.262632369995117, "logits_per_char": -1.2104387283325195, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 812, "native_id": "6416dcdf9b8d7d2787f07e7426f86fe4", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.525177001953125, "incorrect_loss_raw": 17.139071941375732, "correct_loss_per_char": 0.7104314168294271, "incorrect_loss_per_char": 1.4242012343563877, "correct_loss_per_token": 2.8417256673177085, "incorrect_loss_per_token": 7.048892180124919, "correct_loss_uncond": -12.959451675415039, "incorrect_loss_uncond": -2.0810954570770264}, "model_output": [{"sum_logits": -19.152040481567383, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -21.47437286376953, "logits_per_token": -6.384013493855794, "logits_per_char": -1.3680028915405273, "num_chars": 14}, {"sum_logits": -24.646690368652344, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -20.868106842041016, "logits_per_token": -8.215563456217447, "logits_per_char": -1.8958992591271033, "num_chars": 13}, {"sum_logits": -16.742347717285156, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -19.390138626098633, "logits_per_token": -5.580782572428386, "logits_per_char": -1.2878729013296275, "num_chars": 13}, {"sum_logits": -8.015209197998047, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.148051261901855, "logits_per_token": -8.015209197998047, "logits_per_char": -1.1450298854282923, "num_chars": 7}, {"sum_logits": -8.525177001953125, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -21.484628677368164, "logits_per_token": -2.8417256673177085, "logits_per_char": -0.7104314168294271, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 813, "native_id": "0f54a1ee30a0034a3d2db1bfdef9ca85", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.8123109340667725, "incorrect_loss_raw": 11.832027196884155, "correct_loss_per_char": 0.1647555394606157, "incorrect_loss_per_char": 1.2503140881020798, "correct_loss_per_token": 1.8123109340667725, "incorrect_loss_per_token": 8.824334859848022, "correct_loss_uncond": -10.34291672706604, "incorrect_loss_uncond": -4.025009632110596}, "model_output": [{"sum_logits": -14.42069149017334, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.780450820922852, "logits_per_token": -14.42069149017334, "logits_per_char": -1.8025864362716675, "num_chars": 8}, {"sum_logits": -8.845878601074219, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.004643440246582, "logits_per_token": -8.845878601074219, "logits_per_char": -0.8845878601074219, "num_chars": 10}, {"sum_logits": -1.8123109340667725, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": true, "sum_logits_uncond": -12.155227661132812, "logits_per_token": -1.8123109340667725, "logits_per_char": -0.1647555394606157, "num_chars": 11}, {"sum_logits": -13.54843807220459, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.462135314941406, "logits_per_token": -6.774219036102295, "logits_per_char": -1.5053820080227323, "num_chars": 9}, {"sum_logits": -10.513100624084473, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.180917739868164, "logits_per_token": -5.256550312042236, "logits_per_char": -0.8087000480064979, "num_chars": 13}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 814, "native_id": "7850beb1209c41fabe385cbedc96a61a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.0620298385620117, "incorrect_loss_raw": 13.880080938339233, "correct_loss_per_char": 0.25775372982025146, "incorrect_loss_per_char": 0.9327656353240485, "correct_loss_per_token": 1.0310149192810059, "incorrect_loss_per_token": 5.748006184895833, "correct_loss_uncond": -12.700067520141602, "incorrect_loss_uncond": -8.653367757797241}, "model_output": [{"sum_logits": -2.0620298385620117, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.762097358703613, "logits_per_token": -1.0310149192810059, "logits_per_char": -0.25775372982025146, "num_chars": 8}, {"sum_logits": -15.680136680603027, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -23.81637191772461, "logits_per_token": -5.226712226867676, "logits_per_char": -0.9800085425376892, "num_chars": 16}, {"sum_logits": -12.928686141967773, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -23.036880493164062, "logits_per_token": -4.309562047322591, "logits_per_char": -0.9234775815691266, "num_chars": 14}, {"sum_logits": -10.095460891723633, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.41530990600586, "logits_per_token": -5.047730445861816, "logits_per_char": -0.7765739147479718, "num_chars": 13}, {"sum_logits": -16.8160400390625, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.865232467651367, "logits_per_token": -8.40802001953125, "logits_per_char": -1.0510025024414062, "num_chars": 16}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 815, "native_id": "cdb06b28b9c4e7ef7e880d1f096fd409", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.36872386932373, "incorrect_loss_raw": 15.207561254501343, "correct_loss_per_char": 0.5184361934661865, "incorrect_loss_per_char": 1.2356977021252669, "correct_loss_per_token": 5.184361934661865, "incorrect_loss_per_token": 8.998848994572956, "correct_loss_uncond": -10.940350532531738, "incorrect_loss_uncond": -4.303022146224976}, "model_output": [{"sum_logits": -23.569351196289062, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -27.906356811523438, "logits_per_token": -7.8564503987630205, "logits_per_char": -0.8729389331958912, "num_chars": 27}, {"sum_logits": -13.259608268737793, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.27340316772461, "logits_per_token": -13.259608268737793, "logits_per_char": -2.209934711456299, "num_chars": 6}, {"sum_logits": -10.36872386932373, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -21.30907440185547, "logits_per_token": -5.184361934661865, "logits_per_char": -0.5184361934661865, "num_chars": 20}, {"sum_logits": -5.757389068603516, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.294889450073242, "logits_per_token": -5.757389068603516, "logits_per_char": -0.7196736335754395, "num_chars": 8}, {"sum_logits": -18.243896484375, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.567684173583984, "logits_per_token": -9.1219482421875, "logits_per_char": -1.1402435302734375, "num_chars": 16}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 816, "native_id": "14309d9bd3c13d1c0efb625198f6304a", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.0528359413146973, "incorrect_loss_raw": 8.181528210639954, "correct_loss_per_char": 0.38160449266433716, "incorrect_loss_per_char": 0.7834093678565253, "correct_loss_per_token": 3.0528359413146973, "incorrect_loss_per_token": 5.10196453332901, "correct_loss_uncond": -10.272768497467041, "incorrect_loss_uncond": -6.94987428188324}, "model_output": [{"sum_logits": -10.03210163116455, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.96494197845459, "logits_per_token": -10.03210163116455, "logits_per_char": -1.4331573758806502, "num_chars": 7}, {"sum_logits": -5.639005184173584, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.387718200683594, "logits_per_token": -2.819502592086792, "logits_per_char": -0.469917098681132, "num_chars": 12}, {"sum_logits": -4.390003204345703, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.73450756072998, "logits_per_token": -4.390003204345703, "logits_per_char": -0.4390003204345703, "num_chars": 10}, {"sum_logits": -12.665002822875977, "num_tokens": 4, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.43844223022461, "logits_per_token": -3.166250705718994, "logits_per_char": -0.7915626764297485, "num_chars": 16}, {"sum_logits": -3.0528359413146973, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.325604438781738, "logits_per_token": -3.0528359413146973, "logits_per_char": -0.38160449266433716, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 817, "native_id": "a00276c6db928900772c0320aeff77c0", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.3634562492370605, "incorrect_loss_raw": 13.775428771972656, "correct_loss_per_char": 0.4726912498474121, "incorrect_loss_per_char": 1.7533195483220088, "correct_loss_per_token": 2.3634562492370605, "incorrect_loss_per_token": 9.831054846445719, "correct_loss_uncond": -9.394665241241455, "incorrect_loss_uncond": -2.961742639541626}, "model_output": [{"sum_logits": -13.175691604614258, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.633954048156738, "logits_per_token": -13.175691604614258, "logits_per_char": -1.882241657802037, "num_chars": 7}, {"sum_logits": -2.3634562492370605, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -11.758121490478516, "logits_per_token": -2.3634562492370605, "logits_per_char": -0.4726912498474121, "num_chars": 5}, {"sum_logits": -13.191842079162598, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.53042984008789, "logits_per_token": -13.191842079162598, "logits_per_char": -1.4657602310180664, "num_chars": 9}, {"sum_logits": -20.271751403808594, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.987972259521484, "logits_per_token": -10.135875701904297, "logits_per_char": -2.8959644862583707, "num_chars": 7}, {"sum_logits": -8.462430000305176, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.796329498291016, "logits_per_token": -2.820810000101725, "logits_per_char": -0.7693118182095614, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 818, "native_id": "4706be6e24f1fafd9ff9fe63583acffd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.75102424621582, "incorrect_loss_raw": 9.093258380889893, "correct_loss_per_char": 0.5962326343242939, "incorrect_loss_per_char": 0.5510418891906739, "correct_loss_per_token": 3.87551212310791, "incorrect_loss_per_token": 3.9575038750966387, "correct_loss_uncond": -10.93460464477539, "incorrect_loss_uncond": -9.86018991470337}, "model_output": [{"sum_logits": -6.621040344238281, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.266294479370117, "logits_per_token": -3.3105201721191406, "logits_per_char": -0.3310520172119141, "num_chars": 20}, {"sum_logits": -14.139007568359375, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.910371780395508, "logits_per_token": -4.713002522786458, "logits_per_char": -0.9426005045572917, "num_chars": 15}, {"sum_logits": -8.991945266723633, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.370832443237305, "logits_per_token": -4.495972633361816, "logits_per_char": -0.5994630177815755, "num_chars": 15}, {"sum_logits": -6.621040344238281, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.266294479370117, "logits_per_token": -3.3105201721191406, "logits_per_char": -0.3310520172119141, "num_chars": 20}, {"sum_logits": -7.75102424621582, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.68562889099121, "logits_per_token": -3.87551212310791, "logits_per_char": -0.5962326343242939, "num_chars": 13}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 819, "native_id": "ee8819b2da5453848c1cbb9d9c93403b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.181088924407959, "incorrect_loss_raw": 9.534637808799744, "correct_loss_per_char": 0.5129349231719971, "incorrect_loss_per_char": 0.9599565712844624, "correct_loss_per_token": 1.7952722311019897, "incorrect_loss_per_token": 4.7740165789922075, "correct_loss_uncond": -10.58599328994751, "incorrect_loss_uncond": -5.991450905799866}, "model_output": [{"sum_logits": -5.072200298309326, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.583044052124023, "logits_per_token": -5.072200298309326, "logits_per_char": -0.8453667163848877, "num_chars": 6}, {"sum_logits": -7.181088924407959, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.76708221435547, "logits_per_token": -1.7952722311019897, "logits_per_char": -0.5129349231719971, "num_chars": 14}, {"sum_logits": -15.055856704711914, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.01510238647461, "logits_per_token": -5.018618901570638, "logits_per_char": -0.8856386296889361, "num_chars": 17}, {"sum_logits": -5.699633598327637, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.36673355102539, "logits_per_token": -2.8498167991638184, "logits_per_char": -0.5699633598327637, "num_chars": 10}, {"sum_logits": -12.310860633850098, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.139474868774414, "logits_per_token": -6.155430316925049, "logits_per_char": -1.5388575792312622, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 820, "native_id": "84ea43b967259814d939c62131f74df0", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.900546073913574, "incorrect_loss_raw": 13.560391902923584, "correct_loss_per_char": 0.6125682592391968, "incorrect_loss_per_char": 1.748320150375366, "correct_loss_per_token": 4.900546073913574, "incorrect_loss_per_token": 11.260047912597656, "correct_loss_uncond": -8.425058364868164, "incorrect_loss_uncond": 0.29550862312316895}, "model_output": [{"sum_logits": -9.82284164428711, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -11.828949928283691, "logits_per_token": -9.82284164428711, "logits_per_char": -1.6371402740478516, "num_chars": 6}, {"sum_logits": -4.900546073913574, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -13.325604438781738, "logits_per_token": -4.900546073913574, "logits_per_char": -0.6125682592391968, "num_chars": 8}, {"sum_logits": -12.301958084106445, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -14.322620391845703, "logits_per_token": -12.301958084106445, "logits_per_char": -1.2301958084106446, "num_chars": 10}, {"sum_logits": -18.402751922607422, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -14.83657169342041, "logits_per_token": -9.201375961303711, "logits_per_char": -1.840275192260742, "num_chars": 10}, {"sum_logits": -13.71401596069336, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -12.071391105651855, "logits_per_token": -13.71401596069336, "logits_per_char": -2.2856693267822266, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 821, "native_id": "60e7338e9e6bfc746a15a161eb12706c", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.710507392883301, "incorrect_loss_raw": 8.231996417045593, "correct_loss_per_char": 0.5592089494069418, "incorrect_loss_per_char": 1.2469888868786039, "correct_loss_per_token": 6.710507392883301, "incorrect_loss_per_token": 7.271150767803192, "correct_loss_uncond": -8.447028160095215, "incorrect_loss_uncond": -5.578325629234314}, "model_output": [{"sum_logits": -7.686765193939209, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.68304443359375, "logits_per_token": -3.8433825969696045, "logits_per_char": -1.098109313419887, "num_chars": 7}, {"sum_logits": -11.92621898651123, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -11.930030822753906, "logits_per_token": -11.92621898651123, "logits_per_char": -1.9877031644185383, "num_chars": 6}, {"sum_logits": -6.212238311767578, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.019676208496094, "logits_per_token": -6.212238311767578, "logits_per_char": -0.8874626159667969, "num_chars": 7}, {"sum_logits": -7.1027631759643555, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.608536720275879, "logits_per_token": -7.1027631759643555, "logits_per_char": -1.0146804537091936, "num_chars": 7}, {"sum_logits": -6.710507392883301, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.157535552978516, "logits_per_token": -6.710507392883301, "logits_per_char": -0.5592089494069418, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 822, "native_id": "a0f5414bf98e094f4d807abee28861a4", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.628813743591309, "incorrect_loss_raw": 9.595828652381897, "correct_loss_per_char": 1.0483702879685621, "incorrect_loss_per_char": 0.9526810836310339, "correct_loss_per_token": 4.5429379145304365, "incorrect_loss_per_token": 4.7979143261909485, "correct_loss_uncond": -8.87502384185791, "incorrect_loss_uncond": -8.170286774635315}, "model_output": [{"sum_logits": -12.445244789123535, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.95294761657715, "logits_per_token": -6.222622394561768, "logits_per_char": -1.1313858899203213, "num_chars": 11}, {"sum_logits": -8.390817642211914, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.14581298828125, "logits_per_token": -4.195408821105957, "logits_per_char": -0.8390817642211914, "num_chars": 10}, {"sum_logits": -13.628813743591309, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -22.50383758544922, "logits_per_token": -4.5429379145304365, "logits_per_char": -1.0483702879685621, "num_chars": 13}, {"sum_logits": -9.849420547485352, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.728801727294922, "logits_per_token": -4.924710273742676, "logits_per_char": -0.9849420547485351, "num_chars": 10}, {"sum_logits": -7.697831630706787, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.236899375915527, "logits_per_token": -3.8489158153533936, "logits_per_char": -0.8553146256340874, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 823, "native_id": "44120a9443c619d98ce5bfe4bb219c43", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 2.9472830295562744, "incorrect_loss_raw": 6.140928089618683, "correct_loss_per_char": 0.3274758921729194, "incorrect_loss_per_char": 0.893253574059123, "correct_loss_per_token": 2.9472830295562744, "incorrect_loss_per_token": 5.501446962356567, "correct_loss_uncond": -12.127816915512085, "incorrect_loss_uncond": -9.36672991514206}, "model_output": [{"sum_logits": -2.9472830295562744, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -15.07509994506836, "logits_per_token": -2.9472830295562744, "logits_per_char": -0.3274758921729194, "num_chars": 9}, {"sum_logits": -5.801961898803711, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.434986114501953, "logits_per_token": -5.801961898803711, "logits_per_char": -0.7252452373504639, "num_chars": 8}, {"sum_logits": -2.773216962814331, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.071504592895508, "logits_per_token": -2.773216962814331, "logits_per_char": -0.3961738518306187, "num_chars": 7}, {"sum_logits": -10.872684478759766, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -15.432170867919922, "logits_per_token": -10.872684478759766, "logits_per_char": -1.8121140797932942, "num_chars": 6}, {"sum_logits": -5.115849018096924, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -18.091970443725586, "logits_per_token": -2.557924509048462, "logits_per_char": -0.6394811272621155, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 824, "native_id": "38ab26e29a0984b212006d39185c43f3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.8104891777038574, "incorrect_loss_raw": 9.585646629333496, "correct_loss_per_char": 0.21169384320576987, "incorrect_loss_per_char": 1.282186726161412, "correct_loss_per_token": 1.9052445888519287, "incorrect_loss_per_token": 8.149819612503052, "correct_loss_uncond": -13.1470627784729, "incorrect_loss_uncond": -5.54809832572937}, "model_output": [{"sum_logits": -9.830830574035645, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -12.562989234924316, "logits_per_token": -9.830830574035645, "logits_per_char": -1.4044043677193778, "num_chars": 7}, {"sum_logits": -3.8104891777038574, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -16.957551956176758, "logits_per_token": -1.9052445888519287, "logits_per_char": -0.21169384320576987, "num_chars": 18}, {"sum_logits": -11.486616134643555, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -18.74686050415039, "logits_per_token": -5.743308067321777, "logits_per_char": -1.1486616134643555, "num_chars": 10}, {"sum_logits": -5.967179298400879, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.388150215148926, "logits_per_token": -5.967179298400879, "logits_per_char": -1.1934358596801757, "num_chars": 5}, {"sum_logits": -11.057960510253906, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.836979866027832, "logits_per_token": -11.057960510253906, "logits_per_char": -1.3822450637817383, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 825, "native_id": "a5e207803684eea8a43ca6670c50b354", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.943711280822754, "incorrect_loss_raw": 8.806416392326355, "correct_loss_per_char": 0.6179639101028442, "incorrect_loss_per_char": 1.5896572510401408, "correct_loss_per_token": 2.471855640411377, "incorrect_loss_per_token": 7.076539556185405, "correct_loss_uncond": -10.159041404724121, "incorrect_loss_uncond": -4.246640086174011}, "model_output": [{"sum_logits": -9.27619743347168, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.120752334594727, "logits_per_token": -9.27619743347168, "logits_per_char": -2.31904935836792, "num_chars": 4}, {"sum_logits": -4.943711280822754, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.102752685546875, "logits_per_token": -2.471855640411377, "logits_per_char": -0.6179639101028442, "num_chars": 8}, {"sum_logits": -4.414124965667725, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.807665824890137, "logits_per_token": -4.414124965667725, "logits_per_char": -0.8828249931335449, "num_chars": 5}, {"sum_logits": -11.156082153320312, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -10.333660125732422, "logits_per_token": -11.156082153320312, "logits_per_char": -1.8593470255533855, "num_chars": 6}, {"sum_logits": -10.379261016845703, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.95014762878418, "logits_per_token": -3.459753672281901, "logits_per_char": -1.297407627105713, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 826, "native_id": "af3b9a8b1962cd3bcd19e644d873e7bc", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.516951560974121, "incorrect_loss_raw": 12.454131007194519, "correct_loss_per_char": 0.27966128455268013, "incorrect_loss_per_char": 0.9316301063581424, "correct_loss_per_token": 1.2584757804870605, "incorrect_loss_per_token": 5.715995967388153, "correct_loss_uncond": -14.763903617858887, "incorrect_loss_uncond": -7.176051735877991}, "model_output": [{"sum_logits": -8.177112579345703, "num_tokens": 4, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -17.437232971191406, "logits_per_token": -2.044278144836426, "logits_per_char": -0.4810066223144531, "num_chars": 17}, {"sum_logits": -11.188505172729492, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.393335342407227, "logits_per_token": -5.594252586364746, "logits_per_char": -0.7991789409092495, "num_chars": 14}, {"sum_logits": -23.017948150634766, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -23.411361694335938, "logits_per_token": -11.508974075317383, "logits_per_char": -1.7706113962026744, "num_chars": 13}, {"sum_logits": -7.432958126068115, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -21.27880096435547, "logits_per_token": -3.7164790630340576, "logits_per_char": -0.6757234660061923, "num_chars": 11}, {"sum_logits": -2.516951560974121, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -17.280855178833008, "logits_per_token": -1.2584757804870605, "logits_per_char": -0.27966128455268013, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 827, "native_id": "43a91955fd0717997a16897c3324e095", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.6027369499206543, "incorrect_loss_raw": 13.661579370498657, "correct_loss_per_char": 0.06697077221340603, "incorrect_loss_per_char": 2.020853685008155, "correct_loss_per_token": 0.6027369499206543, "incorrect_loss_per_token": 12.053221821784973, "correct_loss_uncond": -13.071155071258545, "incorrect_loss_uncond": -1.7310645580291748}, "model_output": [{"sum_logits": -15.415567398071289, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.82812213897705, "logits_per_token": -15.415567398071289, "logits_per_char": -3.0831134796142576, "num_chars": 5}, {"sum_logits": -13.164030075073242, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.084624290466309, "logits_per_token": -13.164030075073242, "logits_per_char": -1.6455037593841553, "num_chars": 8}, {"sum_logits": -12.866860389709473, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.98815155029297, "logits_per_token": -6.433430194854736, "logits_per_char": -0.7148255772060819, "num_chars": 18}, {"sum_logits": -0.6027369499206543, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": true, "sum_logits_uncond": -13.6738920211792, "logits_per_token": -0.6027369499206543, "logits_per_char": -0.06697077221340603, "num_chars": 9}, {"sum_logits": -13.199859619140625, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.669677734375, "logits_per_token": -13.199859619140625, "logits_per_char": -2.639971923828125, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 828, "native_id": "7f7a6f2b3087bf37dadbe8aa8d358047", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.20005464553833, "incorrect_loss_raw": 9.001659035682678, "correct_loss_per_char": 0.42000546455383303, "incorrect_loss_per_char": 1.0574448752884913, "correct_loss_per_token": 2.100027322769165, "incorrect_loss_per_token": 5.176337718963623, "correct_loss_uncond": -10.637229442596436, "incorrect_loss_uncond": -7.539501309394836}, "model_output": [{"sum_logits": -5.4040656089782715, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.339823722839355, "logits_per_token": -5.4040656089782715, "logits_per_char": -0.600451734330919, "num_chars": 9}, {"sum_logits": -8.426752090454102, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.234086990356445, "logits_per_token": -4.213376045227051, "logits_per_char": -0.7660683718594637, "num_chars": 11}, {"sum_logits": -4.20005464553833, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.837284088134766, "logits_per_token": -2.100027322769165, "logits_per_char": -0.42000546455383303, "num_chars": 10}, {"sum_logits": -9.992524147033691, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.76109504699707, "logits_per_token": -4.996262073516846, "logits_per_char": -0.832710345586141, "num_chars": 12}, {"sum_logits": -12.183294296264648, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.829635620117188, "logits_per_token": -6.091647148132324, "logits_per_char": -2.0305490493774414, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 829, "native_id": "37d88a9bb24913c1973cc26d4ce3394f", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 11.325153350830078, "incorrect_loss_raw": 16.769647359848022, "correct_loss_per_char": 1.4156441688537598, "incorrect_loss_per_char": 1.8437921975441132, "correct_loss_per_token": 3.7750511169433594, "incorrect_loss_per_token": 7.458249773297991, "correct_loss_uncond": -5.5830841064453125, "incorrect_loss_uncond": -0.9914078712463379}, "model_output": [{"sum_logits": -25.590431213378906, "num_tokens": 7, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -26.865615844726562, "logits_per_token": -3.655775887625558, "logits_per_char": -1.1126274440599524, "num_chars": 23}, {"sum_logits": -15.287130355834961, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.285333633422852, "logits_per_token": -7.6435651779174805, "logits_per_char": -3.0574260711669923, "num_chars": 5}, {"sum_logits": -15.334739685058594, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -16.821880340576172, "logits_per_token": -7.667369842529297, "logits_per_char": -1.3940672440962358, "num_chars": 11}, {"sum_logits": -10.866288185119629, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.071391105651855, "logits_per_token": -10.866288185119629, "logits_per_char": -1.8110480308532715, "num_chars": 6}, {"sum_logits": -11.325153350830078, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -16.90823745727539, "logits_per_token": -3.7750511169433594, "logits_per_char": -1.4156441688537598, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 830, "native_id": "001b0f5a841fd81d13fbe67c7c7179d6", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.892853260040283, "incorrect_loss_raw": 9.673935890197754, "correct_loss_per_char": 0.6266230236400258, "incorrect_loss_per_char": 0.9539966363828261, "correct_loss_per_token": 2.297617753346761, "incorrect_loss_per_token": 7.180569052696228, "correct_loss_uncond": -10.202085018157959, "incorrect_loss_uncond": -7.163437128067017}, "model_output": [{"sum_logits": -10.292732238769531, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -12.714659690856934, "logits_per_token": -10.292732238769531, "logits_per_char": -1.2865915298461914, "num_chars": 8}, {"sum_logits": -8.6639986038208, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -19.503150939941406, "logits_per_token": -4.3319993019104, "logits_per_char": -0.6664614310631385, "num_chars": 13}, {"sum_logits": -11.282936096191406, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -20.502506256103516, "logits_per_token": -5.641468048095703, "logits_per_char": -0.8059240068708148, "num_chars": 14}, {"sum_logits": -8.456076622009277, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -14.629175186157227, "logits_per_token": -8.456076622009277, "logits_per_char": -1.0570095777511597, "num_chars": 8}, {"sum_logits": -6.892853260040283, "num_tokens": 3, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -17.094938278198242, "logits_per_token": -2.297617753346761, "logits_per_char": -0.6266230236400258, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 831, "native_id": "9f9ca9bb06d6afc31b19c365fb29a1c9", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.603825092315674, "incorrect_loss_raw": 10.347501516342163, "correct_loss_per_char": 0.5094386447559703, "incorrect_loss_per_char": 1.3312758360590253, "correct_loss_per_token": 5.603825092315674, "incorrect_loss_per_token": 7.875255982081096, "correct_loss_uncond": -8.349796772003174, "incorrect_loss_uncond": -5.268702507019043}, "model_output": [{"sum_logits": -9.60814094543457, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.386833190917969, "logits_per_token": -9.60814094543457, "logits_per_char": -1.2010176181793213, "num_chars": 8}, {"sum_logits": -14.833473205566406, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -22.395252227783203, "logits_per_token": -4.944491068522136, "logits_per_char": -1.0595338003976005, "num_chars": 14}, {"sum_logits": -5.603825092315674, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.953621864318848, "logits_per_token": -5.603825092315674, "logits_per_char": -0.5094386447559703, "num_chars": 11}, {"sum_logits": -7.194598197937012, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.800455093383789, "logits_per_token": -7.194598197937012, "logits_per_char": -1.4389196395874024, "num_chars": 5}, {"sum_logits": -9.753793716430664, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.882275581359863, "logits_per_token": -9.753793716430664, "logits_per_char": -1.6256322860717773, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 832, "native_id": "d60c5a494539c66982c0f692afde9499", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.374963760375977, "incorrect_loss_raw": 10.69136905670166, "correct_loss_per_char": 0.5795421600341797, "incorrect_loss_per_char": 1.5487731040470185, "correct_loss_per_token": 3.1874818801879883, "incorrect_loss_per_token": 6.848719000816345, "correct_loss_uncond": -10.551332473754883, "incorrect_loss_uncond": -4.769157886505127}, "model_output": [{"sum_logits": -11.798272132873535, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.879467010498047, "logits_per_token": -5.899136066436768, "logits_per_char": -1.6854674475533622, "num_chars": 7}, {"sum_logits": -6.374963760375977, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.92629623413086, "logits_per_token": -3.1874818801879883, "logits_per_char": -0.5795421600341797, "num_chars": 11}, {"sum_logits": -6.743692398071289, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.506343841552734, "logits_per_token": -6.743692398071289, "logits_per_char": -1.3487384796142579, "num_chars": 5}, {"sum_logits": -5.280583381652832, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.254728317260742, "logits_per_token": -5.280583381652832, "logits_per_char": -1.0561166763305665, "num_chars": 5}, {"sum_logits": -18.942928314208984, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.201568603515625, "logits_per_token": -9.471464157104492, "logits_per_char": -2.104769812689887, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 833, "native_id": "a6d3a2cb250a6310b8cabd31dbe2138c", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.969498157501221, "incorrect_loss_raw": 11.394092321395874, "correct_loss_per_char": 0.4687940092647777, "incorrect_loss_per_char": 1.3536959682640277, "correct_loss_per_token": 3.9847490787506104, "incorrect_loss_per_token": 9.990004420280457, "correct_loss_uncond": -8.07573652267456, "incorrect_loss_uncond": -3.322883129119873}, "model_output": [{"sum_logits": -7.969498157501221, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.04523468017578, "logits_per_token": -3.9847490787506104, "logits_per_char": -0.4687940092647777, "num_chars": 17}, {"sum_logits": -11.23270320892334, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.399694442749023, "logits_per_token": -5.61635160446167, "logits_per_char": -0.5911949057328073, "num_chars": 19}, {"sum_logits": -16.72989845275879, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.001232147216797, "logits_per_token": -16.72989845275879, "logits_per_char": -2.0912373065948486, "num_chars": 8}, {"sum_logits": -10.538691520690918, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.090795516967773, "logits_per_token": -10.538691520690918, "logits_per_char": -1.3173364400863647, "num_chars": 8}, {"sum_logits": -7.075076103210449, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.376179695129395, "logits_per_token": -7.075076103210449, "logits_per_char": -1.4150152206420898, "num_chars": 5}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 834, "native_id": "27c523eb9099d2eec66296558eb4448e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.762296676635742, "incorrect_loss_raw": 10.574024319648743, "correct_loss_per_char": 1.2937161127726238, "incorrect_loss_per_char": 1.3193571850275383, "correct_loss_per_token": 7.762296676635742, "incorrect_loss_per_token": 8.468246579170227, "correct_loss_uncond": -7.19073486328125, "incorrect_loss_uncond": -3.9435137510299683}, "model_output": [{"sum_logits": -4.730074405670166, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.378788948059082, "logits_per_token": -4.730074405670166, "logits_per_char": -0.9460148811340332, "num_chars": 5}, {"sum_logits": -10.025988578796387, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.429417610168457, "logits_per_token": -10.025988578796387, "logits_per_char": -1.2532485723495483, "num_chars": 8}, {"sum_logits": -7.762296676635742, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.953031539916992, "logits_per_token": -7.762296676635742, "logits_per_char": -1.2937161127726238, "num_chars": 6}, {"sum_logits": -16.846221923828125, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -20.88605499267578, "logits_per_token": -8.423110961914062, "logits_per_char": -1.2958632249098558, "num_chars": 13}, {"sum_logits": -10.693812370300293, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -11.375890731811523, "logits_per_token": -10.693812370300293, "logits_per_char": -1.7823020617167156, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 835, "native_id": "2509fdd7d94afe9d0c021654ce0ba93f", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.896488189697266, "incorrect_loss_raw": 13.807761907577515, "correct_loss_per_char": 0.45357601459209734, "incorrect_loss_per_char": 1.444812944200304, "correct_loss_per_token": 1.9654960632324219, "incorrect_loss_per_token": 6.903880953788757, "correct_loss_uncond": -13.790767669677734, "incorrect_loss_uncond": -4.227332592010498}, "model_output": [{"sum_logits": -12.88150405883789, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -19.473495483398438, "logits_per_token": -6.440752029418945, "logits_per_char": -1.2881504058837892, "num_chars": 10}, {"sum_logits": -5.896488189697266, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.687255859375, "logits_per_token": -1.9654960632324219, "logits_per_char": -0.45357601459209734, "num_chars": 13}, {"sum_logits": -19.29631233215332, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -20.65680694580078, "logits_per_token": -9.64815616607666, "logits_per_char": -1.929631233215332, "num_chars": 10}, {"sum_logits": -15.39138412475586, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -19.12323570251465, "logits_per_token": -7.69569206237793, "logits_per_char": -1.71015379163954, "num_chars": 9}, {"sum_logits": -7.661847114562988, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.886839866638184, "logits_per_token": -3.830923557281494, "logits_per_char": -0.8513163460625542, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 836, "native_id": "75b8195e23c6bada574f1e41471b8f23", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.880422115325928, "incorrect_loss_raw": 8.620156168937683, "correct_loss_per_char": 0.6533802350362142, "incorrect_loss_per_char": 0.8814698921309578, "correct_loss_per_token": 2.940211057662964, "incorrect_loss_per_token": 6.148927330970764, "correct_loss_uncond": -10.961170673370361, "incorrect_loss_uncond": -7.785364031791687}, "model_output": [{"sum_logits": -5.880422115325928, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.84159278869629, "logits_per_token": -2.940211057662964, "logits_per_char": -0.6533802350362142, "num_chars": 9}, {"sum_logits": -8.047754287719727, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.339823722839355, "logits_per_token": -8.047754287719727, "logits_per_char": -0.8941949208577474, "num_chars": 9}, {"sum_logits": -9.514217376708984, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.858665466308594, "logits_per_token": -4.757108688354492, "logits_per_char": -0.9514217376708984, "num_chars": 10}, {"sum_logits": -6.663039684295654, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.495595932006836, "logits_per_token": -6.663039684295654, "logits_per_char": -1.1105066140492756, "num_chars": 6}, {"sum_logits": -10.255613327026367, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.927995681762695, "logits_per_token": -5.127806663513184, "logits_per_char": -0.5697562959459093, "num_chars": 18}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 837, "native_id": "df1bf6f3f87975aa0c1b6d6153d9ecef", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.572432994842529, "incorrect_loss_raw": 9.906484246253967, "correct_loss_per_char": 0.6965541243553162, "incorrect_loss_per_char": 1.2005899442566765, "correct_loss_per_token": 5.572432994842529, "incorrect_loss_per_token": 5.7155067920684814, "correct_loss_uncond": -9.32371473312378, "incorrect_loss_uncond": -3.884978413581848}, "model_output": [{"sum_logits": -13.739784240722656, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.022384643554688, "logits_per_token": -6.869892120361328, "logits_per_char": -1.5266426934136286, "num_chars": 9}, {"sum_logits": -5.572432994842529, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.896147727966309, "logits_per_token": -5.572432994842529, "logits_per_char": -0.6965541243553162, "num_chars": 8}, {"sum_logits": -10.01785945892334, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.547733306884766, "logits_per_token": -5.00892972946167, "logits_per_char": -0.834821621576945, "num_chars": 12}, {"sum_logits": -9.77017593383789, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.665070533752441, "logits_per_token": -4.885087966918945, "logits_per_char": -1.2212719917297363, "num_chars": 8}, {"sum_logits": -6.098117351531982, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.930662155151367, "logits_per_token": -6.098117351531982, "logits_per_char": -1.2196234703063964, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 838, "native_id": "e99d4cb2e69d3e020ee9e4e9a84ac45b", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.929009437561035, "incorrect_loss_raw": 13.68581771850586, "correct_loss_per_char": 0.4929009437561035, "incorrect_loss_per_char": 1.1838755329449973, "correct_loss_per_token": 2.4645047187805176, "incorrect_loss_per_token": 6.168265342712402, "correct_loss_uncond": -13.602431297302246, "incorrect_loss_uncond": -4.627916574478149}, "model_output": [{"sum_logits": -13.068480491638184, "num_tokens": 2, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -19.72175407409668, "logits_per_token": -6.534240245819092, "logits_per_char": -1.3068480491638184, "num_chars": 10}, {"sum_logits": -4.929009437561035, "num_tokens": 2, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -18.53144073486328, "logits_per_token": -2.4645047187805176, "logits_per_char": -0.4929009437561035, "num_chars": 10}, {"sum_logits": -11.946608543395996, "num_tokens": 2, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -15.915934562683105, "logits_per_token": -5.973304271697998, "logits_per_char": -0.9955507119496664, "num_chars": 12}, {"sum_logits": -16.191444396972656, "num_tokens": 3, "num_tokens_all": 158, "is_greedy": false, "sum_logits_uncond": -18.910371780395508, "logits_per_token": -5.397148132324219, "logits_per_char": -1.0794296264648438, "num_chars": 15}, {"sum_logits": -13.536737442016602, "num_tokens": 2, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -18.706876754760742, "logits_per_token": -6.768368721008301, "logits_per_char": -1.3536737442016602, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 839, "native_id": "b1274d6f5969dea4d46f43fbdc28fd97", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.21880841255188, "incorrect_loss_raw": 7.098449349403381, "correct_loss_per_char": 0.24653426806131998, "incorrect_loss_per_char": 0.8480013310909271, "correct_loss_per_token": 2.21880841255188, "incorrect_loss_per_token": 7.098449349403381, "correct_loss_uncond": -12.398751020431519, "incorrect_loss_uncond": -6.697638630867004}, "model_output": [{"sum_logits": -2.21880841255188, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": true, "sum_logits_uncond": -14.617559432983398, "logits_per_token": -2.21880841255188, "logits_per_char": -0.24653426806131998, "num_chars": 9}, {"sum_logits": -8.392118453979492, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.123327255249023, "logits_per_token": -8.392118453979492, "logits_per_char": -0.699343204498291, "num_chars": 12}, {"sum_logits": -9.224352836608887, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.9453706741333, "logits_per_token": -9.224352836608887, "logits_per_char": -1.1530441045761108, "num_chars": 8}, {"sum_logits": -6.061956882476807, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.671866416931152, "logits_per_token": -6.061956882476807, "logits_per_char": -0.8659938403538295, "num_chars": 7}, {"sum_logits": -4.71536922454834, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.443787574768066, "logits_per_token": -4.71536922454834, "logits_per_char": -0.6736241749354771, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 840, "native_id": "001cb999a61a5c8b4031ff53cf261714", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.065227508544922, "incorrect_loss_raw": 8.381958365440369, "correct_loss_per_char": 0.41304550170898435, "incorrect_loss_per_char": 0.8376671527113233, "correct_loss_per_token": 2.065227508544922, "incorrect_loss_per_token": 5.294233798980713, "correct_loss_uncond": -11.826799392700195, "incorrect_loss_uncond": -8.343441843986511}, "model_output": [{"sum_logits": -2.065227508544922, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": true, "sum_logits_uncond": -13.892026901245117, "logits_per_token": -2.065227508544922, "logits_per_char": -0.41304550170898435, "num_chars": 5}, {"sum_logits": -2.065227508544922, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": true, "sum_logits_uncond": -13.892026901245117, "logits_per_token": -2.065227508544922, "logits_per_char": -0.41304550170898435, "num_chars": 5}, {"sum_logits": -3.871077537536621, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.304367065429688, "logits_per_token": -1.9355387687683105, "logits_per_char": -0.48388469219207764, "num_chars": 8}, {"sum_logits": -20.830718994140625, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -24.751480102539062, "logits_per_token": -10.415359497070312, "logits_per_char": -1.4879084995814733, "num_chars": 14}, {"sum_logits": -6.760809421539307, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.953726768493652, "logits_per_token": -6.760809421539307, "logits_per_char": -0.9658299173627581, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 841, "native_id": "18ee7a93410a6b4c9cec5d4894775991_1", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.9838024377822876, "incorrect_loss_raw": 10.182098388671875, "correct_loss_per_char": 0.2459506094455719, "incorrect_loss_per_char": 1.4602738949987624, "correct_loss_per_token": 0.9838024377822876, "incorrect_loss_per_token": 8.757511854171753, "correct_loss_uncond": -12.658395171165466, "incorrect_loss_uncond": -4.095841407775879}, "model_output": [{"sum_logits": -6.398224830627441, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.05156421661377, "logits_per_token": -6.398224830627441, "logits_per_char": -1.2796449661254883, "num_chars": 5}, {"sum_logits": -14.629305839538574, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.183414459228516, "logits_per_token": -14.629305839538574, "logits_per_char": -1.2191088199615479, "num_chars": 12}, {"sum_logits": -0.9838024377822876, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": true, "sum_logits_uncond": -13.642197608947754, "logits_per_token": -0.9838024377822876, "logits_per_char": -0.2459506094455719, "num_chars": 4}, {"sum_logits": -8.304170608520508, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.587641716003418, "logits_per_token": -8.304170608520508, "logits_per_char": -2.076042652130127, "num_chars": 4}, {"sum_logits": -11.396692276000977, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.289138793945312, "logits_per_token": -5.698346138000488, "logits_per_char": -1.2662991417778864, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 842, "native_id": "3b8be90fdd8c67571d8d692eaa6dd87b", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.5993876457214355, "incorrect_loss_raw": 5.527444243431091, "correct_loss_per_char": 0.5499489704767863, "incorrect_loss_per_char": 0.7167452616350991, "correct_loss_per_token": 3.2996938228607178, "incorrect_loss_per_token": 4.471380591392517, "correct_loss_uncond": -12.199999332427979, "incorrect_loss_uncond": -9.155905604362488}, "model_output": [{"sum_logits": -8.448509216308594, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.40916633605957, "logits_per_token": -4.224254608154297, "logits_per_char": -0.5632339477539062, "num_chars": 15}, {"sum_logits": -3.0336523056030273, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.922264099121094, "logits_per_token": -3.0336523056030273, "logits_per_char": -0.3792065382003784, "num_chars": 8}, {"sum_logits": -7.110421180725098, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -9.769732475280762, "logits_per_token": -7.110421180725098, "logits_per_char": -1.4220842361450194, "num_chars": 5}, {"sum_logits": -6.5993876457214355, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.799386978149414, "logits_per_token": -3.2996938228607178, "logits_per_char": -0.5499489704767863, "num_chars": 12}, {"sum_logits": -3.5171942710876465, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.63223648071289, "logits_per_token": -3.5171942710876465, "logits_per_char": -0.5024563244410923, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 843, "native_id": "300bd7704ae8c5fcef618902f18fd01d", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.9558141231536865, "incorrect_loss_raw": 9.770849704742432, "correct_loss_per_char": 0.29558141231536866, "incorrect_loss_per_char": 0.818038673060281, "correct_loss_per_token": 0.9852713743845621, "incorrect_loss_per_token": 4.644771973292032, "correct_loss_uncond": -13.416504144668579, "incorrect_loss_uncond": -7.988371849060059}, "model_output": [{"sum_logits": -10.807201385498047, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.789691925048828, "logits_per_token": -5.403600692749023, "logits_per_char": -1.0807201385498046, "num_chars": 10}, {"sum_logits": -11.461431503295898, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.60476303100586, "logits_per_token": -5.730715751647949, "logits_per_char": -1.0419483184814453, "num_chars": 11}, {"sum_logits": -5.775669097900391, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.48796272277832, "logits_per_token": -1.9252230326334636, "logits_per_char": -0.3609793186187744, "num_chars": 16}, {"sum_logits": -2.9558141231536865, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.372318267822266, "logits_per_token": -0.9852713743845621, "logits_per_char": -0.29558141231536866, "num_chars": 10}, {"sum_logits": -11.03909683227539, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.154468536376953, "logits_per_token": -5.519548416137695, "logits_per_char": -0.7885069165910993, "num_chars": 14}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 844, "native_id": "f18833ace65a54709377134168b457a9", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.984786510467529, "incorrect_loss_raw": 13.033531665802002, "correct_loss_per_char": 0.4153988758722941, "incorrect_loss_per_char": 1.0137432595094045, "correct_loss_per_token": 2.4923932552337646, "incorrect_loss_per_token": 5.9845506350199384, "correct_loss_uncond": -14.110918521881104, "incorrect_loss_uncond": -6.5803985595703125}, "model_output": [{"sum_logits": -12.23056411743164, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.54778289794922, "logits_per_token": -6.11528205871582, "logits_per_char": -0.7644102573394775, "num_chars": 16}, {"sum_logits": -9.520272254943848, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.82883071899414, "logits_per_token": -3.1734240849812827, "logits_per_char": -0.4760136127471924, "num_chars": 20}, {"sum_logits": -4.984786510467529, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.095705032348633, "logits_per_token": -2.4923932552337646, "logits_per_char": -0.4153988758722941, "num_chars": 12}, {"sum_logits": -23.600690841674805, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -26.32343101501465, "logits_per_token": -7.866896947224935, "logits_per_char": -1.9667242368062336, "num_chars": 12}, {"sum_logits": -6.782599449157715, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.75567626953125, "logits_per_token": -6.782599449157715, "logits_per_char": -0.8478249311447144, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 845, "native_id": "5bba03b425f5abc6e017f194cf074b06", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.42322063446045, "incorrect_loss_raw": 12.270744800567627, "correct_loss_per_char": 2.0705367724100747, "incorrect_loss_per_char": 1.3417954762776694, "correct_loss_per_token": 6.211610317230225, "incorrect_loss_per_token": 10.617601156234741, "correct_loss_uncond": -3.196404457092285, "incorrect_loss_uncond": -3.4046173095703125}, "model_output": [{"sum_logits": -15.740257263183594, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.160067558288574, "logits_per_token": -15.740257263183594, "logits_per_char": -1.5740257263183595, "num_chars": 10}, {"sum_logits": -12.42322063446045, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.619625091552734, "logits_per_token": -6.211610317230225, "logits_per_char": -2.0705367724100747, "num_chars": 6}, {"sum_logits": -5.290439605712891, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.742315292358398, "logits_per_token": -5.290439605712891, "logits_per_char": -1.0580879211425782, "num_chars": 5}, {"sum_logits": -14.827133178710938, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.560240745544434, "logits_per_token": -14.827133178710938, "logits_per_char": -1.8533916473388672, "num_chars": 8}, {"sum_logits": -13.225149154663086, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -21.23882484436035, "logits_per_token": -6.612574577331543, "logits_per_char": -0.8816766103108724, "num_chars": 15}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 846, "native_id": "78276a4eab6e8d6b9ae3749211816977", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.363130569458008, "incorrect_loss_raw": 5.027373194694519, "correct_loss_per_char": 0.4363130569458008, "incorrect_loss_per_char": 0.6819867359267341, "correct_loss_per_token": 4.363130569458008, "incorrect_loss_per_token": 4.09328305721283, "correct_loss_uncond": -9.23535442352295, "incorrect_loss_uncond": -7.3439754247665405}, "model_output": [{"sum_logits": -7.472721099853516, "num_tokens": 2, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -14.08496379852295, "logits_per_token": -3.736360549926758, "logits_per_char": -0.8303023444281684, "num_chars": 9}, {"sum_logits": -7.95100212097168, "num_tokens": 1, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -12.654258728027344, "logits_per_token": -7.95100212097168, "logits_per_char": -0.99387526512146, "num_chars": 8}, {"sum_logits": -4.363130569458008, "num_tokens": 1, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -13.598484992980957, "logits_per_token": -4.363130569458008, "logits_per_char": -0.4363130569458008, "num_chars": 10}, {"sum_logits": -3.684232234954834, "num_tokens": 1, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -10.917222023010254, "logits_per_token": -3.684232234954834, "logits_per_char": -0.7368464469909668, "num_chars": 5}, {"sum_logits": -1.0015373229980469, "num_tokens": 1, "num_tokens_all": 151, "is_greedy": true, "sum_logits_uncond": -11.828949928283691, "logits_per_token": -1.0015373229980469, "logits_per_char": -0.16692288716634116, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 847, "native_id": "cf33e0f5891ce53a716432be06a46ee1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.566852569580078, "incorrect_loss_raw": 9.982144474983215, "correct_loss_per_char": 1.0566852569580079, "incorrect_loss_per_char": 1.1103661139806111, "correct_loss_per_token": 10.566852569580078, "incorrect_loss_per_token": 9.982144474983215, "correct_loss_uncond": -2.993767738342285, "incorrect_loss_uncond": -4.095244288444519}, "model_output": [{"sum_logits": -7.238198757171631, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.581976890563965, "logits_per_token": -7.238198757171631, "logits_per_char": -0.8042443063524034, "num_chars": 9}, {"sum_logits": -11.887999534606934, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.711588859558105, "logits_per_token": -11.887999534606934, "logits_per_char": -0.6992940902709961, "num_chars": 17}, {"sum_logits": -10.566852569580078, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.560620307922363, "logits_per_token": -10.566852569580078, "logits_per_char": -1.0566852569580079, "num_chars": 10}, {"sum_logits": -7.048693656921387, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.818624496459961, "logits_per_token": -7.048693656921387, "logits_per_char": -1.4097387313842773, "num_chars": 5}, {"sum_logits": -13.75368595123291, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.197364807128906, "logits_per_token": -13.75368595123291, "logits_per_char": -1.5281873279147677, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 848, "native_id": "3938d6e50d38b1f8774b4f00a89bdb39", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.646706581115723, "incorrect_loss_raw": 10.869704842567444, "correct_loss_per_char": 0.3909827400656307, "incorrect_loss_per_char": 1.5676907996336622, "correct_loss_per_token": 2.2155688603719077, "incorrect_loss_per_token": 7.774410009384155, "correct_loss_uncond": -12.142956733703613, "incorrect_loss_uncond": -5.087711215019226}, "model_output": [{"sum_logits": -15.201912879943848, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.285333633422852, "logits_per_token": -7.600956439971924, "logits_per_char": -3.0403825759887697, "num_chars": 5}, {"sum_logits": -9.688471794128418, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.412413597106934, "logits_per_token": -9.688471794128418, "logits_per_char": -0.9688471794128418, "num_chars": 10}, {"sum_logits": -6.646706581115723, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.789663314819336, "logits_per_token": -2.2155688603719077, "logits_per_char": -0.3909827400656307, "num_chars": 17}, {"sum_logits": -11.418100357055664, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.303086280822754, "logits_per_token": -11.418100357055664, "logits_per_char": -1.903016726175944, "num_chars": 6}, {"sum_logits": -7.170334339141846, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.82883071899414, "logits_per_token": -2.3901114463806152, "logits_per_char": -0.3585167169570923, "num_chars": 20}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 849, "native_id": "cabefb7063a728e77abd44d97397a2a4", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.418949127197266, "incorrect_loss_raw": 14.551490783691406, "correct_loss_per_char": 0.7849124272664388, "incorrect_loss_per_char": 2.3410443867955886, "correct_loss_per_token": 4.709474563598633, "incorrect_loss_per_token": 14.551490783691406, "correct_loss_uncond": -8.279346466064453, "incorrect_loss_uncond": 1.126899242401123}, "model_output": [{"sum_logits": -13.308869361877441, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.923977851867676, "logits_per_token": -13.308869361877441, "logits_per_char": -3.3272173404693604, "num_chars": 4}, {"sum_logits": -17.186763763427734, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.985567092895508, "logits_per_token": -17.186763763427734, "logits_per_char": -2.4552519662039622, "num_chars": 7}, {"sum_logits": -9.418949127197266, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.69829559326172, "logits_per_token": -4.709474563598633, "logits_per_char": -0.7849124272664388, "num_chars": 12}, {"sum_logits": -15.550201416015625, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.73450756072998, "logits_per_token": -15.550201416015625, "logits_per_char": -1.5550201416015625, "num_chars": 10}, {"sum_logits": -12.160128593444824, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.054313659667969, "logits_per_token": -12.160128593444824, "logits_per_char": -2.0266880989074707, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 850, "native_id": "60b909ad1d7956218a5d99954fdebecd", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.307478904724121, "incorrect_loss_raw": 7.20280647277832, "correct_loss_per_char": 0.6153541292463031, "incorrect_loss_per_char": 0.8627066607513125, "correct_loss_per_token": 4.307478904724121, "incorrect_loss_per_token": 4.303768634796143, "correct_loss_uncond": -11.32475757598877, "incorrect_loss_uncond": -9.076004266738892}, "model_output": [{"sum_logits": -6.346549034118652, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.549616813659668, "logits_per_token": -3.173274517059326, "logits_per_char": -0.7051721149020724, "num_chars": 9}, {"sum_logits": -5.126458168029785, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.095869064331055, "logits_per_token": -2.5632290840148926, "logits_per_char": -0.6408072710037231, "num_chars": 8}, {"sum_logits": -5.618923187255859, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.071504592895508, "logits_per_token": -5.618923187255859, "logits_per_char": -0.8027033124651227, "num_chars": 7}, {"sum_logits": -4.307478904724121, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.63223648071289, "logits_per_token": -4.307478904724121, "logits_per_char": -0.6153541292463031, "num_chars": 7}, {"sum_logits": -11.719295501708984, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.398252487182617, "logits_per_token": -5.859647750854492, "logits_per_char": -1.3021439446343317, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 851, "native_id": "9fdebd1c2cf498f1d726a025b780a39a", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.070625305175781, "incorrect_loss_raw": 9.979795455932617, "correct_loss_per_char": 0.8246023004705255, "incorrect_loss_per_char": 0.927049403389295, "correct_loss_per_token": 3.0235417683919272, "incorrect_loss_per_token": 4.629169424374899, "correct_loss_uncond": -8.221920013427734, "incorrect_loss_uncond": -7.418217420578003}, "model_output": [{"sum_logits": -9.070625305175781, "num_tokens": 3, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -17.292545318603516, "logits_per_token": -3.0235417683919272, "logits_per_char": -0.8246023004705255, "num_chars": 11}, {"sum_logits": -12.04349422454834, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -20.21933364868164, "logits_per_token": -6.02174711227417, "logits_per_char": -1.0036245187123616, "num_chars": 12}, {"sum_logits": -8.078925132751465, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -18.075977325439453, "logits_per_token": -4.039462566375732, "logits_per_char": -0.5049328207969666, "num_chars": 16}, {"sum_logits": -8.657479286193848, "num_tokens": 3, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -15.835020065307617, "logits_per_token": -2.8858264287312827, "logits_per_char": -0.9619421429104276, "num_chars": 9}, {"sum_logits": -11.139283180236816, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -15.46172046661377, "logits_per_token": -5.569641590118408, "logits_per_char": -1.237698131137424, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 852, "native_id": "f36027954e43cfd926451bdf7cb0c3ac", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 8.080622673034668, "incorrect_loss_raw": 15.572545289993286, "correct_loss_per_char": 0.6215863594642053, "incorrect_loss_per_char": 1.286473143642599, "correct_loss_per_token": 2.693540891011556, "incorrect_loss_per_token": 6.58859646320343, "correct_loss_uncond": -10.47901439666748, "incorrect_loss_uncond": -5.284844160079956}, "model_output": [{"sum_logits": -19.162818908691406, "num_tokens": 4, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -27.014991760253906, "logits_per_token": -4.790704727172852, "logits_per_char": -0.9581409454345703, "num_chars": 20}, {"sum_logits": -8.080622673034668, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.55963706970215, "logits_per_token": -2.693540891011556, "logits_per_char": -0.6215863594642053, "num_chars": 13}, {"sum_logits": -18.29424285888672, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.62407112121582, "logits_per_token": -9.14712142944336, "logits_per_char": -1.4072494506835938, "num_chars": 13}, {"sum_logits": -9.49337387084961, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.22709083557129, "logits_per_token": -4.746686935424805, "logits_per_char": -0.8630339882590554, "num_chars": 11}, {"sum_logits": -15.33974552154541, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.563404083251953, "logits_per_token": -7.669872760772705, "logits_per_char": -1.9174681901931763, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 853, "native_id": "7ec14907622c6d5a6087cd59a22d8c9d", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.490758895874023, "incorrect_loss_raw": 9.313881278038025, "correct_loss_per_char": 0.4991598996249112, "incorrect_loss_per_char": 0.9569446058063716, "correct_loss_per_token": 2.7453794479370117, "incorrect_loss_per_token": 6.2562782764434814, "correct_loss_uncond": -15.698129653930664, "incorrect_loss_uncond": -6.6670321226119995}, "model_output": [{"sum_logits": -9.71722412109375, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.393335342407227, "logits_per_token": -4.858612060546875, "logits_per_char": -0.6940874372209821, "num_chars": 14}, {"sum_logits": -14.743599891662598, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.85931396484375, "logits_per_token": -7.371799945831299, "logits_per_char": -1.1341230685894306, "num_chars": 13}, {"sum_logits": -7.213645935058594, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.77853012084961, "logits_per_token": -7.213645935058594, "logits_per_char": -1.2022743225097656, "num_chars": 6}, {"sum_logits": -5.490758895874023, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.188888549804688, "logits_per_token": -2.7453794479370117, "logits_per_char": -0.4991598996249112, "num_chars": 11}, {"sum_logits": -5.581055164337158, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.892474174499512, "logits_per_token": -5.581055164337158, "logits_per_char": -0.7972935949053083, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 854, "native_id": "efe488f67b53a4b6e69782c01c84f06c", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.541312217712402, "incorrect_loss_raw": 5.957689166069031, "correct_loss_per_char": 1.3082624435424806, "incorrect_loss_per_char": 0.7840148732775734, "correct_loss_per_token": 6.541312217712402, "incorrect_loss_per_token": 4.630520820617676, "correct_loss_uncond": -5.894695281982422, "incorrect_loss_uncond": -8.813365578651428}, "model_output": [{"sum_logits": -10.61734676361084, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.10889434814453, "logits_per_token": -5.30867338180542, "logits_per_char": -0.707823117574056, "num_chars": 15}, {"sum_logits": -2.474792003631592, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.254728317260742, "logits_per_token": -2.474792003631592, "logits_per_char": -0.49495840072631836, "num_chars": 5}, {"sum_logits": -6.541312217712402, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.436007499694824, "logits_per_token": -6.541312217712402, "logits_per_char": -1.3082624435424806, "num_chars": 5}, {"sum_logits": -3.752798080444336, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.892474174499512, "logits_per_token": -3.752798080444336, "logits_per_char": -0.536114011492048, "num_chars": 7}, {"sum_logits": -6.9858198165893555, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.82812213897705, "logits_per_token": -6.9858198165893555, "logits_per_char": -1.3971639633178712, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 855, "native_id": "7c62637437ad7515452886074010a438", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.425698280334473, "incorrect_loss_raw": 13.831650376319885, "correct_loss_per_char": 0.7854748566945394, "incorrect_loss_per_char": 0.9275835020975634, "correct_loss_per_token": 4.712849140167236, "incorrect_loss_per_token": 4.399154047171274, "correct_loss_uncond": -9.474501609802246, "incorrect_loss_uncond": -6.814086079597473}, "model_output": [{"sum_logits": -9.425698280334473, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.90019989013672, "logits_per_token": -4.712849140167236, "logits_per_char": -0.7854748566945394, "num_chars": 12}, {"sum_logits": -8.294928550720215, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.580815315246582, "logits_per_token": -2.764976183573405, "logits_per_char": -0.7540844137018378, "num_chars": 11}, {"sum_logits": -24.315750122070312, "num_tokens": 7, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -30.454418182373047, "logits_per_token": -3.4736785888671875, "logits_per_char": -0.8105250040690104, "num_chars": 30}, {"sum_logits": -7.552061557769775, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.465166091918945, "logits_per_token": -3.7760307788848877, "logits_per_char": -0.6293384631474813, "num_chars": 12}, {"sum_logits": -15.163861274719238, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.08254623413086, "logits_per_token": -7.581930637359619, "logits_per_char": -1.516386127471924, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 856, "native_id": "4f7be1c68654e2924c161c8eca652928", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.47940444946289, "incorrect_loss_raw": 7.487178921699524, "correct_loss_per_char": 0.7708549499511719, "incorrect_loss_per_char": 1.0278455757432514, "correct_loss_per_token": 4.239702224731445, "incorrect_loss_per_token": 4.259602636098862, "correct_loss_uncond": -9.403072357177734, "incorrect_loss_uncond": -8.201165318489075}, "model_output": [{"sum_logits": -12.440513610839844, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.746070861816406, "logits_per_token": -6.220256805419922, "logits_per_char": -1.382279290093316, "num_chars": 9}, {"sum_logits": -8.47940444946289, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.882476806640625, "logits_per_token": -4.239702224731445, "logits_per_char": -0.7708549499511719, "num_chars": 11}, {"sum_logits": -5.895498752593994, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.165645599365234, "logits_per_token": -1.4738746881484985, "logits_per_char": -0.39303325017293295, "num_chars": 15}, {"sum_logits": -7.075854778289795, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.619281768798828, "logits_per_token": -7.075854778289795, "logits_per_char": -1.7689636945724487, "num_chars": 4}, {"sum_logits": -4.536848545074463, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.222378730773926, "logits_per_token": -2.2684242725372314, "logits_per_char": -0.5671060681343079, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 857, "native_id": "e4976ee741cf4b28b8a42780ffb15774", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.946861267089844, "incorrect_loss_raw": 11.885305881500244, "correct_loss_per_char": 0.8829845852322049, "incorrect_loss_per_char": 1.0933324593764084, "correct_loss_per_token": 7.946861267089844, "incorrect_loss_per_token": 9.8226797580719, "correct_loss_uncond": -6.3449296951293945, "incorrect_loss_uncond": -3.0643463134765625}, "model_output": [{"sum_logits": -9.918062210083008, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.973995208740234, "logits_per_token": -9.918062210083008, "logits_per_char": -0.9918062210083007, "num_chars": 10}, {"sum_logits": -7.946861267089844, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.291790962219238, "logits_per_token": -7.946861267089844, "logits_per_char": -0.8829845852322049, "num_chars": 9}, {"sum_logits": -10.968779563903809, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.455964088439941, "logits_per_token": -10.968779563903809, "logits_per_char": -1.0968779563903808, "num_chars": 10}, {"sum_logits": -10.153372764587402, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.412413597106934, "logits_per_token": -10.153372764587402, "logits_per_char": -1.0153372764587403, "num_chars": 10}, {"sum_logits": -16.501008987426758, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -16.956235885620117, "logits_per_token": -8.250504493713379, "logits_per_char": -1.2693083836482122, "num_chars": 13}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 858, "native_id": "14e75a42a416d32a24e2826cae34d2bf", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.398224830627441, "incorrect_loss_raw": 15.139475584030151, "correct_loss_per_char": 0.7398224830627441, "incorrect_loss_per_char": 1.708125857150916, "correct_loss_per_token": 3.6991124153137207, "incorrect_loss_per_token": 9.464009165763855, "correct_loss_uncond": -7.634651184082031, "incorrect_loss_uncond": -2.548490524291992}, "model_output": [{"sum_logits": -15.154170989990234, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.071391105651855, "logits_per_token": -15.154170989990234, "logits_per_char": -2.5256951649983725, "num_chars": 6}, {"sum_logits": -11.405797958374023, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.813737869262695, "logits_per_token": -5.702898979187012, "logits_per_char": -1.0368907234885476, "num_chars": 11}, {"sum_logits": -14.286337852478027, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.357776641845703, "logits_per_token": -7.143168926239014, "logits_per_char": -1.2987579865889116, "num_chars": 11}, {"sum_logits": -19.71159553527832, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.50895881652832, "logits_per_token": -9.85579776763916, "logits_per_char": -1.9711595535278321, "num_chars": 10}, {"sum_logits": -7.398224830627441, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.032876014709473, "logits_per_token": -3.6991124153137207, "logits_per_char": -0.7398224830627441, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 859, "native_id": "004607228ad49b69eac932c1005d6106", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 14.68859577178955, "incorrect_loss_raw": 12.156621932983398, "correct_loss_per_char": 0.9792397181193034, "incorrect_loss_per_char": 1.2964874912822058, "correct_loss_per_token": 4.896198590596517, "incorrect_loss_per_token": 8.830484390258789, "correct_loss_uncond": -6.385581016540527, "incorrect_loss_uncond": -3.291571617126465}, "model_output": [{"sum_logits": -14.347977638244629, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.906111717224121, "logits_per_token": -7.1739888191223145, "logits_per_char": -1.434797763824463, "num_chars": 10}, {"sum_logits": -12.261122703552246, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.095705032348633, "logits_per_token": -6.130561351776123, "logits_per_char": -1.0217602252960205, "num_chars": 12}, {"sum_logits": -14.68859577178955, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.074176788330078, "logits_per_token": -4.896198590596517, "logits_per_char": -0.9792397181193034, "num_chars": 15}, {"sum_logits": -8.91499137878418, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.181896209716797, "logits_per_token": -8.91499137878418, "logits_per_char": -1.2735701969691686, "num_chars": 7}, {"sum_logits": -13.102396011352539, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.609061241149902, "logits_per_token": -13.102396011352539, "logits_per_char": -1.455821779039171, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 860, "native_id": "a7f54ee1866d5db34eacf40efa53c93e", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.823493003845215, "incorrect_loss_raw": 12.930140018463135, "correct_loss_per_char": 1.164698600769043, "incorrect_loss_per_char": 1.9114014714483232, "correct_loss_per_token": 5.823493003845215, "incorrect_loss_per_token": 11.157720804214478, "correct_loss_uncond": -5.899905204772949, "incorrect_loss_uncond": -1.97230863571167}, "model_output": [{"sum_logits": -12.047651290893555, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.922264099121094, "logits_per_token": -12.047651290893555, "logits_per_char": -1.5059564113616943, "num_chars": 8}, {"sum_logits": -16.88608169555664, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.241706848144531, "logits_per_token": -16.88608169555664, "logits_per_char": -2.41229738507952, "num_chars": 7}, {"sum_logits": -5.823493003845215, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.723398208618164, "logits_per_token": -5.823493003845215, "logits_per_char": -1.164698600769043, "num_chars": 5}, {"sum_logits": -8.607473373413086, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.993368148803711, "logits_per_token": -8.607473373413086, "logits_per_char": -2.1518683433532715, "num_chars": 4}, {"sum_logits": -14.179353713989258, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.452455520629883, "logits_per_token": -7.089676856994629, "logits_per_char": -1.5754837459988065, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 861, "native_id": "e56c56c3cfe50ba0c787c2bd67255be8", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.6845054626464844, "incorrect_loss_raw": 8.137399673461914, "correct_loss_per_char": 0.46056318283081055, "incorrect_loss_per_char": 1.4622124092919488, "correct_loss_per_token": 3.6845054626464844, "incorrect_loss_per_token": 8.137399673461914, "correct_loss_uncond": -3.9356632232666016, "incorrect_loss_uncond": -2.671647548675537}, "model_output": [{"sum_logits": -10.640802383422852, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.090980529785156, "logits_per_token": -10.640802383422852, "logits_per_char": -2.1281604766845703, "num_chars": 5}, {"sum_logits": -7.255008697509766, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.099248886108398, "logits_per_token": -7.255008697509766, "logits_per_char": -1.0364298139299666, "num_chars": 7}, {"sum_logits": -5.514703750610352, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -10.483684539794922, "logits_per_token": -5.514703750610352, "logits_per_char": -1.378675937652588, "num_chars": 4}, {"sum_logits": -3.6845054626464844, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -7.620168685913086, "logits_per_token": -3.6845054626464844, "logits_per_char": -0.46056318283081055, "num_chars": 8}, {"sum_logits": -9.139083862304688, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -8.562274932861328, "logits_per_token": -9.139083862304688, "logits_per_char": -1.3055834089006697, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 862, "native_id": "6f48ee564a48293eb501cc0d8197bdd9", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.686202049255371, "incorrect_loss_raw": 9.707527756690979, "correct_loss_per_char": 1.1143670082092285, "incorrect_loss_per_char": 1.1451729731427298, "correct_loss_per_token": 6.686202049255371, "incorrect_loss_per_token": 7.911970734596252, "correct_loss_uncond": -8.986549377441406, "incorrect_loss_uncond": -5.570254683494568}, "model_output": [{"sum_logits": -13.232747077941895, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.633559226989746, "logits_per_token": -13.232747077941895, "logits_per_char": -1.3232747077941895, "num_chars": 10}, {"sum_logits": -7.802867412567139, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.75567626953125, "logits_per_token": -7.802867412567139, "logits_per_char": -0.9753584265708923, "num_chars": 8}, {"sum_logits": -6.686202049255371, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.672751426696777, "logits_per_token": -6.686202049255371, "logits_per_char": -1.1143670082092285, "num_chars": 6}, {"sum_logits": -14.364456176757812, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.345714569091797, "logits_per_token": -7.182228088378906, "logits_per_char": -1.5960506863064237, "num_chars": 9}, {"sum_logits": -3.4300403594970703, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.376179695129395, "logits_per_token": -3.4300403594970703, "logits_per_char": -0.686008071899414, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 863, "native_id": "13d2a103abbed930cabc9567a1ba12f2", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.808589458465576, "incorrect_loss_raw": 7.055909514427185, "correct_loss_per_char": 0.4468145737281212, "incorrect_loss_per_char": 1.0101011558184547, "correct_loss_per_token": 2.904294729232788, "incorrect_loss_per_token": 6.398031413555145, "correct_loss_uncond": -12.44532060623169, "incorrect_loss_uncond": -7.813398480415344}, "model_output": [{"sum_logits": -9.95242977142334, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.77156925201416, "logits_per_token": -9.95242977142334, "logits_per_char": -1.4217756816319056, "num_chars": 7}, {"sum_logits": -7.278443336486816, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.625797271728516, "logits_per_token": -7.278443336486816, "logits_per_char": -1.4556886672973632, "num_chars": 5}, {"sum_logits": -5.263024806976318, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.816247940063477, "logits_per_token": -2.631512403488159, "logits_per_char": -0.5263024806976319, "num_chars": 10}, {"sum_logits": -5.729740142822266, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.263617515563965, "logits_per_token": -5.729740142822266, "logits_per_char": -0.6366377936469184, "num_chars": 9}, {"sum_logits": -5.808589458465576, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.253910064697266, "logits_per_token": -2.904294729232788, "logits_per_char": -0.4468145737281212, "num_chars": 13}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 864, "native_id": "0c1efb38e023ee9725486fbec4f2d797", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.6135120391845703, "incorrect_loss_raw": 9.409871339797974, "correct_loss_per_char": 0.3733588627406529, "incorrect_loss_per_char": 1.0121247992887126, "correct_loss_per_token": 2.6135120391845703, "incorrect_loss_per_token": 8.24010157585144, "correct_loss_uncond": -9.907217025756836, "incorrect_loss_uncond": -5.654129266738892}, "model_output": [{"sum_logits": -9.22258472442627, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.363737106323242, "logits_per_token": -9.22258472442627, "logits_per_char": -1.317512103489467, "num_chars": 7}, {"sum_logits": -7.959668159484863, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.892474174499512, "logits_per_token": -7.959668159484863, "logits_per_char": -1.1370954513549805, "num_chars": 7}, {"sum_logits": -11.099074363708496, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.953621864318848, "logits_per_token": -11.099074363708496, "logits_per_char": -1.009006760337136, "num_chars": 11}, {"sum_logits": -2.6135120391845703, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.520729064941406, "logits_per_token": -2.6135120391845703, "logits_per_char": -0.3733588627406529, "num_chars": 7}, {"sum_logits": -9.358158111572266, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.04616928100586, "logits_per_token": -4.679079055786133, "logits_per_char": -0.5848848819732666, "num_chars": 16}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 865, "native_id": "b7ab4a5e0c19a98f41cd1ba3176f2dff", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.260814666748047, "incorrect_loss_raw": 10.522074937820435, "correct_loss_per_char": 0.7260814666748047, "incorrect_loss_per_char": 1.4007498800754548, "correct_loss_per_token": 7.260814666748047, "incorrect_loss_per_token": 7.822440505027771, "correct_loss_uncond": -6.418561935424805, "incorrect_loss_uncond": -2.989654541015625}, "model_output": [{"sum_logits": -8.884963989257812, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.591012001037598, "logits_per_token": -8.884963989257812, "logits_per_char": -0.8884963989257812, "num_chars": 10}, {"sum_logits": -12.255334854125977, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.623748779296875, "logits_per_token": -6.127667427062988, "logits_per_char": -1.2255334854125977, "num_chars": 10}, {"sum_logits": -7.260814666748047, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.679376602172852, "logits_per_token": -7.260814666748047, "logits_per_char": -0.7260814666748047, "num_chars": 10}, {"sum_logits": -9.341740608215332, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.826271057128906, "logits_per_token": -4.670870304107666, "logits_per_char": -1.1677175760269165, "num_chars": 8}, {"sum_logits": -11.606260299682617, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -11.00588607788086, "logits_per_token": -11.606260299682617, "logits_per_char": -2.3212520599365236, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 866, "native_id": "8bcbb5098876940b2382db3a9a0b1beb", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.916767120361328, "incorrect_loss_raw": 11.244559526443481, "correct_loss_per_char": 0.909730593363444, "incorrect_loss_per_char": 0.8645964656041298, "correct_loss_per_token": 3.638922373453776, "incorrect_loss_per_token": 5.265806436538696, "correct_loss_uncond": -6.953973770141602, "incorrect_loss_uncond": -5.02737021446228}, "model_output": [{"sum_logits": -8.555359840393066, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.617439270019531, "logits_per_token": -2.8517866134643555, "logits_per_char": -0.7777599854902788, "num_chars": 11}, {"sum_logits": -15.131402969360352, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.04989242553711, "logits_per_token": -7.565701484680176, "logits_per_char": -1.0087601979573568, "num_chars": 15}, {"sum_logits": -8.603610038757324, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.714458465576172, "logits_per_token": -4.301805019378662, "logits_per_char": -0.6145435741969517, "num_chars": 14}, {"sum_logits": -10.916767120361328, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.87074089050293, "logits_per_token": -3.638922373453776, "logits_per_char": -0.909730593363444, "num_chars": 12}, {"sum_logits": -12.687865257263184, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.705928802490234, "logits_per_token": -6.343932628631592, "logits_per_char": -1.057322104771932, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 867, "native_id": "c7ce02d9365fe9275f88338ad51cbde6", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.393357992172241, "incorrect_loss_raw": 12.897374391555786, "correct_loss_per_char": 0.29916974902153015, "incorrect_loss_per_char": 1.3042127915791102, "correct_loss_per_token": 2.393357992172241, "incorrect_loss_per_token": 8.85099971294403, "correct_loss_uncond": -11.483213663101196, "incorrect_loss_uncond": -3.64404296875}, "model_output": [{"sum_logits": -2.393357992172241, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": true, "sum_logits_uncond": -13.876571655273438, "logits_per_token": -2.393357992172241, "logits_per_char": -0.29916974902153015, "num_chars": 8}, {"sum_logits": -18.80211639404297, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.841018676757812, "logits_per_token": -9.401058197021484, "logits_per_char": -1.0445620218912761, "num_chars": 18}, {"sum_logits": -9.107953071594238, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.23271656036377, "logits_per_token": -9.107953071594238, "logits_per_char": -1.5179921785990398, "num_chars": 6}, {"sum_logits": -10.110547065734863, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.082622528076172, "logits_per_token": -10.110547065734863, "logits_per_char": -1.6850911776224773, "num_chars": 6}, {"sum_logits": -13.568881034851074, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.00931167602539, "logits_per_token": -6.784440517425537, "logits_per_char": -0.9692057882036481, "num_chars": 14}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 868, "native_id": "fb54a118d46b2776e435d411ae3dd9c8", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.027143478393555, "incorrect_loss_raw": 11.066318988800049, "correct_loss_per_char": 0.6283929347991943, "incorrect_loss_per_char": 1.3409820758379425, "correct_loss_per_token": 2.5135717391967773, "incorrect_loss_per_token": 8.403386354446411, "correct_loss_uncond": -8.036157608032227, "incorrect_loss_uncond": -5.943977355957031}, "model_output": [{"sum_logits": -10.437841415405273, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.030803680419922, "logits_per_token": -10.437841415405273, "logits_per_char": -1.1597601572672527, "num_chars": 9}, {"sum_logits": -11.842735290527344, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.083843231201172, "logits_per_token": -5.921367645263672, "logits_per_char": -0.9109796377328726, "num_chars": 13}, {"sum_logits": -9.460725784301758, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.913827896118164, "logits_per_token": -4.730362892150879, "logits_per_char": -0.7883938153584799, "num_chars": 12}, {"sum_logits": -5.027143478393555, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.063301086425781, "logits_per_token": -2.5135717391967773, "logits_per_char": -0.6283929347991943, "num_chars": 8}, {"sum_logits": -12.52397346496582, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.012710571289062, "logits_per_token": -12.52397346496582, "logits_per_char": -2.504794692993164, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 869, "native_id": "2c13e6d61e3733db90a9fd22d72b3337", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.9731802940368652, "incorrect_loss_raw": 8.59462857246399, "correct_loss_per_char": 0.2477650245030721, "incorrect_loss_per_char": 0.8136258426338735, "correct_loss_per_token": 1.4865901470184326, "incorrect_loss_per_token": 4.965413212776184, "correct_loss_uncond": -15.80879259109497, "incorrect_loss_uncond": -8.960812091827393}, "model_output": [{"sum_logits": -11.348350524902344, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.06200408935547, "logits_per_token": -5.674175262451172, "logits_per_char": -0.8105964660644531, "num_chars": 14}, {"sum_logits": -5.344791412353516, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.197783470153809, "logits_per_token": -5.344791412353516, "logits_per_char": -0.6680989265441895, "num_chars": 8}, {"sum_logits": -2.9731802940368652, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": true, "sum_logits_uncond": -18.781972885131836, "logits_per_token": -1.4865901470184326, "logits_per_char": -0.2477650245030721, "num_chars": 12}, {"sum_logits": -8.318319320678711, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.11066436767578, "logits_per_token": -4.1591596603393555, "logits_per_char": -0.9242577022976346, "num_chars": 9}, {"sum_logits": -9.367053031921387, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.85131072998047, "logits_per_token": -4.683526515960693, "logits_per_char": -0.8515502756292169, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 870, "native_id": "350292ae429060a00ff2cf64d71558e4", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.612018585205078, "incorrect_loss_raw": 8.487722873687744, "correct_loss_per_char": 0.5437156132289341, "incorrect_loss_per_char": 1.1801927089691162, "correct_loss_per_token": 3.806009292602539, "incorrect_loss_per_token": 8.487722873687744, "correct_loss_uncond": -10.952045440673828, "incorrect_loss_uncond": -5.57607626914978}, "model_output": [{"sum_logits": -10.410540580749512, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.973722457885742, "logits_per_token": -10.410540580749512, "logits_per_char": -0.8675450483957926, "num_chars": 12}, {"sum_logits": -4.237451553344727, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -11.551885604858398, "logits_per_token": -4.237451553344727, "logits_per_char": -1.0593628883361816, "num_chars": 4}, {"sum_logits": -7.111727714538574, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.17077922821045, "logits_per_token": -7.111727714538574, "logits_per_char": -1.7779319286346436, "num_chars": 4}, {"sum_logits": -12.191171646118164, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.558809280395508, "logits_per_token": -12.191171646118164, "logits_per_char": -1.015930970509847, "num_chars": 12}, {"sum_logits": -7.612018585205078, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.564064025878906, "logits_per_token": -3.806009292602539, "logits_per_char": -0.5437156132289341, "num_chars": 14}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 871, "native_id": "179fff4b5928e5ac3d3ae3e1db782547", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.73676061630249, "incorrect_loss_raw": 16.201069355010986, "correct_loss_per_char": 0.4811971868787493, "incorrect_loss_per_char": 1.3648307949304581, "correct_loss_per_token": 3.368380308151245, "incorrect_loss_per_token": 7.219212571779887, "correct_loss_uncond": -13.174488544464111, "incorrect_loss_uncond": -6.313846826553345}, "model_output": [{"sum_logits": -14.817017555236816, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -22.902904510498047, "logits_per_token": -7.408508777618408, "logits_per_char": -0.926063597202301, "num_chars": 16}, {"sum_logits": -20.05605697631836, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -28.396303176879883, "logits_per_token": -6.685352325439453, "logits_per_char": -1.6713380813598633, "num_chars": 12}, {"sum_logits": -7.2088823318481445, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.388150215148926, "logits_per_token": -7.2088823318481445, "logits_per_char": -1.4417764663696289, "num_chars": 5}, {"sum_logits": -6.73676061630249, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.9112491607666, "logits_per_token": -3.368380308151245, "logits_per_char": -0.4811971868787493, "num_chars": 14}, {"sum_logits": -22.722320556640625, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -24.37230682373047, "logits_per_token": -7.574106852213542, "logits_per_char": -1.420145034790039, "num_chars": 16}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 872, "native_id": "81cc0d320488c7bacafb285cf7db5fbd", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.5610885620117188, "incorrect_loss_raw": 7.104418158531189, "correct_loss_per_char": 0.29675738016764325, "incorrect_loss_per_char": 0.7591561092398107, "correct_loss_per_token": 3.5610885620117188, "incorrect_loss_per_token": 6.094143509864807, "correct_loss_uncond": -11.412633895874023, "incorrect_loss_uncond": -7.603135466575623}, "model_output": [{"sum_logits": -7.456866264343262, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.5768461227417, "logits_per_token": -7.456866264343262, "logits_per_char": -0.9321082830429077, "num_chars": 8}, {"sum_logits": -3.5610885620117188, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.973722457885742, "logits_per_token": -3.5610885620117188, "logits_per_char": -0.29675738016764325, "num_chars": 12}, {"sum_logits": -6.061647891998291, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.7302303314209, "logits_per_token": -2.0205492973327637, "logits_per_char": -0.3788529932498932, "num_chars": 16}, {"sum_logits": -6.458110809326172, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.321028709411621, "logits_per_token": -6.458110809326172, "logits_per_char": -1.076351801554362, "num_chars": 6}, {"sum_logits": -8.441047668457031, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.202109336853027, "logits_per_token": -8.441047668457031, "logits_per_char": -0.6493113591120794, "num_chars": 13}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 873, "native_id": "26c8a7165d0ed7250b9328f90d83ba83", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.429478645324707, "incorrect_loss_raw": 16.776554584503174, "correct_loss_per_char": 0.6286319096883138, "incorrect_loss_per_char": 1.2296169271002282, "correct_loss_per_token": 3.143159548441569, "incorrect_loss_per_token": 6.048969626426697, "correct_loss_uncond": -8.994515419006348, "incorrect_loss_uncond": -1.368971824645996}, "model_output": [{"sum_logits": -14.422605514526367, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.63493537902832, "logits_per_token": -3.605651378631592, "logits_per_char": -1.109431193425105, "num_chars": 13}, {"sum_logits": -18.174137115478516, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.17990493774414, "logits_per_token": -9.087068557739258, "logits_per_char": -1.6521942832253196, "num_chars": 11}, {"sum_logits": -12.894777297973633, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.848461151123047, "logits_per_token": -4.298259099324544, "logits_per_char": -0.805923581123352, "num_chars": 16}, {"sum_logits": -9.429478645324707, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.423994064331055, "logits_per_token": -3.143159548441569, "logits_per_char": -0.6286319096883138, "num_chars": 15}, {"sum_logits": -21.61469841003418, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.918804168701172, "logits_per_token": -7.2048994700113935, "logits_per_char": -1.3509186506271362, "num_chars": 16}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 874, "native_id": "636fc69dee35cd357b4191b47e64d0e5", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.133977890014648, "incorrect_loss_raw": 13.592984199523926, "correct_loss_per_char": 1.391747236251831, "incorrect_loss_per_char": 1.4820460253291659, "correct_loss_per_token": 11.133977890014648, "incorrect_loss_per_token": 10.22411584854126, "correct_loss_uncond": -5.137632369995117, "incorrect_loss_uncond": -2.348971128463745}, "model_output": [{"sum_logits": -13.414518356323242, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.485124588012695, "logits_per_token": -13.414518356323242, "logits_per_char": -1.6768147945404053, "num_chars": 8}, {"sum_logits": -14.006471633911133, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.317315101623535, "logits_per_token": -14.006471633911133, "logits_per_char": -1.556274625990126, "num_chars": 9}, {"sum_logits": -11.072998046875, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.656585693359375, "logits_per_token": -5.5364990234375, "logits_per_char": -1.1072998046875, "num_chars": 10}, {"sum_logits": -15.877948760986328, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.308795928955078, "logits_per_token": -7.938974380493164, "logits_per_char": -1.587794876098633, "num_chars": 10}, {"sum_logits": -11.133977890014648, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.271610260009766, "logits_per_token": -11.133977890014648, "logits_per_char": -1.391747236251831, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 875, "native_id": "f0c4622a082eb9ad0690dd36dcf61297", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.716444969177246, "incorrect_loss_raw": 13.154116153717041, "correct_loss_per_char": 0.581096331278483, "incorrect_loss_per_char": 1.1141794919967651, "correct_loss_per_token": 4.358222484588623, "incorrect_loss_per_token": 5.503771114349365, "correct_loss_uncond": -11.837479591369629, "incorrect_loss_uncond": -7.280959129333496}, "model_output": [{"sum_logits": -16.42188262939453, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.65846824645996, "logits_per_token": -8.210941314697266, "logits_per_char": -1.368490219116211, "num_chars": 12}, {"sum_logits": -2.891740322113037, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.14124870300293, "logits_per_token": -2.891740322113037, "logits_per_char": -0.7229350805282593, "num_chars": 4}, {"sum_logits": -5.314793109893799, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -10.994718551635742, "logits_per_token": -5.314793109893799, "logits_per_char": -1.3286982774734497, "num_chars": 4}, {"sum_logits": -27.988048553466797, "num_tokens": 5, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -37.945865631103516, "logits_per_token": -5.5976097106933596, "logits_per_char": -1.0365943908691406, "num_chars": 27}, {"sum_logits": -8.716444969177246, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.553924560546875, "logits_per_token": -4.358222484588623, "logits_per_char": -0.581096331278483, "num_chars": 15}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 876, "native_id": "4499ebd5e8188b0d5fdef6afd893017a", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.09134840965271, "incorrect_loss_raw": 5.604387700557709, "correct_loss_per_char": 0.618269681930542, "incorrect_loss_per_char": 0.6301888624827068, "correct_loss_per_token": 3.09134840965271, "incorrect_loss_per_token": 4.608791351318359, "correct_loss_uncond": -7.9267542362213135, "incorrect_loss_uncond": -8.384796679019928}, "model_output": [{"sum_logits": -7.966109275817871, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.03420352935791, "logits_per_token": -7.966109275817871, "logits_per_char": -0.8851232528686523, "num_chars": 9}, {"sum_logits": -2.4174787998199463, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": true, "sum_logits_uncond": -12.480264663696289, "logits_per_token": -2.4174787998199463, "logits_per_char": -0.4029131333033244, "num_chars": 6}, {"sum_logits": -7.964770793914795, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.686844825744629, "logits_per_token": -3.9823853969573975, "logits_per_char": -0.7240700721740723, "num_chars": 11}, {"sum_logits": -4.069191932678223, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.755424499511719, "logits_per_token": -4.069191932678223, "logits_per_char": -0.5086489915847778, "num_chars": 8}, {"sum_logits": -3.09134840965271, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -11.018102645874023, "logits_per_token": -3.09134840965271, "logits_per_char": -0.618269681930542, "num_chars": 5}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 877, "native_id": "230cc491829307e8edb5423c8d09f945", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.937873840332031, "incorrect_loss_raw": 16.18611192703247, "correct_loss_per_char": 0.7291915893554688, "incorrect_loss_per_char": 1.4315982276743109, "correct_loss_per_token": 3.6459579467773438, "incorrect_loss_per_token": 6.840540409088135, "correct_loss_uncond": -9.07142448425293, "incorrect_loss_uncond": -4.93938684463501}, "model_output": [{"sum_logits": -15.156089782714844, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.221446990966797, "logits_per_token": -7.578044891357422, "logits_per_char": -1.5156089782714843, "num_chars": 10}, {"sum_logits": -20.04024887084961, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -28.94101333618164, "logits_per_token": -5.010062217712402, "logits_per_char": -1.0020124435424804, "num_chars": 20}, {"sum_logits": -10.937873840332031, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.00929832458496, "logits_per_token": -3.6459579467773438, "logits_per_char": -0.7291915893554688, "num_chars": 15}, {"sum_logits": -14.21910285949707, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.847156524658203, "logits_per_token": -7.109551429748535, "logits_per_char": -1.2926457144997336, "num_chars": 11}, {"sum_logits": -15.32900619506836, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.49237823486328, "logits_per_token": -7.66450309753418, "logits_per_char": -1.916125774383545, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 878, "native_id": "6163a897cd7eac1deddd4c002a1930ae", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.832844257354736, "incorrect_loss_raw": 9.610886335372925, "correct_loss_per_char": 0.5221896171569824, "incorrect_loss_per_char": 0.7952585328708995, "correct_loss_per_token": 2.610948085784912, "incorrect_loss_per_token": 5.656858921051025, "correct_loss_uncond": -14.479285717010498, "incorrect_loss_uncond": -7.072266101837158}, "model_output": [{"sum_logits": -6.811326026916504, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.893023490905762, "logits_per_token": -6.811326026916504, "logits_per_char": -0.7568140029907227, "num_chars": 9}, {"sum_logits": -8.177390098571777, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.578033447265625, "logits_per_token": -4.088695049285889, "logits_per_char": -0.4542994499206543, "num_chars": 18}, {"sum_logits": -11.607061386108398, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.175729751586914, "logits_per_token": -5.803530693054199, "logits_per_char": -0.8928508758544922, "num_chars": 13}, {"sum_logits": -11.84776782989502, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.08582305908203, "logits_per_token": -5.92388391494751, "logits_per_char": -1.077069802717729, "num_chars": 11}, {"sum_logits": -7.832844257354736, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -22.312129974365234, "logits_per_token": -2.610948085784912, "logits_per_char": -0.5221896171569824, "num_chars": 15}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 879, "native_id": "55478486079423907508a06be13ca536", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.3969945907592773, "incorrect_loss_raw": 8.978670716285706, "correct_loss_per_char": 0.4852849415370396, "incorrect_loss_per_char": 1.4001707951227824, "correct_loss_per_token": 3.3969945907592773, "incorrect_loss_per_token": 7.283399224281311, "correct_loss_uncond": -9.211542129516602, "incorrect_loss_uncond": -6.978099226951599}, "model_output": [{"sum_logits": -8.139419555664062, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.616278648376465, "logits_per_token": -8.139419555664062, "logits_per_char": -1.6278839111328125, "num_chars": 5}, {"sum_logits": -13.562171936035156, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.71833038330078, "logits_per_token": -6.781085968017578, "logits_per_char": -1.1301809946695964, "num_chars": 12}, {"sum_logits": -3.3969945907592773, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.608536720275879, "logits_per_token": -3.3969945907592773, "logits_per_char": -0.4852849415370396, "num_chars": 7}, {"sum_logits": -9.021903038024902, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.664348602294922, "logits_per_token": -9.021903038024902, "logits_per_char": -1.8043806076049804, "num_chars": 5}, {"sum_logits": -5.191188335418701, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.82812213897705, "logits_per_token": -5.191188335418701, "logits_per_char": -1.0382376670837403, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 880, "native_id": "4fa0d61ec82eb1e238d8938d5f43f392", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.807600021362305, "incorrect_loss_raw": 18.765909671783447, "correct_loss_per_char": 0.9082769247201773, "incorrect_loss_per_char": 1.957330591647656, "correct_loss_per_token": 3.935866673787435, "incorrect_loss_per_token": 9.865829785664877, "correct_loss_uncond": -9.177494049072266, "incorrect_loss_uncond": 0.23545336723327637}, "model_output": [{"sum_logits": -16.57656478881836, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.588706970214844, "logits_per_token": -8.28828239440918, "logits_per_char": -1.657656478881836, "num_chars": 10}, {"sum_logits": -25.66119956970215, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -24.270618438720703, "logits_per_token": -8.553733189900717, "logits_per_char": -1.8329428264072962, "num_chars": 14}, {"sum_logits": -20.409141540527344, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.666357040405273, "logits_per_token": -10.204570770263672, "logits_per_char": -1.855376503684304, "num_chars": 11}, {"sum_logits": -11.807600021362305, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.98509407043457, "logits_per_token": -3.935866673787435, "logits_per_char": -0.9082769247201773, "num_chars": 13}, {"sum_logits": -12.416732788085938, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -11.596142768859863, "logits_per_token": -12.416732788085938, "logits_per_char": -2.4833465576171876, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 881, "native_id": "b4f79ca5f3595248ee25292ab60ad105", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.413862705230713, "incorrect_loss_raw": 10.3360595703125, "correct_loss_per_char": 0.45115522543589276, "incorrect_loss_per_char": 0.9791834396545332, "correct_loss_per_token": 2.7069313526153564, "incorrect_loss_per_token": 4.63498870531718, "correct_loss_uncond": -14.30396032333374, "incorrect_loss_uncond": -9.411675930023193}, "model_output": [{"sum_logits": -5.413862705230713, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -19.717823028564453, "logits_per_token": -2.7069313526153564, "logits_per_char": -0.45115522543589276, "num_chars": 12}, {"sum_logits": -12.792985916137695, "num_tokens": 3, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -24.832361221313477, "logits_per_token": -4.264328638712565, "logits_per_char": -1.1629987196488814, "num_chars": 11}, {"sum_logits": -8.274455070495605, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -18.70823097229004, "logits_per_token": -4.137227535247803, "logits_per_char": -0.8274455070495605, "num_chars": 10}, {"sum_logits": -11.76076602935791, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -17.65550422668457, "logits_per_token": -5.880383014678955, "logits_per_char": -0.9800638357798258, "num_chars": 12}, {"sum_logits": -8.516031265258789, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -17.794845581054688, "logits_per_token": -4.2580156326293945, "logits_per_char": -0.9462256961398654, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 882, "native_id": "c39131d979c9205c11d0e109e18188e4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 14.143314361572266, "incorrect_loss_raw": 12.698653936386108, "correct_loss_per_char": 0.8319596683277803, "incorrect_loss_per_char": 1.7328931797118414, "correct_loss_per_token": 4.714438120524089, "incorrect_loss_per_token": 11.206003904342651, "correct_loss_uncond": -5.50999641418457, "incorrect_loss_uncond": -2.3976027965545654}, "model_output": [{"sum_logits": -11.471339225769043, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -15.664348602294922, "logits_per_token": -11.471339225769043, "logits_per_char": -2.2942678451538088, "num_chars": 5}, {"sum_logits": -11.941200256347656, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.564393997192383, "logits_per_token": -5.970600128173828, "logits_per_char": -1.492650032043457, "num_chars": 8}, {"sum_logits": -14.495294570922852, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.998748779296875, "logits_per_token": -14.495294570922852, "logits_per_char": -2.070756367274693, "num_chars": 7}, {"sum_logits": -12.886781692504883, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -15.157535552978516, "logits_per_token": -12.886781692504883, "logits_per_char": -1.073898474375407, "num_chars": 12}, {"sum_logits": -14.143314361572266, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.653310775756836, "logits_per_token": -4.714438120524089, "logits_per_char": -0.8319596683277803, "num_chars": 17}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 883, "native_id": "bd773d64f4e22db2358c6e00cbdf2d83", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.387949466705322, "incorrect_loss_raw": 7.486025929450989, "correct_loss_per_char": 0.9125642095293317, "incorrect_loss_per_char": 1.1880987366040547, "correct_loss_per_token": 6.387949466705322, "incorrect_loss_per_token": 7.486025929450989, "correct_loss_uncond": -7.6835551261901855, "incorrect_loss_uncond": -6.182872176170349}, "model_output": [{"sum_logits": -6.387949466705322, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.071504592895508, "logits_per_token": -6.387949466705322, "logits_per_char": -0.9125642095293317, "num_chars": 7}, {"sum_logits": -6.494563579559326, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.904221534729004, "logits_per_token": -6.494563579559326, "logits_per_char": -1.2989127159118652, "num_chars": 5}, {"sum_logits": -8.785588264465332, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.075064659118652, "logits_per_token": -8.785588264465332, "logits_per_char": -1.2550840377807617, "num_chars": 7}, {"sum_logits": -8.76970100402832, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.672751426696777, "logits_per_token": -8.76970100402832, "logits_per_char": -1.46161683400472, "num_chars": 6}, {"sum_logits": -5.894250869750977, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.023554801940918, "logits_per_token": -5.894250869750977, "logits_per_char": -0.7367813587188721, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 884, "native_id": "2b416120e2fbd84b44b5dcd4eb42ed5c", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.3150100708007812, "incorrect_loss_raw": 7.835735201835632, "correct_loss_per_char": 0.13617706298828125, "incorrect_loss_per_char": 0.998591383298238, "correct_loss_per_token": 1.1575050354003906, "incorrect_loss_per_token": 7.225927531719208, "correct_loss_uncond": -17.215787887573242, "incorrect_loss_uncond": -7.091337561607361}, "model_output": [{"sum_logits": -9.809370040893555, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.84918212890625, "logits_per_token": -9.809370040893555, "logits_per_char": -1.2261712551116943, "num_chars": 8}, {"sum_logits": -7.736047744750977, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -11.941009521484375, "logits_per_token": -7.736047744750977, "logits_per_char": -1.2893412907918294, "num_chars": 6}, {"sum_logits": -4.8784613609313965, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -20.24420738220215, "logits_per_token": -2.4392306804656982, "logits_per_char": -0.48784613609313965, "num_chars": 10}, {"sum_logits": -2.3150100708007812, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": true, "sum_logits_uncond": -19.530797958374023, "logits_per_token": -1.1575050354003906, "logits_per_char": -0.13617706298828125, "num_chars": 17}, {"sum_logits": -8.919061660766602, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.6738920211792, "logits_per_token": -8.919061660766602, "logits_per_char": -0.9910068511962891, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 885, "native_id": "cef855ec07c66a731741026c2839b0d3", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.816379070281982, "incorrect_loss_raw": 11.271852731704712, "correct_loss_per_char": 0.7816379070281982, "incorrect_loss_per_char": 1.2302607881571639, "correct_loss_per_token": 3.908189535140991, "incorrect_loss_per_token": 6.914516568183899, "correct_loss_uncond": -9.804522037506104, "incorrect_loss_uncond": -5.119593143463135}, "model_output": [{"sum_logits": -12.084039688110352, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -17.070417404174805, "logits_per_token": -6.042019844055176, "logits_per_char": -1.3426710764567058, "num_chars": 9}, {"sum_logits": -10.228721618652344, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -15.50186824798584, "logits_per_token": -10.228721618652344, "logits_per_char": -1.4612459455217635, "num_chars": 7}, {"sum_logits": -7.816379070281982, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -17.620901107788086, "logits_per_token": -3.908189535140991, "logits_per_char": -0.7816379070281982, "num_chars": 10}, {"sum_logits": -12.09167194366455, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -17.656919479370117, "logits_per_token": -6.045835971832275, "logits_per_char": -0.9301286110511193, "num_chars": 13}, {"sum_logits": -10.682977676391602, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -15.336578369140625, "logits_per_token": -5.341488838195801, "logits_per_char": -1.1869975195990667, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 886, "native_id": "0bbb82c1dc4bfd3b0e0c409a0afd248b", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 10.661396026611328, "incorrect_loss_raw": 13.343296885490417, "correct_loss_per_char": 0.9692178206010298, "incorrect_loss_per_char": 1.703435516116595, "correct_loss_per_token": 10.661396026611328, "incorrect_loss_per_token": 9.112500429153442, "correct_loss_uncond": -3.6701154708862305, "incorrect_loss_uncond": -2.550904631614685}, "model_output": [{"sum_logits": -10.661396026611328, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.331511497497559, "logits_per_token": -10.661396026611328, "logits_per_char": -0.9692178206010298, "num_chars": 11}, {"sum_logits": -21.38678741455078, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.803192138671875, "logits_per_token": -10.69339370727539, "logits_per_char": -1.9442534013227983, "num_chars": 11}, {"sum_logits": -6.918632984161377, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.346129417419434, "logits_per_token": -6.918632984161377, "logits_per_char": -1.3837265968322754, "num_chars": 5}, {"sum_logits": -12.45958423614502, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.53363037109375, "logits_per_token": -6.22979211807251, "logits_per_char": -1.3843982484605577, "num_chars": 9}, {"sum_logits": -12.608182907104492, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.893854141235352, "logits_per_token": -12.608182907104492, "logits_per_char": -2.1013638178507485, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 887, "native_id": "67beae081a9b5ef56988f205f80cf129", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.0091500282287598, "incorrect_loss_raw": 10.21901798248291, "correct_loss_per_char": 0.33435000313652885, "incorrect_loss_per_char": 1.0547104597091674, "correct_loss_per_token": 3.0091500282287598, "incorrect_loss_per_token": 8.836919665336609, "correct_loss_uncond": -9.957285404205322, "incorrect_loss_uncond": -2.8428657054901123}, "model_output": [{"sum_logits": -9.008604049682617, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.533917427062988, "logits_per_token": -9.008604049682617, "logits_per_char": -0.8189640045166016, "num_chars": 11}, {"sum_logits": -11.05678653717041, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.056007385253906, "logits_per_token": -5.528393268585205, "logits_per_char": -1.105678653717041, "num_chars": 10}, {"sum_logits": -3.0091500282287598, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.966435432434082, "logits_per_token": -3.0091500282287598, "logits_per_char": -0.33435000313652885, "num_chars": 9}, {"sum_logits": -12.285439491271973, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.104439735412598, "logits_per_token": -12.285439491271973, "logits_per_char": -1.2285439491271972, "num_chars": 10}, {"sum_logits": -8.52524185180664, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -8.553170204162598, "logits_per_token": -8.52524185180664, "logits_per_char": -1.06565523147583, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 888, "native_id": "3b4dcfcab4726496bdbe9535cc669082", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.103285789489746, "incorrect_loss_raw": 8.657514929771423, "correct_loss_per_char": 0.3419404824574788, "incorrect_loss_per_char": 0.8429526700214907, "correct_loss_per_token": 1.3677619298299153, "incorrect_loss_per_token": 5.908701658248901, "correct_loss_uncond": -10.186491966247559, "incorrect_loss_uncond": -6.896592497825623}, "model_output": [{"sum_logits": -4.861626625061035, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.774347305297852, "logits_per_token": -4.861626625061035, "logits_per_char": -0.4861626625061035, "num_chars": 10}, {"sum_logits": -11.316329956054688, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.363466262817383, "logits_per_token": -5.658164978027344, "logits_per_char": -0.9430274963378906, "num_chars": 12}, {"sum_logits": -10.674176216125488, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.62109375, "logits_per_token": -5.337088108062744, "logits_per_char": -0.9703796560114081, "num_chars": 11}, {"sum_logits": -4.103285789489746, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.289777755737305, "logits_per_token": -1.3677619298299153, "logits_per_char": -0.3419404824574788, "num_chars": 12}, {"sum_logits": -7.777926921844482, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.45752239227295, "logits_per_token": -7.777926921844482, "logits_per_char": -0.9722408652305603, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 889, "native_id": "eebddf5f35d85e9fe2ecbd9b56c1db60", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.530541896820068, "incorrect_loss_raw": 9.470615863800049, "correct_loss_per_char": 0.5027765360745516, "incorrect_loss_per_char": 1.0347952511575487, "correct_loss_per_token": 2.765270948410034, "incorrect_loss_per_token": 7.091201186180115, "correct_loss_uncond": -15.3207688331604, "incorrect_loss_uncond": -6.199982643127441}, "model_output": [{"sum_logits": -5.530541896820068, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -20.85131072998047, "logits_per_token": -2.765270948410034, "logits_per_char": -0.5027765360745516, "num_chars": 11}, {"sum_logits": -10.589156150817871, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.463623046875, "logits_per_token": -10.589156150817871, "logits_per_char": -1.17657290564643, "num_chars": 9}, {"sum_logits": -8.257989883422852, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.77853012084961, "logits_per_token": -8.257989883422852, "logits_per_char": -1.3763316472371419, "num_chars": 6}, {"sum_logits": -8.433989524841309, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -16.658267974853516, "logits_per_token": -4.216994762420654, "logits_per_char": -0.7028324604034424, "num_chars": 12}, {"sum_logits": -10.601327896118164, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -18.781972885131836, "logits_per_token": -5.300663948059082, "logits_per_char": -0.8834439913431803, "num_chars": 12}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 890, "native_id": "5393ba1ce298bd1ac4744c07d7373a9c", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.423478126525879, "incorrect_loss_raw": 7.132311105728149, "correct_loss_per_char": 0.5529347658157349, "incorrect_loss_per_char": 0.7980525493621826, "correct_loss_per_token": 4.423478126525879, "incorrect_loss_per_token": 7.132311105728149, "correct_loss_uncond": -8.542835235595703, "incorrect_loss_uncond": -7.180345058441162}, "model_output": [{"sum_logits": -5.761934280395508, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.809243202209473, "logits_per_token": -5.761934280395508, "logits_per_char": -0.8231334686279297, "num_chars": 7}, {"sum_logits": -8.961779594421387, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.13828182220459, "logits_per_token": -8.961779594421387, "logits_per_char": -0.8147072358564897, "num_chars": 11}, {"sum_logits": -5.761934280395508, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.809243202209473, "logits_per_token": -5.761934280395508, "logits_per_char": -0.8231334686279297, "num_chars": 7}, {"sum_logits": -4.423478126525879, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.966313362121582, "logits_per_token": -4.423478126525879, "logits_per_char": -0.5529347658157349, "num_chars": 8}, {"sum_logits": -8.043596267700195, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.49385643005371, "logits_per_token": -8.043596267700195, "logits_per_char": -0.7312360243363814, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 891, "native_id": "fde48d43e27cefed6ed9c52514e0bb6d", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.789226531982422, "incorrect_loss_raw": 7.977429270744324, "correct_loss_per_char": 1.1789226531982422, "incorrect_loss_per_char": 0.8642987847328186, "correct_loss_per_token": 3.929742177327474, "incorrect_loss_per_token": 4.1953364610672, "correct_loss_uncond": -6.137325286865234, "incorrect_loss_uncond": -8.522809863090515}, "model_output": [{"sum_logits": -11.789226531982422, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.926551818847656, "logits_per_token": -3.929742177327474, "logits_per_char": -1.1789226531982422, "num_chars": 10}, {"sum_logits": -11.399578094482422, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.858604431152344, "logits_per_token": -5.699789047241211, "logits_per_char": -1.4249472618103027, "num_chars": 8}, {"sum_logits": -7.210846424102783, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.631242752075195, "logits_per_token": -2.4036154747009277, "logits_per_char": -0.6009038686752319, "num_chars": 12}, {"sum_logits": -9.24270248413086, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.588943481445312, "logits_per_token": -4.62135124206543, "logits_per_char": -0.9242702484130859, "num_chars": 10}, {"sum_logits": -4.0565900802612305, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.922165870666504, "logits_per_token": -4.0565900802612305, "logits_per_char": -0.5070737600326538, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 892, "native_id": "da83d85e28778c082d9a63f5b890b26d", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.779335975646973, "incorrect_loss_raw": 11.558974146842957, "correct_loss_per_char": 0.5186223983764648, "incorrect_loss_per_char": 1.0610923743437206, "correct_loss_per_token": 3.8896679878234863, "incorrect_loss_per_token": 8.471446633338928, "correct_loss_uncond": -12.102028846740723, "incorrect_loss_uncond": -4.932847380638123}, "model_output": [{"sum_logits": -7.779335975646973, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -19.881364822387695, "logits_per_token": -3.8896679878234863, "logits_per_char": -0.5186223983764648, "num_chars": 15}, {"sum_logits": -6.2023162841796875, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -16.34246826171875, "logits_per_token": -6.2023162841796875, "logits_per_char": -0.6891462537977431, "num_chars": 9}, {"sum_logits": -3.6886143684387207, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -13.665094375610352, "logits_per_token": -3.6886143684387207, "logits_per_char": -0.3353285789489746, "num_chars": 11}, {"sum_logits": -11.644745826721191, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -12.63976001739502, "logits_per_token": -11.644745826721191, "logits_per_char": -1.455593228340149, "num_chars": 8}, {"sum_logits": -24.700220108032227, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -23.319963455200195, "logits_per_token": -12.350110054016113, "logits_per_char": -1.7643014362880163, "num_chars": 14}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 893, "native_id": "cfa980561efe82e7ae7080d4f081b463", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.4129109382629395, "incorrect_loss_raw": 12.612175226211548, "correct_loss_per_char": 0.2437793527330671, "incorrect_loss_per_char": 1.1967527305378634, "correct_loss_per_token": 1.7064554691314697, "incorrect_loss_per_token": 9.224487900733948, "correct_loss_uncond": -15.366981029510498, "incorrect_loss_uncond": -5.044240713119507}, "model_output": [{"sum_logits": -12.341145515441895, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.946389198303223, "logits_per_token": -12.341145515441895, "logits_per_char": -1.7630207879202706, "num_chars": 7}, {"sum_logits": -11.006056785583496, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.297995567321777, "logits_per_token": -11.006056785583496, "logits_per_char": -1.572293826511928, "num_chars": 7}, {"sum_logits": -12.718972206115723, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.542770385742188, "logits_per_token": -6.359486103057861, "logits_per_char": -0.6056653431483677, "num_chars": 21}, {"sum_logits": -3.4129109382629395, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.779891967773438, "logits_per_token": -1.7064554691314697, "logits_per_char": -0.2437793527330671, "num_chars": 14}, {"sum_logits": -14.382526397705078, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.83850860595703, "logits_per_token": -7.191263198852539, "logits_per_char": -0.846030964570887, "num_chars": 17}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 894, "native_id": "384b89e789e0f4b4796120394fb6303b", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.700875759124756, "incorrect_loss_raw": 14.625334739685059, "correct_loss_per_char": 0.45299269171322093, "incorrect_loss_per_char": 1.3832013233035219, "correct_loss_per_token": 3.850437879562378, "incorrect_loss_per_token": 9.394702593485514, "correct_loss_uncond": -8.887903690338135, "incorrect_loss_uncond": -3.2592854499816895}, "model_output": [{"sum_logits": -13.124349594116211, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.027772903442383, "logits_per_token": -13.124349594116211, "logits_per_char": -2.1873915990193686, "num_chars": 6}, {"sum_logits": -13.993196487426758, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.07509994506836, "logits_per_token": -13.993196487426758, "logits_per_char": -1.5547996097140842, "num_chars": 9}, {"sum_logits": -16.980361938476562, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -23.193115234375, "logits_per_token": -5.6601206461588545, "logits_per_char": -0.9433534410264757, "num_chars": 18}, {"sum_logits": -7.700875759124756, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.58877944946289, "logits_per_token": -3.850437879562378, "logits_per_char": -0.45299269171322093, "num_chars": 17}, {"sum_logits": -14.403430938720703, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.24249267578125, "logits_per_token": -4.801143646240234, "logits_per_char": -0.847260643454159, "num_chars": 17}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 895, "native_id": "0d66d33a17e41eaa3278ca7b3930c5ea", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.0915472507476807, "incorrect_loss_raw": 9.838698863983154, "correct_loss_per_char": 0.2987924643925258, "incorrect_loss_per_char": 1.3781357895244253, "correct_loss_per_token": 2.0915472507476807, "incorrect_loss_per_token": 7.346214175224304, "correct_loss_uncond": -11.813264608383179, "incorrect_loss_uncond": -4.431272268295288}, "model_output": [{"sum_logits": -7.584589958190918, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.39013957977295, "logits_per_token": -7.584589958190918, "logits_per_char": -1.8961474895477295, "num_chars": 4}, {"sum_logits": -9.53006362915039, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.906187057495117, "logits_per_token": -4.765031814575195, "logits_per_char": -1.1912579536437988, "num_chars": 8}, {"sum_logits": -11.830327987670898, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.325604438781738, "logits_per_token": -11.830327987670898, "logits_per_char": -1.4787909984588623, "num_chars": 8}, {"sum_logits": -10.40981388092041, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.457953453063965, "logits_per_token": -5.204906940460205, "logits_per_char": -0.94634671644731, "num_chars": 11}, {"sum_logits": -2.0915472507476807, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": true, "sum_logits_uncond": -13.90481185913086, "logits_per_token": -2.0915472507476807, "logits_per_char": -0.2987924643925258, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 896, "native_id": "732183ead4206e51ed4df18b9c9f14fe", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.63481068611145, "incorrect_loss_raw": 9.715085625648499, "correct_loss_per_char": 0.2195675571759542, "incorrect_loss_per_char": 1.8571660612310683, "correct_loss_per_token": 1.317405343055725, "incorrect_loss_per_token": 9.715085625648499, "correct_loss_uncond": -18.64776301383972, "incorrect_loss_uncond": -3.6342674493789673}, "model_output": [{"sum_logits": -6.165123462677002, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.174287796020508, "logits_per_token": -6.165123462677002, "logits_per_char": -1.5412808656692505, "num_chars": 4}, {"sum_logits": -2.63481068611145, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.282573699951172, "logits_per_token": -1.317405343055725, "logits_per_char": -0.2195675571759542, "num_chars": 12}, {"sum_logits": -11.404057502746582, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.026251792907715, "logits_per_token": -11.404057502746582, "logits_per_char": -1.6291510718209403, "num_chars": 7}, {"sum_logits": -12.329049110412598, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.000849723815918, "logits_per_token": -12.329049110412598, "logits_per_char": -2.4658098220825195, "num_chars": 5}, {"sum_logits": -8.962112426757812, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.196022987365723, "logits_per_token": -8.962112426757812, "logits_per_char": -1.7924224853515625, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 897, "native_id": "2632ff6c9b781d3aa74e8dd36b990871", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.209446907043457, "incorrect_loss_raw": 10.343802690505981, "correct_loss_per_char": 0.27618086338043213, "incorrect_loss_per_char": 0.981662138303121, "correct_loss_per_token": 1.1047234535217285, "incorrect_loss_per_token": 8.688702821731567, "correct_loss_uncond": -13.84299373626709, "incorrect_loss_uncond": -4.168911695480347}, "model_output": [{"sum_logits": -8.849513053894043, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -13.331343650817871, "logits_per_token": -8.849513053894043, "logits_per_char": -1.1061891317367554, "num_chars": 8}, {"sum_logits": -2.209446907043457, "num_tokens": 2, "num_tokens_all": 155, "is_greedy": true, "sum_logits_uncond": -16.052440643310547, "logits_per_token": -1.1047234535217285, "logits_per_char": -0.27618086338043213, "num_chars": 8}, {"sum_logits": -9.449143409729004, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -11.810260772705078, "logits_per_token": -9.449143409729004, "logits_per_char": -1.1811429262161255, "num_chars": 8}, {"sum_logits": -9.835755348205566, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -13.35517406463623, "logits_per_token": -9.835755348205566, "logits_per_char": -0.756596565246582, "num_chars": 13}, {"sum_logits": -13.240798950195312, "num_tokens": 2, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -19.554079055786133, "logits_per_token": -6.620399475097656, "logits_per_char": -0.8827199300130208, "num_chars": 15}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 898, "native_id": "63db79b940f36f0333377f85c19eacb2", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.095209121704102, "incorrect_loss_raw": 10.222186803817749, "correct_loss_per_char": 0.5079340934753418, "incorrect_loss_per_char": 0.7253868899175099, "correct_loss_per_token": 6.095209121704102, "incorrect_loss_per_token": 5.1110934019088745, "correct_loss_uncond": -7.673521995544434, "incorrect_loss_uncond": -6.514665603637695}, "model_output": [{"sum_logits": -10.239068031311035, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.16889190673828, "logits_per_token": -5.119534015655518, "logits_per_char": -0.6399417519569397, "num_chars": 16}, {"sum_logits": -6.095209121704102, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.768731117248535, "logits_per_token": -6.095209121704102, "logits_per_char": -0.5079340934753418, "num_chars": 12}, {"sum_logits": -10.352296829223633, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.523252487182617, "logits_per_token": -5.176148414611816, "logits_per_char": -0.7394497735159737, "num_chars": 14}, {"sum_logits": -7.130954742431641, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.076842308044434, "logits_per_token": -3.5654773712158203, "logits_per_char": -0.5093539101736886, "num_chars": 14}, {"sum_logits": -13.166427612304688, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.178422927856445, "logits_per_token": -6.583213806152344, "logits_per_char": -1.0128021240234375, "num_chars": 13}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 899, "native_id": "1520a8fd3116e7b856947c5e308d7ce5", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.819389343261719, "incorrect_loss_raw": 11.046228766441345, "correct_loss_per_char": 0.8819389343261719, "incorrect_loss_per_char": 1.2468451639962574, "correct_loss_per_token": 8.819389343261719, "incorrect_loss_per_token": 8.904576659202576, "correct_loss_uncond": -3.9151182174682617, "incorrect_loss_uncond": -4.071617484092712}, "model_output": [{"sum_logits": -17.133216857910156, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -20.45038604736328, "logits_per_token": -8.566608428955078, "logits_per_char": -1.0708260536193848, "num_chars": 16}, {"sum_logits": -6.9156904220581055, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -13.609784126281738, "logits_per_token": -6.9156904220581055, "logits_per_char": -0.9879557745797294, "num_chars": 7}, {"sum_logits": -8.819389343261719, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -12.73450756072998, "logits_per_token": -8.819389343261719, "logits_per_char": -0.8819389343261719, "num_chars": 10}, {"sum_logits": -12.442763328552246, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -12.071391105651855, "logits_per_token": -12.442763328552246, "logits_per_char": -2.073793888092041, "num_chars": 6}, {"sum_logits": -7.693244457244873, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.339823722839355, "logits_per_token": -7.693244457244873, "logits_per_char": -0.8548049396938748, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 900, "native_id": "bd780fea2d4dd262583446e64c0f314d", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.97993278503418, "incorrect_loss_raw": 8.644852876663208, "correct_loss_per_char": 0.2766629325018989, "incorrect_loss_per_char": 1.2636006508554731, "correct_loss_per_token": 2.48996639251709, "incorrect_loss_per_token": 6.923396706581116, "correct_loss_uncond": -15.516914367675781, "incorrect_loss_uncond": -7.462262153625488}, "model_output": [{"sum_logits": -9.70711612701416, "num_tokens": 1, "num_tokens_all": 165, "is_greedy": false, "sum_logits_uncond": -12.718345642089844, "logits_per_token": -9.70711612701416, "logits_per_char": -1.3867308752877372, "num_chars": 7}, {"sum_logits": -11.100646018981934, "num_tokens": 1, "num_tokens_all": 165, "is_greedy": false, "sum_logits_uncond": -12.02552318572998, "logits_per_token": -11.100646018981934, "logits_per_char": -2.7751615047454834, "num_chars": 4}, {"sum_logits": -4.97993278503418, "num_tokens": 2, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -20.49684715270996, "logits_per_token": -2.48996639251709, "logits_per_char": -0.2766629325018989, "num_chars": 18}, {"sum_logits": -6.1439361572265625, "num_tokens": 2, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -19.747159957885742, "logits_per_token": -3.0719680786132812, "logits_per_char": -0.38399600982666016, "num_chars": 16}, {"sum_logits": -7.627713203430176, "num_tokens": 2, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -19.93743133544922, "logits_per_token": -3.813856601715088, "logits_per_char": -0.5085142135620118, "num_chars": 15}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 901, "native_id": "99e0b2ddf88ebed98b977043b7c2331b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.038037300109863, "incorrect_loss_raw": 11.086495757102966, "correct_loss_per_char": 1.0042263666788738, "incorrect_loss_per_char": 1.0910811998627403, "correct_loss_per_token": 3.012679100036621, "incorrect_loss_per_token": 6.287291049957275, "correct_loss_uncond": -6.796982765197754, "incorrect_loss_uncond": -7.179206013679504}, "model_output": [{"sum_logits": -5.952345371246338, "num_tokens": 1, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -13.80385971069336, "logits_per_token": -5.952345371246338, "logits_per_char": -0.5952345371246338, "num_chars": 10}, {"sum_logits": -11.710909843444824, "num_tokens": 2, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -16.926790237426758, "logits_per_token": -5.855454921722412, "logits_per_char": -1.1710909843444823, "num_chars": 10}, {"sum_logits": -18.9526424407959, "num_tokens": 2, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -22.52896499633789, "logits_per_token": -9.47632122039795, "logits_per_char": -1.89526424407959, "num_chars": 10}, {"sum_logits": -7.730085372924805, "num_tokens": 2, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -19.803192138671875, "logits_per_token": -3.8650426864624023, "logits_per_char": -0.702735033902255, "num_chars": 11}, {"sum_logits": -9.038037300109863, "num_tokens": 3, "num_tokens_all": 158, "is_greedy": false, "sum_logits_uncond": -15.835020065307617, "logits_per_token": -3.012679100036621, "logits_per_char": -1.0042263666788738, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 902, "native_id": "eb0e0c4eaf19c1e9b4df3b4d3a11be3d", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.419415473937988, "incorrect_loss_raw": 10.787033319473267, "correct_loss_per_char": 0.9170593534197126, "incorrect_loss_per_char": 1.6162823011004734, "correct_loss_per_token": 6.419415473937988, "incorrect_loss_per_token": 10.787033319473267, "correct_loss_uncond": -7.655649185180664, "incorrect_loss_uncond": -3.5262949466705322}, "model_output": [{"sum_logits": -12.28101634979248, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.022502899169922, "logits_per_token": -12.28101634979248, "logits_per_char": -1.3645573721991644, "num_chars": 9}, {"sum_logits": -6.716334342956543, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.82812213897705, "logits_per_token": -6.716334342956543, "logits_per_char": -1.3432668685913085, "num_chars": 5}, {"sum_logits": -6.419415473937988, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.075064659118652, "logits_per_token": -6.419415473937988, "logits_per_char": -0.9170593534197126, "num_chars": 7}, {"sum_logits": -12.90211296081543, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.932482719421387, "logits_per_token": -12.90211296081543, "logits_per_char": -2.150352160135905, "num_chars": 6}, {"sum_logits": -11.248669624328613, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.470205307006836, "logits_per_token": -11.248669624328613, "logits_per_char": -1.6069528034755163, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 903, "native_id": "467a3b464b08b3ffc9922e2a726554f6", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 11.591339111328125, "incorrect_loss_raw": 7.975208759307861, "correct_loss_per_char": 0.6818434771369485, "incorrect_loss_per_char": 0.8512571474353036, "correct_loss_per_token": 5.7956695556640625, "incorrect_loss_per_token": 5.5562180280685425, "correct_loss_uncond": -10.049507141113281, "incorrect_loss_uncond": -7.627866506576538}, "model_output": [{"sum_logits": -5.839117050170898, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.681355476379395, "logits_per_token": -5.839117050170898, "logits_per_char": -0.8341595785958427, "num_chars": 7}, {"sum_logits": -11.591339111328125, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -21.640846252441406, "logits_per_token": -5.7956695556640625, "logits_per_char": -0.6818434771369485, "num_chars": 17}, {"sum_logits": -10.553196907043457, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.383237838745117, "logits_per_token": -5.2765984535217285, "logits_per_char": -1.0553196907043456, "num_chars": 10}, {"sum_logits": -8.798728942871094, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.25385093688965, "logits_per_token": -4.399364471435547, "logits_per_char": -0.6768253032977765, "num_chars": 13}, {"sum_logits": -6.709792137145996, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.093856811523438, "logits_per_token": -6.709792137145996, "logits_per_char": -0.8387240171432495, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 904, "native_id": "dea70fe40fac9ad03bf319bf8a480efa", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.3817766904830933, "incorrect_loss_raw": 10.733091473579407, "correct_loss_per_char": 0.23029611508051553, "incorrect_loss_per_char": 1.2586165195419676, "correct_loss_per_token": 1.3817766904830933, "incorrect_loss_per_token": 6.939647316932678, "correct_loss_uncond": -12.21815025806427, "incorrect_loss_uncond": -6.303282618522644}, "model_output": [{"sum_logits": -4.251322269439697, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.924269676208496, "logits_per_token": -4.251322269439697, "logits_per_char": -0.7085537115732828, "num_chars": 6}, {"sum_logits": -17.960948944091797, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -23.48508071899414, "logits_per_token": -8.980474472045898, "logits_per_char": -1.2829249245779855, "num_chars": 14}, {"sum_logits": -8.333490371704102, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.359588623046875, "logits_per_token": -8.333490371704102, "logits_per_char": -1.6666980743408204, "num_chars": 5}, {"sum_logits": -1.3817766904830933, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": true, "sum_logits_uncond": -13.599926948547363, "logits_per_token": -1.3817766904830933, "logits_per_char": -0.23029611508051553, "num_chars": 6}, {"sum_logits": -12.386604309082031, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.376557350158691, "logits_per_token": -6.193302154541016, "logits_per_char": -1.3762893676757812, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 905, "native_id": "2f1680da0d388a8453150ff3637e4689", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.926534652709961, "incorrect_loss_raw": 11.886781930923462, "correct_loss_per_char": 0.48775577545166016, "incorrect_loss_per_char": 1.062059229383102, "correct_loss_per_token": 2.926534652709961, "incorrect_loss_per_token": 6.143396735191345, "correct_loss_uncond": -9.87891674041748, "incorrect_loss_uncond": -6.686823129653931}, "model_output": [{"sum_logits": -13.355635643005371, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.958751678466797, "logits_per_token": -6.6778178215026855, "logits_per_char": -1.02735658792349, "num_chars": 13}, {"sum_logits": -7.3180975914001465, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.094645500183105, "logits_per_token": -7.3180975914001465, "logits_per_char": -1.2196829319000244, "num_chars": 6}, {"sum_logits": -5.145763874053955, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.477009773254395, "logits_per_token": -5.145763874053955, "logits_per_char": -0.6432204842567444, "num_chars": 8}, {"sum_logits": -21.727630615234375, "num_tokens": 4, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -26.764013290405273, "logits_per_token": -5.431907653808594, "logits_per_char": -1.3579769134521484, "num_chars": 16}, {"sum_logits": -2.926534652709961, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.805451393127441, "logits_per_token": -2.926534652709961, "logits_per_char": -0.48775577545166016, "num_chars": 6}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 906, "native_id": "8369adc4b4710d00f917d80a75d844d7", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.154777526855469, "incorrect_loss_raw": 14.927242517471313, "correct_loss_per_char": 0.6971735954284668, "incorrect_loss_per_char": 2.1396452566082966, "correct_loss_per_token": 5.577388763427734, "incorrect_loss_per_token": 12.286248445510864, "correct_loss_uncond": -9.673690795898438, "incorrect_loss_uncond": -0.2654752731323242}, "model_output": [{"sum_logits": -21.127952575683594, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.859439849853516, "logits_per_token": -10.563976287841797, "logits_per_char": -1.111997503983347, "num_chars": 19}, {"sum_logits": -18.109628677368164, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.137733459472656, "logits_per_token": -18.109628677368164, "logits_per_char": -3.6219257354736327, "num_chars": 5}, {"sum_logits": -11.154777526855469, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.828468322753906, "logits_per_token": -5.577388763427734, "logits_per_char": -0.6971735954284668, "num_chars": 16}, {"sum_logits": -12.342769622802734, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.4013671875, "logits_per_token": -12.342769622802734, "logits_per_char": -3.0856924057006836, "num_chars": 4}, {"sum_logits": -8.128619194030762, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.372330665588379, "logits_per_token": -8.128619194030762, "logits_per_char": -0.7389653812755238, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 907, "native_id": "20a3bb788cf408d9a3e25e610fe60905", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.319660186767578, "incorrect_loss_raw": 10.104941725730896, "correct_loss_per_char": 0.5319660186767579, "incorrect_loss_per_char": 0.9691159171717507, "correct_loss_per_token": 2.659830093383789, "incorrect_loss_per_token": 6.055178225040436, "correct_loss_uncond": -9.981143951416016, "incorrect_loss_uncond": -6.574323773384094}, "model_output": [{"sum_logits": -11.31786823272705, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.580500602722168, "logits_per_token": -11.31786823272705, "logits_per_char": -1.4147335290908813, "num_chars": 8}, {"sum_logits": -13.5177583694458, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.01646614074707, "logits_per_token": -4.505919456481934, "logits_per_char": -0.9655541692461286, "num_chars": 14}, {"sum_logits": -6.001186847686768, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.858729362487793, "logits_per_token": -6.001186847686768, "logits_per_char": -0.8573124068123954, "num_chars": 7}, {"sum_logits": -5.319660186767578, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.300804138183594, "logits_per_token": -2.659830093383789, "logits_per_char": -0.5319660186767579, "num_chars": 10}, {"sum_logits": -9.582953453063965, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.26136589050293, "logits_per_token": -2.395738363265991, "logits_per_char": -0.6388635635375977, "num_chars": 15}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 908, "native_id": "36c1f50eec01c287b8ef6ffe69fe0528", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.142963409423828, "incorrect_loss_raw": 12.995373487472534, "correct_loss_per_char": 0.845246950785319, "incorrect_loss_per_char": 1.3662049164847723, "correct_loss_per_token": 5.071481704711914, "incorrect_loss_per_token": 5.328375697135925, "correct_loss_uncond": -10.119686126708984, "incorrect_loss_uncond": -4.516557455062866}, "model_output": [{"sum_logits": -14.29732894897461, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -18.881155014038086, "logits_per_token": -4.76577631632487, "logits_per_char": -2.04247556413923, "num_chars": 7}, {"sum_logits": -10.142963409423828, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -20.262649536132812, "logits_per_token": -5.071481704711914, "logits_per_char": -0.845246950785319, "num_chars": 12}, {"sum_logits": -8.47003173828125, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -17.843746185302734, "logits_per_token": -4.235015869140625, "logits_per_char": -0.9411146375868056, "num_chars": 9}, {"sum_logits": -15.447997093200684, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.27862548828125, "logits_per_token": -7.723998546600342, "logits_per_char": -1.7164441214667425, "num_chars": 9}, {"sum_logits": -13.766136169433594, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -18.04419708251953, "logits_per_token": -4.588712056477864, "logits_per_char": -0.7647853427463107, "num_chars": 18}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 909, "native_id": "5f4825137a27f369fe859e85dfe1793f", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.404144287109375, "incorrect_loss_raw": 12.606230020523071, "correct_loss_per_char": 1.0505180358886719, "incorrect_loss_per_char": 0.9925295673883878, "correct_loss_per_token": 4.2020721435546875, "incorrect_loss_per_token": 5.440049350261688, "correct_loss_uncond": -10.400497436523438, "incorrect_loss_uncond": -9.109855890274048}, "model_output": [{"sum_logits": -8.404144287109375, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -18.804641723632812, "logits_per_token": -4.2020721435546875, "logits_per_char": -1.0505180358886719, "num_chars": 8}, {"sum_logits": -15.767587661743164, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -22.243751525878906, "logits_per_token": -7.883793830871582, "logits_per_char": -1.5767587661743163, "num_chars": 10}, {"sum_logits": -7.09229850769043, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -18.251426696777344, "logits_per_token": -3.546149253845215, "logits_per_char": -0.6447544097900391, "num_chars": 11}, {"sum_logits": -13.755983352661133, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -21.2037353515625, "logits_per_token": -6.877991676330566, "logits_per_char": -1.0581525655893178, "num_chars": 13}, {"sum_logits": -13.809050559997559, "num_tokens": 4, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -25.165430068969727, "logits_per_token": -3.4522626399993896, "logits_per_char": -0.6904525279998779, "num_chars": 20}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 910, "native_id": "b3dc6d6a5e2f9d7da8eb72816c80b3f8_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.299048662185669, "incorrect_loss_raw": 13.826956748962402, "correct_loss_per_char": 0.47129266602652414, "incorrect_loss_per_char": 1.6351657126849388, "correct_loss_per_token": 3.299048662185669, "incorrect_loss_per_token": 7.524846235911052, "correct_loss_uncond": -9.784117460250854, "incorrect_loss_uncond": -2.042170763015747}, "model_output": [{"sum_logits": -3.299048662185669, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -13.083166122436523, "logits_per_token": -3.299048662185669, "logits_per_char": -0.47129266602652414, "num_chars": 7}, {"sum_logits": -10.588939666748047, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -12.071391105651855, "logits_per_token": -10.588939666748047, "logits_per_char": -1.764823277791341, "num_chars": 6}, {"sum_logits": -17.093990325927734, "num_tokens": 3, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -17.034259796142578, "logits_per_token": -5.697996775309245, "logits_per_char": -1.7093990325927735, "num_chars": 10}, {"sum_logits": -14.278635025024414, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -17.58711051940918, "logits_per_token": -7.139317512512207, "logits_per_char": -2.0398050035749162, "num_chars": 7}, {"sum_logits": -13.346261978149414, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -16.783748626708984, "logits_per_token": -6.673130989074707, "logits_per_char": -1.026635536780724, "num_chars": 13}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 911, "native_id": "63bb6128026ce24209583d0eea75fc27", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.89192008972168, "incorrect_loss_raw": 5.771334230899811, "correct_loss_per_char": 0.8153200149536133, "incorrect_loss_per_char": 0.8939130125301225, "correct_loss_per_token": 4.89192008972168, "incorrect_loss_per_token": 5.301024258136749, "correct_loss_uncond": -6.997733116149902, "incorrect_loss_uncond": -7.992615163326263}, "model_output": [{"sum_logits": -3.762479782104492, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.48346710205078, "logits_per_token": -1.881239891052246, "logits_per_char": -0.313539981842041, "num_chars": 12}, {"sum_logits": -3.450083017349243, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.5768461227417, "logits_per_token": -3.450083017349243, "logits_per_char": -0.4312603771686554, "num_chars": 8}, {"sum_logits": -9.857969284057617, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.52527904510498, "logits_per_token": -9.857969284057617, "logits_per_char": -1.9715938568115234, "num_chars": 5}, {"sum_logits": -4.89192008972168, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.889653205871582, "logits_per_token": -4.89192008972168, "logits_per_char": -0.8153200149536133, "num_chars": 6}, {"sum_logits": -6.014804840087891, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.470205307006836, "logits_per_token": -6.014804840087891, "logits_per_char": -0.8592578342982701, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 912, "native_id": "e8a9142d2402f818273dd62cf5a7b559_1", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.7997307777404785, "incorrect_loss_raw": 12.614204406738281, "correct_loss_per_char": 0.7999551296234131, "incorrect_loss_per_char": 1.314990060670035, "correct_loss_per_token": 4.7997307777404785, "incorrect_loss_per_token": 8.896774371465048, "correct_loss_uncond": -8.082544803619385, "incorrect_loss_uncond": -3.4355533123016357}, "model_output": [{"sum_logits": -14.227140426635742, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -17.90312957763672, "logits_per_token": -7.113570213317871, "logits_per_char": -1.5807933807373047, "num_chars": 9}, {"sum_logits": -11.088455200195312, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -13.043295860290527, "logits_per_token": -11.088455200195312, "logits_per_char": -1.5840650285993303, "num_chars": 7}, {"sum_logits": -4.7997307777404785, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -12.882275581359863, "logits_per_token": -4.7997307777404785, "logits_per_char": -0.7999551296234131, "num_chars": 6}, {"sum_logits": -13.506997108459473, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -14.973722457885742, "logits_per_token": -13.506997108459473, "logits_per_char": -1.1255830923716228, "num_chars": 12}, {"sum_logits": -11.634224891662598, "num_tokens": 3, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -18.27888298034668, "logits_per_token": -3.8780749638875327, "logits_per_char": -0.9695187409718832, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 913, "native_id": "ead9c9744aee08678759158efe005175", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.139472007751465, "incorrect_loss_raw": 7.1354371309280396, "correct_loss_per_char": 0.3671051434108189, "incorrect_loss_per_char": 0.8506140359724411, "correct_loss_per_token": 5.139472007751465, "incorrect_loss_per_token": 6.447750985622406, "correct_loss_uncond": -7.950234413146973, "incorrect_loss_uncond": -4.833234071731567}, "model_output": [{"sum_logits": -5.139472007751465, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.089706420898438, "logits_per_token": -5.139472007751465, "logits_per_char": -0.3671051434108189, "num_chars": 14}, {"sum_logits": -10.657151222229004, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.307923316955566, "logits_per_token": -10.657151222229004, "logits_per_char": -0.9688319292935458, "num_chars": 11}, {"sum_logits": -5.501489162445068, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.12031364440918, "logits_per_token": -2.750744581222534, "logits_per_char": -0.6112765736050076, "num_chars": 9}, {"sum_logits": -5.840368270874023, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -7.939584255218506, "logits_per_token": -5.840368270874023, "logits_per_char": -1.1680736541748047, "num_chars": 5}, {"sum_logits": -6.5427398681640625, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -9.506863594055176, "logits_per_token": -6.5427398681640625, "logits_per_char": -0.6542739868164062, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 914, "native_id": "ab8bf60f76bc6119459271140ccae781", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.608824729919434, "incorrect_loss_raw": 10.166515707969666, "correct_loss_per_char": 0.3072549819946289, "incorrect_loss_per_char": 0.7778659566418156, "correct_loss_per_token": 2.304412364959717, "incorrect_loss_per_token": 4.289529740810394, "correct_loss_uncond": -14.160920143127441, "incorrect_loss_uncond": -7.822198033332825}, "model_output": [{"sum_logits": -5.7882537841796875, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.43128490447998, "logits_per_token": -2.8941268920898438, "logits_per_char": -0.4134466988699777, "num_chars": 14}, {"sum_logits": -23.013629913330078, "num_tokens": 4, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -25.677370071411133, "logits_per_token": -5.7534074783325195, "logits_per_char": -1.150681495666504, "num_chars": 20}, {"sum_logits": -6.707189083099365, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -18.018077850341797, "logits_per_token": -3.3535945415496826, "logits_per_char": -0.5159376217768743, "num_chars": 13}, {"sum_logits": -4.608824729919434, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -18.769744873046875, "logits_per_token": -2.304412364959717, "logits_per_char": -0.3072549819946289, "num_chars": 15}, {"sum_logits": -5.156990051269531, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -13.82812213897705, "logits_per_token": -5.156990051269531, "logits_per_char": -1.0313980102539062, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 915, "native_id": "3c6e2d95a63316b31986e8c7979582c9", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 14.382794380187988, "incorrect_loss_raw": 11.926303505897522, "correct_loss_per_char": 1.0273424557277135, "incorrect_loss_per_char": 1.1345687983355996, "correct_loss_per_token": 7.191397190093994, "incorrect_loss_per_token": 6.413384556770325, "correct_loss_uncond": -7.158228874206543, "incorrect_loss_uncond": -4.6244200468063354}, "model_output": [{"sum_logits": -9.522016525268555, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.42358684539795, "logits_per_token": -9.522016525268555, "logits_per_char": -1.904403305053711, "num_chars": 5}, {"sum_logits": -4.191042423248291, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.298989295959473, "logits_per_token": -4.191042423248291, "logits_per_char": -0.4191042423248291, "num_chars": 10}, {"sum_logits": -14.382794380187988, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.54102325439453, "logits_per_token": -7.191397190093994, "logits_per_char": -1.0273424557277135, "num_chars": 14}, {"sum_logits": -13.76976203918457, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.27543067932129, "logits_per_token": -6.884881019592285, "logits_per_char": -1.251796549016779, "num_chars": 11}, {"sum_logits": -20.222393035888672, "num_tokens": 4, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.20488739013672, "logits_per_token": -5.055598258972168, "logits_per_char": -0.9629710969470796, "num_chars": 21}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 916, "native_id": "5c171b9837af49211891ce40e4a10204", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.6954350471496582, "incorrect_loss_raw": 9.52412223815918, "correct_loss_per_char": 0.24220500673566545, "incorrect_loss_per_char": 1.3132164009979792, "correct_loss_per_token": 1.6954350471496582, "incorrect_loss_per_token": 7.755549271901448, "correct_loss_uncond": -11.34786081314087, "incorrect_loss_uncond": -4.827517747879028}, "model_output": [{"sum_logits": -8.72693920135498, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.075064659118652, "logits_per_token": -8.72693920135498, "logits_per_char": -1.2467056001935686, "num_chars": 7}, {"sum_logits": -8.778510093688965, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.892474174499512, "logits_per_token": -8.778510093688965, "logits_per_char": -1.254072870526995, "num_chars": 7}, {"sum_logits": -9.979601860046387, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.071504592895508, "logits_per_token": -9.979601860046387, "logits_per_char": -1.4256574085780553, "num_chars": 7}, {"sum_logits": -1.6954350471496582, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.043295860290527, "logits_per_token": -1.6954350471496582, "logits_per_char": -0.24220500673566545, "num_chars": 7}, {"sum_logits": -10.611437797546387, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.36751651763916, "logits_per_token": -3.5371459325154624, "logits_per_char": -1.3264297246932983, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 917, "native_id": "56d0fc282a144565f2c852415c6fa92c", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.688789367675781, "incorrect_loss_raw": 11.353524923324585, "correct_loss_per_char": 0.6989808516068892, "incorrect_loss_per_char": 0.817359355517796, "correct_loss_per_token": 7.688789367675781, "incorrect_loss_per_token": 8.317723870277405, "correct_loss_uncond": -7.603610038757324, "incorrect_loss_uncond": -6.341306447982788}, "model_output": [{"sum_logits": -11.252853393554688, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.613886833190918, "logits_per_token": -11.252853393554688, "logits_per_char": -0.937737782796224, "num_chars": 12}, {"sum_logits": -9.874837875366211, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.563517570495605, "logits_per_token": -9.874837875366211, "logits_per_char": -0.658322525024414, "num_chars": 15}, {"sum_logits": -7.688789367675781, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.292399406433105, "logits_per_token": -7.688789367675781, "logits_per_char": -0.6989808516068892, "num_chars": 11}, {"sum_logits": -12.886932373046875, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -25.61383819580078, "logits_per_token": -6.4434661865234375, "logits_per_char": -0.8591288248697917, "num_chars": 15}, {"sum_logits": -11.399476051330566, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.988082885742188, "logits_per_token": -5.699738025665283, "logits_per_char": -0.8142482893807548, "num_chars": 14}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 918, "native_id": "5b8a3081c3235d62bc77e2d15f3ad454", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.7727348804473877, "incorrect_loss_raw": 12.163511991500854, "correct_loss_per_char": 0.25324784006391254, "incorrect_loss_per_char": 1.3269759605801297, "correct_loss_per_token": 1.7727348804473877, "incorrect_loss_per_token": 8.617873668670654, "correct_loss_uncond": -12.246941328048706, "incorrect_loss_uncond": -2.0619819164276123}, "model_output": [{"sum_logits": -1.7727348804473877, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": true, "sum_logits_uncond": -14.019676208496094, "logits_per_token": -1.7727348804473877, "logits_per_char": -0.25324784006391254, "num_chars": 7}, {"sum_logits": -13.770734786987305, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.022502899169922, "logits_per_token": -13.770734786987305, "logits_per_char": -1.5300816429985895, "num_chars": 9}, {"sum_logits": -6.518206596374512, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.886388778686523, "logits_per_token": -6.518206596374512, "logits_per_char": -1.0863677660624187, "num_chars": 6}, {"sum_logits": -11.597646713256836, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.714458465576172, "logits_per_token": -5.798823356628418, "logits_per_char": -0.8284033366612026, "num_chars": 14}, {"sum_logits": -16.767459869384766, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.27862548828125, "logits_per_token": -8.383729934692383, "logits_per_char": -1.8630510965983074, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 919, "native_id": "e43c4eaa04243ddee30f29171718eb92", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.098965644836426, "incorrect_loss_raw": 7.837695360183716, "correct_loss_per_char": 1.0089968768033115, "incorrect_loss_per_char": 1.1322500944137575, "correct_loss_per_token": 5.549482822418213, "incorrect_loss_per_token": 7.837695360183716, "correct_loss_uncond": -8.247567176818848, "incorrect_loss_uncond": -6.046913146972656}, "model_output": [{"sum_logits": -11.098965644836426, "num_tokens": 2, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -19.346532821655273, "logits_per_token": -5.549482822418213, "logits_per_char": -1.0089968768033115, "num_chars": 11}, {"sum_logits": -7.44536018371582, "num_tokens": 1, "num_tokens_all": 165, "is_greedy": false, "sum_logits_uncond": -14.455964088439941, "logits_per_token": -7.44536018371582, "logits_per_char": -0.744536018371582, "num_chars": 10}, {"sum_logits": -3.595905303955078, "num_tokens": 1, "num_tokens_all": 165, "is_greedy": false, "sum_logits_uncond": -14.463623046875, "logits_per_token": -3.595905303955078, "logits_per_char": -0.39954503377278644, "num_chars": 9}, {"sum_logits": -13.849788665771484, "num_tokens": 1, "num_tokens_all": 165, "is_greedy": false, "sum_logits_uncond": -13.840316772460938, "logits_per_token": -13.849788665771484, "logits_per_char": -2.308298110961914, "num_chars": 6}, {"sum_logits": -6.4597272872924805, "num_tokens": 1, "num_tokens_all": 165, "is_greedy": false, "sum_logits_uncond": -12.77853012084961, "logits_per_token": -6.4597272872924805, "logits_per_char": -1.0766212145487468, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 920, "native_id": "84a736d4b702a6869d8fa8523aee6f1b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.414694786071777, "incorrect_loss_raw": 16.13904643058777, "correct_loss_per_char": 0.9268368482589722, "incorrect_loss_per_char": 1.6290241777062082, "correct_loss_per_token": 7.414694786071777, "incorrect_loss_per_token": 11.941035668055216, "correct_loss_uncond": -6.783088684082031, "incorrect_loss_uncond": -0.9799022674560547}, "model_output": [{"sum_logits": -7.414694786071777, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.197783470153809, "logits_per_token": -7.414694786071777, "logits_per_char": -0.9268368482589722, "num_chars": 8}, {"sum_logits": -15.58770751953125, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.434986114501953, "logits_per_token": -15.58770751953125, "logits_per_char": -1.9484634399414062, "num_chars": 8}, {"sum_logits": -25.188064575195312, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -27.909528732299805, "logits_per_token": -8.396021525065104, "logits_per_char": -1.4816508573644303, "num_chars": 17}, {"sum_logits": -9.803415298461914, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.893023490905762, "logits_per_token": -9.803415298461914, "logits_per_char": -1.0892683664957683, "num_chars": 9}, {"sum_logits": -13.976998329162598, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.238256454467773, "logits_per_token": -13.976998329162598, "logits_per_char": -1.9967140470232283, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 921, "native_id": "72611791cdcb040f2d699827fb9cebc4", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.701501369476318, "incorrect_loss_raw": 12.554567337036133, "correct_loss_per_char": 0.33484788562940515, "incorrect_loss_per_char": 1.0653387533637868, "correct_loss_per_token": 3.850750684738159, "incorrect_loss_per_token": 7.263800263404846, "correct_loss_uncond": -12.309736728668213, "incorrect_loss_uncond": -4.853821277618408}, "model_output": [{"sum_logits": -16.33156967163086, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.490997314453125, "logits_per_token": -8.16578483581543, "logits_per_char": -1.4846881519664417, "num_chars": 11}, {"sum_logits": -10.978699684143066, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.55258560180664, "logits_per_token": -5.489349842071533, "logits_per_char": -0.9148916403452555, "num_chars": 12}, {"sum_logits": -7.701501369476318, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.01123809814453, "logits_per_token": -3.850750684738159, "logits_per_char": -0.33484788562940515, "num_chars": 23}, {"sum_logits": -15.015867233276367, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.131317138671875, "logits_per_token": -7.507933616638184, "logits_per_char": -1.0725619452340263, "num_chars": 14}, {"sum_logits": -7.892132759094238, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.458654403686523, "logits_per_token": -7.892132759094238, "logits_per_char": -0.7892132759094238, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 922, "native_id": "4477fb61fde4bb8695c241dfc366b554", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.6771946549415588, "incorrect_loss_raw": 10.904007196426392, "correct_loss_per_char": 0.08464933186769485, "incorrect_loss_per_char": 1.2617131131035941, "correct_loss_per_token": 0.3385973274707794, "incorrect_loss_per_token": 8.59702455997467, "correct_loss_uncond": -14.897706925868988, "incorrect_loss_uncond": -4.513451337814331}, "model_output": [{"sum_logits": -8.676100730895996, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.98350715637207, "logits_per_token": -4.338050365447998, "logits_per_char": -0.723008394241333, "num_chars": 12}, {"sum_logits": -9.779760360717773, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.974644660949707, "logits_per_token": -4.889880180358887, "logits_per_char": -1.3971086229596819, "num_chars": 7}, {"sum_logits": -11.21524715423584, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.973722457885742, "logits_per_token": -11.21524715423584, "logits_per_char": -0.9346039295196533, "num_chars": 12}, {"sum_logits": -0.6771946549415588, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": true, "sum_logits_uncond": -15.574901580810547, "logits_per_token": -0.3385973274707794, "logits_per_char": -0.08464933186769485, "num_chars": 8}, {"sum_logits": -13.944920539855957, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.737959861755371, "logits_per_token": -13.944920539855957, "logits_per_char": -1.9921315056937081, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 923, "native_id": "ce246bc94a54431b9c0530e71d2456b5", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.6617889404296875, "incorrect_loss_raw": 11.518500089645386, "correct_loss_per_char": 0.5551490783691406, "incorrect_loss_per_char": 1.129466085963779, "correct_loss_per_token": 3.3308944702148438, "incorrect_loss_per_token": 5.759250044822693, "correct_loss_uncond": -14.103801727294922, "incorrect_loss_uncond": -6.375573396682739}, "model_output": [{"sum_logits": -9.466190338134766, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.906187057495117, "logits_per_token": -4.733095169067383, "logits_per_char": -1.1832737922668457, "num_chars": 8}, {"sum_logits": -9.756692886352539, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.7001895904541, "logits_per_token": -4.8783464431762695, "logits_per_char": -0.9756692886352539, "num_chars": 10}, {"sum_logits": -15.609465599060059, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.27862548828125, "logits_per_token": -7.804732799530029, "logits_per_char": -1.7343850665622287, "num_chars": 9}, {"sum_logits": -11.24165153503418, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.69129180908203, "logits_per_token": -5.62082576751709, "logits_per_char": -0.6245361963907877, "num_chars": 18}, {"sum_logits": -6.6617889404296875, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.76559066772461, "logits_per_token": -3.3308944702148438, "logits_per_char": -0.5551490783691406, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 924, "native_id": "2eef2d255fe629414f4d24ade8590102", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.031279563903809, "incorrect_loss_raw": 14.832449436187744, "correct_loss_per_char": 0.781253284878201, "incorrect_loss_per_char": 1.4403228355090512, "correct_loss_per_token": 7.031279563903809, "incorrect_loss_per_token": 8.512992024421692, "correct_loss_uncond": -6.991223335266113, "incorrect_loss_uncond": -3.23635196685791}, "model_output": [{"sum_logits": -21.94451904296875, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -23.706554412841797, "logits_per_token": -10.972259521484375, "logits_per_char": -1.9949562766335227, "num_chars": 11}, {"sum_logits": -8.774138450622559, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.434986114501953, "logits_per_token": -8.774138450622559, "logits_per_char": -1.0967673063278198, "num_chars": 8}, {"sum_logits": -7.031279563903809, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.022502899169922, "logits_per_token": -7.031279563903809, "logits_per_char": -0.781253284878201, "num_chars": 9}, {"sum_logits": -12.838085174560547, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.855039596557617, "logits_per_token": -6.419042587280273, "logits_per_char": -0.917006083897182, "num_chars": 14}, {"sum_logits": -15.773055076599121, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.27862548828125, "logits_per_token": -7.8865275382995605, "logits_per_char": -1.75256167517768, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 925, "native_id": "2f85d53721ccc8b3fa4cfc184186d124", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.09400463104248, "incorrect_loss_raw": 12.176709175109863, "correct_loss_per_char": 0.8267276937311346, "incorrect_loss_per_char": 1.2711765095299365, "correct_loss_per_token": 9.09400463104248, "incorrect_loss_per_token": 10.151026248931885, "correct_loss_uncond": -6.48824405670166, "incorrect_loss_uncond": -1.6627299785614014}, "model_output": [{"sum_logits": -9.09400463104248, "num_tokens": 1, "num_tokens_all": 163, "is_greedy": false, "sum_logits_uncond": -15.58224868774414, "logits_per_token": -9.09400463104248, "logits_per_char": -0.8267276937311346, "num_chars": 11}, {"sum_logits": -16.205463409423828, "num_tokens": 2, "num_tokens_all": 164, "is_greedy": false, "sum_logits_uncond": -17.082138061523438, "logits_per_token": -8.102731704711914, "logits_per_char": -0.9532625534955192, "num_chars": 17}, {"sum_logits": -10.984769821166992, "num_tokens": 1, "num_tokens_all": 163, "is_greedy": false, "sum_logits_uncond": -12.327664375305176, "logits_per_token": -10.984769821166992, "logits_per_char": -1.373096227645874, "num_chars": 8}, {"sum_logits": -12.416299819946289, "num_tokens": 1, "num_tokens_all": 163, "is_greedy": false, "sum_logits_uncond": -13.598484992980957, "logits_per_token": -12.416299819946289, "logits_per_char": -1.2416299819946288, "num_chars": 10}, {"sum_logits": -9.100303649902344, "num_tokens": 1, "num_tokens_all": 163, "is_greedy": false, "sum_logits_uncond": -12.349469184875488, "logits_per_token": -9.100303649902344, "logits_per_char": -1.5167172749837239, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 926, "native_id": "2192c5c2145a6e03755ad89a02e64055", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.917003631591797, "incorrect_loss_raw": 11.070403099060059, "correct_loss_per_char": 0.5917003631591797, "incorrect_loss_per_char": 1.2672809419177828, "correct_loss_per_token": 2.9585018157958984, "incorrect_loss_per_token": 6.308804273605347, "correct_loss_uncond": -12.829856872558594, "incorrect_loss_uncond": -6.73369288444519}, "model_output": [{"sum_logits": -14.792753219604492, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -21.804302215576172, "logits_per_token": -4.930917739868164, "logits_per_char": -1.6436392466227214, "num_chars": 9}, {"sum_logits": -5.917003631591797, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -18.74686050415039, "logits_per_token": -2.9585018157958984, "logits_per_char": -0.5917003631591797, "num_chars": 10}, {"sum_logits": -11.119739532470703, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.043295860290527, "logits_per_token": -11.119739532470703, "logits_per_char": -1.588534218924386, "num_chars": 7}, {"sum_logits": -8.268001556396484, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.565593719482422, "logits_per_token": -4.134000778198242, "logits_per_char": -0.9186668395996094, "num_chars": 9}, {"sum_logits": -10.101118087768555, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -19.803192138671875, "logits_per_token": -5.050559043884277, "logits_per_char": -0.9182834625244141, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 927, "native_id": "bea07406aaadeef50110883b6932d86a", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.122395992279053, "incorrect_loss_raw": 6.130728125572205, "correct_loss_per_char": 0.8537326653798422, "incorrect_loss_per_char": 0.6169659104797389, "correct_loss_per_token": 5.122395992279053, "incorrect_loss_per_token": 5.073786735534668, "correct_loss_uncond": -6.763992786407471, "incorrect_loss_uncond": -7.771396040916443}, "model_output": [{"sum_logits": -5.122395992279053, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.886388778686523, "logits_per_token": -5.122395992279053, "logits_per_char": -0.8537326653798422, "num_chars": 6}, {"sum_logits": -3.877573251724243, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.684529304504395, "logits_per_token": -3.877573251724243, "logits_per_char": -0.35250665924765845, "num_chars": 11}, {"sum_logits": -9.838027954101562, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.072720527648926, "logits_per_token": -9.838027954101562, "logits_per_char": -1.2297534942626953, "num_chars": 8}, {"sum_logits": -2.3517801761627197, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.311802864074707, "logits_per_token": -2.3517801761627197, "logits_per_char": -0.23517801761627197, "num_chars": 10}, {"sum_logits": -8.455531120300293, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.539443969726562, "logits_per_token": -4.2277655601501465, "logits_per_char": -0.6504254707923303, "num_chars": 13}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 928, "native_id": "7a58e7e7bf76658751e850f790922aba", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.6355983018875122, "incorrect_loss_raw": 9.588921427726746, "correct_loss_per_char": 0.181733144654168, "incorrect_loss_per_char": 1.2706552970977056, "correct_loss_per_token": 1.6355983018875122, "incorrect_loss_per_token": 8.51619553565979, "correct_loss_uncond": -13.350881457328796, "incorrect_loss_uncond": -5.311030745506287}, "model_output": [{"sum_logits": -9.8148193359375, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.718345642089844, "logits_per_token": -9.8148193359375, "logits_per_char": -1.4021170479910714, "num_chars": 7}, {"sum_logits": -8.581807136535645, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.694374084472656, "logits_per_token": -4.290903568267822, "logits_per_char": -1.2259724480765206, "num_chars": 7}, {"sum_logits": -7.46104097366333, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.071504592895508, "logits_per_token": -7.46104097366333, "logits_per_char": -1.0658629962376185, "num_chars": 7}, {"sum_logits": -12.498018264770508, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.115584373474121, "logits_per_token": -12.498018264770508, "logits_per_char": -1.388668696085612, "num_chars": 9}, {"sum_logits": -1.6355983018875122, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": true, "sum_logits_uncond": -14.986479759216309, "logits_per_token": -1.6355983018875122, "logits_per_char": -0.181733144654168, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 929, "native_id": "76b2c6d254f9127b4fd66d90e1a330e7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.448230266571045, "incorrect_loss_raw": 18.05725383758545, "correct_loss_per_char": 0.7413717110951742, "incorrect_loss_per_char": 1.2217535368909092, "correct_loss_per_token": 4.448230266571045, "incorrect_loss_per_token": 8.71877384185791, "correct_loss_uncond": -9.533870220184326, "incorrect_loss_uncond": 0.27761149406433105}, "model_output": [{"sum_logits": -21.6262264251709, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.239274978637695, "logits_per_token": -7.208742141723633, "logits_per_char": -1.5447304589407784, "num_chars": 14}, {"sum_logits": -4.448230266571045, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.982100486755371, "logits_per_token": -4.448230266571045, "logits_per_char": -0.7413717110951742, "num_chars": 6}, {"sum_logits": -20.75214958190918, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.514463424682617, "logits_per_token": -6.917383193969727, "logits_per_char": -1.2207146812887752, "num_chars": 17}, {"sum_logits": -11.647300720214844, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.290284156799316, "logits_per_token": -11.647300720214844, "logits_per_char": -1.294144524468316, "num_chars": 9}, {"sum_logits": -18.203338623046875, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -19.074546813964844, "logits_per_token": -9.101669311523438, "logits_per_char": -0.8274244828657671, "num_chars": 22}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 930, "native_id": "cdd3d074031fbd3efeb4f9408abef04e", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.169192790985107, "incorrect_loss_raw": 10.289341926574707, "correct_loss_per_char": 0.47794618606567385, "incorrect_loss_per_char": 0.8415389523361667, "correct_loss_per_token": 1.7922981977462769, "incorrect_loss_per_token": 5.1446709632873535, "correct_loss_uncond": -13.092173099517822, "incorrect_loss_uncond": -8.133561849594116}, "model_output": [{"sum_logits": -11.224224090576172, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.767902374267578, "logits_per_token": -5.612112045288086, "logits_per_char": -0.9353520075480143, "num_chars": 12}, {"sum_logits": -9.533845901489258, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.818565368652344, "logits_per_token": -4.766922950744629, "logits_per_char": -0.6355897267659505, "num_chars": 15}, {"sum_logits": -7.169192790985107, "num_tokens": 4, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.26136589050293, "logits_per_token": -1.7922981977462769, "logits_per_char": -0.47794618606567385, "num_chars": 15}, {"sum_logits": -7.823314666748047, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.980475425720215, "logits_per_token": -3.9116573333740234, "logits_per_char": -0.6519428888956705, "num_chars": 12}, {"sum_logits": -12.575983047485352, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.124671936035156, "logits_per_token": -6.287991523742676, "logits_per_char": -1.143271186135032, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 931, "native_id": "359aed918343d228e67cef329b693904", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 14.124140739440918, "incorrect_loss_raw": 12.976617097854614, "correct_loss_per_char": 0.6725781304495675, "incorrect_loss_per_char": 1.2389226601674008, "correct_loss_per_token": 3.5310351848602295, "incorrect_loss_per_token": 6.637886881828308, "correct_loss_uncond": -15.728228569030762, "incorrect_loss_uncond": -6.578460454940796}, "model_output": [{"sum_logits": -26.81663703918457, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -28.35321617126465, "logits_per_token": -8.938879013061523, "logits_per_char": -2.0628182337834287, "num_chars": 13}, {"sum_logits": -6.6412858963012695, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.771018981933594, "logits_per_token": -3.3206429481506348, "logits_per_char": -0.5108681458693284, "num_chars": 13}, {"sum_logits": -14.124140739440918, "num_tokens": 4, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -29.85236930847168, "logits_per_token": -3.5310351848602295, "logits_per_char": -0.6725781304495675, "num_chars": 21}, {"sum_logits": -10.135505676269531, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.378252029418945, "logits_per_token": -10.135505676269531, "logits_per_char": -1.6892509460449219, "num_chars": 6}, {"sum_logits": -8.313039779663086, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.717823028564453, "logits_per_token": -4.156519889831543, "logits_per_char": -0.6927533149719238, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 932, "native_id": "cf02cca40a47c2deefd8b2e5a5ff2f70", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 22.897136688232422, "incorrect_loss_raw": 11.077160954475403, "correct_loss_per_char": 1.2051124572753906, "incorrect_loss_per_char": 2.0668804134641374, "correct_loss_per_token": 5.7242841720581055, "incorrect_loss_per_token": 7.5880104303359985, "correct_loss_uncond": -7.616710662841797, "incorrect_loss_uncond": -3.38046395778656}, "model_output": [{"sum_logits": -22.897136688232422, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -30.51384735107422, "logits_per_token": -5.7242841720581055, "logits_per_char": -1.2051124572753906, "num_chars": 19}, {"sum_logits": -6.261088848114014, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.627041816711426, "logits_per_token": -6.261088848114014, "logits_per_char": -1.2522177696228027, "num_chars": 5}, {"sum_logits": -10.398624420166016, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.11893081665039, "logits_per_token": -5.199312210083008, "logits_per_char": -1.4855177743094308, "num_chars": 7}, {"sum_logits": -10.134350776672363, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -11.949381828308105, "logits_per_token": -10.134350776672363, "logits_per_char": -2.0268701553344726, "num_chars": 5}, {"sum_logits": -17.51457977294922, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.13514518737793, "logits_per_token": -8.75728988647461, "logits_per_char": -3.502915954589844, "num_chars": 5}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 933, "native_id": "ac1abecdbbd7bcde6592ca645c2ecb1e", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.999693870544434, "incorrect_loss_raw": 11.020482182502747, "correct_loss_per_char": 0.5714067050388881, "incorrect_loss_per_char": 0.9858486942119069, "correct_loss_per_token": 3.999846935272217, "incorrect_loss_per_token": 5.510241091251373, "correct_loss_uncond": -14.012749671936035, "incorrect_loss_uncond": -8.055286288261414}, "model_output": [{"sum_logits": -7.999693870544434, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -22.01244354248047, "logits_per_token": -3.999846935272217, "logits_per_char": -0.5714067050388881, "num_chars": 14}, {"sum_logits": -8.732521057128906, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.487031936645508, "logits_per_token": -4.366260528564453, "logits_per_char": -0.5457825660705566, "num_chars": 16}, {"sum_logits": -12.355277061462402, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -21.461597442626953, "logits_per_token": -6.177638530731201, "logits_per_char": -1.0296064217885335, "num_chars": 12}, {"sum_logits": -19.14937973022461, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.789621353149414, "logits_per_token": -9.574689865112305, "logits_per_char": -2.1277088589138455, "num_chars": 9}, {"sum_logits": -3.8447508811950684, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.564823150634766, "logits_per_token": -1.9223754405975342, "logits_per_char": -0.24029693007469177, "num_chars": 16}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 934, "native_id": "2adbb4fc0d5249dc411dda433f378591", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.403341770172119, "incorrect_loss_raw": 10.384390354156494, "correct_loss_per_char": 0.5821219791065563, "incorrect_loss_per_char": 1.2692885213428073, "correct_loss_per_token": 6.403341770172119, "incorrect_loss_per_token": 6.072842121124268, "correct_loss_uncond": -7.927196025848389, "incorrect_loss_uncond": -5.218572378158569}, "model_output": [{"sum_logits": -12.569479942321777, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.310033798217773, "logits_per_token": -6.284739971160889, "logits_per_char": -1.3966088824801974, "num_chars": 9}, {"sum_logits": -9.36055850982666, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.308795928955078, "logits_per_token": -4.68027925491333, "logits_per_char": -0.936055850982666, "num_chars": 10}, {"sum_logits": -10.185762405395508, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.148138999938965, "logits_per_token": -10.185762405395508, "logits_per_char": -1.697627067565918, "num_chars": 6}, {"sum_logits": -6.403341770172119, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.330537796020508, "logits_per_token": -6.403341770172119, "logits_per_char": -0.5821219791065563, "num_chars": 11}, {"sum_logits": -9.421760559082031, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.644882202148438, "logits_per_token": -3.1405868530273438, "logits_per_char": -1.046862284342448, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 935, "native_id": "5a1c8a9dbbb60e523cc1ba14a370729c", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.7185285091400146, "incorrect_loss_raw": 17.915455102920532, "correct_loss_per_char": 0.7437057018280029, "incorrect_loss_per_char": 1.3573177763910005, "correct_loss_per_token": 3.7185285091400146, "incorrect_loss_per_token": 4.963774474461873, "correct_loss_uncond": -8.114296197891235, "incorrect_loss_uncond": -5.244252443313599}, "model_output": [{"sum_logits": -40.13517761230469, "num_tokens": 5, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -35.54286193847656, "logits_per_token": -8.027035522460938, "logits_per_char": -3.344598134358724, "num_chars": 12}, {"sum_logits": -11.207737922668457, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.950382232666016, "logits_per_token": -3.735912640889486, "logits_per_char": -0.5898809432983398, "num_chars": 19}, {"sum_logits": -7.915088653564453, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.462339401245117, "logits_per_token": -3.9575443267822266, "logits_per_char": -0.7195535139604048, "num_chars": 11}, {"sum_logits": -12.403816223144531, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.683246612548828, "logits_per_token": -4.134605407714844, "logits_per_char": -0.7752385139465332, "num_chars": 16}, {"sum_logits": -3.7185285091400146, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.83282470703125, "logits_per_token": -3.7185285091400146, "logits_per_char": -0.7437057018280029, "num_chars": 5}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 936, "native_id": "3665b329f93f7c84edeabe394140f8d2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 15.444246292114258, "incorrect_loss_raw": 10.428001165390015, "correct_loss_per_char": 1.4040223901922053, "incorrect_loss_per_char": 1.3028758215525795, "correct_loss_per_token": 7.722123146057129, "incorrect_loss_per_token": 8.466089248657227, "correct_loss_uncond": -9.506196975708008, "incorrect_loss_uncond": -4.21499490737915}, "model_output": [{"sum_logits": -5.235448837280273, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.677410125732422, "logits_per_token": -5.235448837280273, "logits_per_char": -1.3088622093200684, "num_chars": 4}, {"sum_logits": -15.444246292114258, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -24.950443267822266, "logits_per_token": -7.722123146057129, "logits_per_char": -1.4040223901922053, "num_chars": 11}, {"sum_logits": -12.275907516479492, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.786282539367676, "logits_per_token": -12.275907516479492, "logits_per_char": -1.363989724053277, "num_chars": 9}, {"sum_logits": -8.505352973937988, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.803169250488281, "logits_per_token": -8.505352973937988, "logits_per_char": -1.4175588289896648, "num_chars": 6}, {"sum_logits": -15.695295333862305, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -22.30512237548828, "logits_per_token": -7.847647666931152, "logits_per_char": -1.1210925238473075, "num_chars": 14}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 937, "native_id": "dbcedaa6a6f1f68bc8f2bf7aef23294e", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.193040370941162, "incorrect_loss_raw": 10.51312780380249, "correct_loss_per_char": 0.5321733951568604, "incorrect_loss_per_char": 1.2860614524947271, "correct_loss_per_token": 3.193040370941162, "incorrect_loss_per_token": 10.51312780380249, "correct_loss_uncond": -10.122608661651611, "incorrect_loss_uncond": -3.7953314781188965}, "model_output": [{"sum_logits": -9.643192291259766, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.434986114501953, "logits_per_token": -9.643192291259766, "logits_per_char": -1.2053990364074707, "num_chars": 8}, {"sum_logits": -8.086902618408203, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.364352226257324, "logits_per_token": -8.086902618408203, "logits_per_char": -0.8985447353786893, "num_chars": 9}, {"sum_logits": -13.477314949035645, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.202067375183105, "logits_per_token": -13.477314949035645, "logits_per_char": -1.6846643686294556, "num_chars": 8}, {"sum_logits": -10.845101356506348, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.232431411743164, "logits_per_token": -10.845101356506348, "logits_per_char": -1.3556376695632935, "num_chars": 8}, {"sum_logits": -3.193040370941162, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.315649032592773, "logits_per_token": -3.193040370941162, "logits_per_char": -0.5321733951568604, "num_chars": 6}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 938, "native_id": "ba3a2b9ff289c106051163f840a6f5ba", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.456153869628906, "incorrect_loss_raw": 13.163068652153015, "correct_loss_per_char": 0.4611538478306362, "incorrect_loss_per_char": 1.1365157529905245, "correct_loss_per_token": 2.1520512898763022, "incorrect_loss_per_token": 6.581534326076508, "correct_loss_uncond": -14.014699935913086, "incorrect_loss_uncond": -5.499527096748352}, "model_output": [{"sum_logits": -3.1570229530334473, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.578645706176758, "logits_per_token": -1.5785114765167236, "logits_per_char": -0.2870020866394043, "num_chars": 11}, {"sum_logits": -19.390146255493164, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.27543067932129, "logits_per_token": -9.695073127746582, "logits_per_char": -1.7627405686811968, "num_chars": 11}, {"sum_logits": -6.456153869628906, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.470853805541992, "logits_per_token": -2.1520512898763022, "logits_per_char": -0.4611538478306362, "num_chars": 14}, {"sum_logits": -12.10844898223877, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.25528335571289, "logits_per_token": -6.054224491119385, "logits_per_char": -1.2108448982238769, "num_chars": 10}, {"sum_logits": -17.99665641784668, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -21.54102325439453, "logits_per_token": -8.99832820892334, "logits_per_char": -1.28547545841762, "num_chars": 14}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 939, "native_id": "13fc28f53423a9b3a656c9431df1b3b5", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.786630630493164, "incorrect_loss_raw": 9.714799642562866, "correct_loss_per_char": 0.7786630630493164, "incorrect_loss_per_char": 0.9118123405604588, "correct_loss_per_token": 7.786630630493164, "incorrect_loss_per_token": 6.541468024253845, "correct_loss_uncond": -4.947876930236816, "incorrect_loss_uncond": -7.295894145965576}, "model_output": [{"sum_logits": -9.97939395904541, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.579822540283203, "logits_per_token": -4.989696979522705, "logits_per_char": -0.52523126100239, "num_chars": 19}, {"sum_logits": -7.068873405456543, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.585420608520508, "logits_per_token": -7.068873405456543, "logits_per_char": -1.0098390579223633, "num_chars": 7}, {"sum_logits": -6.403672218322754, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.339823722839355, "logits_per_token": -6.403672218322754, "logits_per_char": -0.7115191353691949, "num_chars": 9}, {"sum_logits": -15.407258987426758, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.537708282470703, "logits_per_token": -7.703629493713379, "logits_per_char": -1.400659907947887, "num_chars": 11}, {"sum_logits": -7.786630630493164, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.73450756072998, "logits_per_token": -7.786630630493164, "logits_per_char": -0.7786630630493164, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 940, "native_id": "3f4b48708d08f8bf7bec796531023f9c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.8174495697021484, "incorrect_loss_raw": 8.577067017555237, "correct_loss_per_char": 0.636241594950358, "incorrect_loss_per_char": 1.4123171732539224, "correct_loss_per_token": 3.8174495697021484, "incorrect_loss_per_token": 8.577067017555237, "correct_loss_uncond": -9.6810941696167, "incorrect_loss_uncond": -4.897398591041565}, "model_output": [{"sum_logits": -3.8174495697021484, "num_tokens": 1, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -13.498543739318848, "logits_per_token": -3.8174495697021484, "logits_per_char": -0.636241594950358, "num_chars": 6}, {"sum_logits": -7.674740314483643, "num_tokens": 1, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -13.586409568786621, "logits_per_token": -7.674740314483643, "logits_per_char": -1.2791233857472737, "num_chars": 6}, {"sum_logits": -9.1248779296875, "num_tokens": 1, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -14.048408508300781, "logits_per_token": -9.1248779296875, "logits_per_char": -1.3035539899553572, "num_chars": 7}, {"sum_logits": -7.614926338195801, "num_tokens": 1, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -12.520729064941406, "logits_per_token": -7.614926338195801, "logits_per_char": -1.0878466197422572, "num_chars": 7}, {"sum_logits": -9.893723487854004, "num_tokens": 1, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -13.742315292358398, "logits_per_token": -9.893723487854004, "logits_per_char": -1.9787446975708007, "num_chars": 5}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 941, "native_id": "c61790eb63ff6652b878ca051493c07d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.259904384613037, "incorrect_loss_raw": 8.88099479675293, "correct_loss_per_char": 0.40460802958561826, "incorrect_loss_per_char": 0.9856402723780482, "correct_loss_per_token": 2.6299521923065186, "incorrect_loss_per_token": 6.068421721458435, "correct_loss_uncond": -15.466112613677979, "incorrect_loss_uncond": -8.177656888961792}, "model_output": [{"sum_logits": -5.139355182647705, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.63223648071289, "logits_per_token": -5.139355182647705, "logits_per_char": -0.7341935975211007, "num_chars": 7}, {"sum_logits": -7.884039402008057, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.787047386169434, "logits_per_token": -7.884039402008057, "logits_per_char": -1.5768078804016112, "num_chars": 5}, {"sum_logits": -5.259904384613037, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.726016998291016, "logits_per_token": -2.6299521923065186, "logits_per_char": -0.40460802958561826, "num_chars": 13}, {"sum_logits": -9.677322387695312, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.37601089477539, "logits_per_token": -4.838661193847656, "logits_per_char": -0.6451548258463542, "num_chars": 15}, {"sum_logits": -12.823262214660645, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.439311981201172, "logits_per_token": -6.411631107330322, "logits_per_char": -0.9864047857431265, "num_chars": 13}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 942, "native_id": "e5ebbe0ea4097bb197ac525b49108362", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.5191057324409485, "incorrect_loss_raw": 10.90693747997284, "correct_loss_per_char": 0.05191057324409485, "incorrect_loss_per_char": 1.493522711442067, "correct_loss_per_token": 0.5191057324409485, "incorrect_loss_per_token": 7.314436912536621, "correct_loss_uncond": -12.569520175457, "incorrect_loss_uncond": -2.977007269859314}, "model_output": [{"sum_logits": -13.090829849243164, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.56519889831543, "logits_per_token": -6.545414924621582, "logits_per_char": -1.0069869114802434, "num_chars": 13}, {"sum_logits": -15.649174690246582, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.878514289855957, "logits_per_token": -7.824587345123291, "logits_per_char": -2.6081957817077637, "num_chars": 6}, {"sum_logits": -0.5191057324409485, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": true, "sum_logits_uncond": -13.08862590789795, "logits_per_token": -0.5191057324409485, "logits_per_char": -0.05191057324409485, "num_chars": 10}, {"sum_logits": -6.639199733734131, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.245282173156738, "logits_per_token": -6.639199733734131, "logits_per_char": -1.3278399467468263, "num_chars": 5}, {"sum_logits": -8.24854564666748, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.846783638000488, "logits_per_token": -8.24854564666748, "logits_per_char": -1.031068205833435, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 943, "native_id": "029e36d8f65982b142c319064dc5e32f", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.828145980834961, "incorrect_loss_raw": 13.210911750793457, "correct_loss_per_char": 0.6828145980834961, "incorrect_loss_per_char": 1.0899682298190612, "correct_loss_per_token": 3.4140729904174805, "incorrect_loss_per_token": 7.15494579076767, "correct_loss_uncond": -8.769970893859863, "incorrect_loss_uncond": -5.570367813110352}, "model_output": [{"sum_logits": -13.81017780303955, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.96675682067871, "logits_per_token": -4.603392601013184, "logits_per_char": -0.8631361126899719, "num_chars": 16}, {"sum_logits": -6.828145980834961, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.598116874694824, "logits_per_token": -3.4140729904174805, "logits_per_char": -0.6828145980834961, "num_chars": 10}, {"sum_logits": -13.50573444366455, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.984130859375, "logits_per_token": -6.752867221832275, "logits_per_char": -0.8441084027290344, "num_chars": 16}, {"sum_logits": -14.50878620147705, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.218299865722656, "logits_per_token": -14.50878620147705, "logits_per_char": -2.07268374306815, "num_chars": 7}, {"sum_logits": -11.018948554992676, "num_tokens": 4, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.955930709838867, "logits_per_token": -2.754737138748169, "logits_per_char": -0.5799446607890882, "num_chars": 19}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 944, "native_id": "3d1a67f87b34303f97549ba83e5521c2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.250441074371338, "incorrect_loss_raw": 10.8630530834198, "correct_loss_per_char": 1.0417401790618896, "incorrect_loss_per_char": 1.4064182830923393, "correct_loss_per_token": 3.125220537185669, "incorrect_loss_per_token": 5.953810731569925, "correct_loss_uncond": -8.087840557098389, "incorrect_loss_uncond": -4.91549277305603}, "model_output": [{"sum_logits": -6.250441074371338, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.338281631469727, "logits_per_token": -3.125220537185669, "logits_per_char": -1.0417401790618896, "num_chars": 6}, {"sum_logits": -8.890730857849121, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.872467041015625, "logits_per_token": -2.9635769526163735, "logits_per_char": -0.8082482598044656, "num_chars": 11}, {"sum_logits": -7.141850471496582, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.043295860290527, "logits_per_token": -7.141850471496582, "logits_per_char": -1.0202643530709403, "num_chars": 7}, {"sum_logits": -13.910003662109375, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.27862548828125, "logits_per_token": -6.9550018310546875, "logits_per_char": -1.5455559624565973, "num_chars": 9}, {"sum_logits": -13.509627342224121, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.919795036315918, "logits_per_token": -6.7548136711120605, "logits_per_char": -2.2516045570373535, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 945, "native_id": "e050bce7048da1b3743a54153e91694e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.477414608001709, "incorrect_loss_raw": 13.59589958190918, "correct_loss_per_char": 0.22387073040008545, "incorrect_loss_per_char": 1.1755629539489747, "correct_loss_per_token": 2.2387073040008545, "incorrect_loss_per_token": 6.839094559351603, "correct_loss_uncond": -14.272142887115479, "incorrect_loss_uncond": -5.610424995422363}, "model_output": [{"sum_logits": -4.477414608001709, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.749557495117188, "logits_per_token": -2.2387073040008545, "logits_per_char": -0.22387073040008545, "num_chars": 20}, {"sum_logits": -11.586069107055664, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.66000747680664, "logits_per_token": -3.862023035685221, "logits_per_char": -0.9655057589213053, "num_chars": 12}, {"sum_logits": -14.502603530883789, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.813220977783203, "logits_per_token": -7.2513017654418945, "logits_per_char": -1.2085502942403157, "num_chars": 12}, {"sum_logits": -10.217117309570312, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.4810791015625, "logits_per_token": -10.217117309570312, "logits_per_char": -1.0217117309570312, "num_chars": 10}, {"sum_logits": -18.077808380126953, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.870990753173828, "logits_per_token": -6.025936126708984, "logits_per_char": -1.506484031677246, "num_chars": 12}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 946, "native_id": "8233ccb60dd0c0ff3b7ca5d73e5681f2", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.509840965270996, "incorrect_loss_raw": 15.951169729232788, "correct_loss_per_char": 0.47276894251505536, "incorrect_loss_per_char": 1.4835537857625074, "correct_loss_per_token": 4.254920482635498, "incorrect_loss_per_token": 8.144377787907919, "correct_loss_uncond": -9.232266426086426, "incorrect_loss_uncond": -1.5839252471923828}, "model_output": [{"sum_logits": -20.08910369873047, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.544958114624023, "logits_per_token": -6.696367899576823, "logits_per_char": -1.545315669133113, "num_chars": 13}, {"sum_logits": -8.509840965270996, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.742107391357422, "logits_per_token": -4.254920482635498, "logits_per_char": -0.47276894251505536, "num_chars": 18}, {"sum_logits": -12.531705856323242, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.31043529510498, "logits_per_token": -12.531705856323242, "logits_per_char": -1.790243693760463, "num_chars": 7}, {"sum_logits": -13.454983711242676, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.936298370361328, "logits_per_token": -4.484994570414226, "logits_per_char": -1.1212486426035564, "num_chars": 12}, {"sum_logits": -17.728885650634766, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.34868812561035, "logits_per_token": -8.864442825317383, "logits_per_char": -1.4774071375528972, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 947, "native_id": "eb4b2cd0f2a69686e5a82250c5806b84", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.2418680191040039, "incorrect_loss_raw": 13.794643640518188, "correct_loss_per_char": 0.026874224344889324, "incorrect_loss_per_char": 1.3761585042590188, "correct_loss_per_token": 0.2418680191040039, "incorrect_loss_per_token": 9.092693249384563, "correct_loss_uncond": -12.550891876220703, "incorrect_loss_uncond": -2.6418964862823486}, "model_output": [{"sum_logits": -11.490015029907227, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.256620407104492, "logits_per_token": -11.490015029907227, "logits_per_char": -1.6414307185581751, "num_chars": 7}, {"sum_logits": -15.91623306274414, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.88067626953125, "logits_per_token": -5.305411020914714, "logits_per_char": -0.795811653137207, "num_chars": 20}, {"sum_logits": -16.393959045410156, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.110511779785156, "logits_per_token": -8.196979522705078, "logits_per_char": -1.170997074672154, "num_chars": 14}, {"sum_logits": -11.37836742401123, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.49835205078125, "logits_per_token": -11.37836742401123, "logits_per_char": -1.8963945706685383, "num_chars": 6}, {"sum_logits": -0.2418680191040039, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": true, "sum_logits_uncond": -12.792759895324707, "logits_per_token": -0.2418680191040039, "logits_per_char": -0.026874224344889324, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 948, "native_id": "d0bda97a087904320216e4d0b8a08a8d", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.01041316986084, "incorrect_loss_raw": 18.08456015586853, "correct_loss_per_char": 0.7150295121329171, "incorrect_loss_per_char": 1.7540031953708275, "correct_loss_per_token": 3.3368043899536133, "incorrect_loss_per_token": 10.810489892959595, "correct_loss_uncond": -6.800652503967285, "incorrect_loss_uncond": 0.4757821559906006}, "model_output": [{"sum_logits": -16.167858123779297, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.975189208984375, "logits_per_token": -8.083929061889648, "logits_per_char": -1.243681394136869, "num_chars": 13}, {"sum_logits": -14.145678520202637, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.572244644165039, "logits_per_token": -14.145678520202637, "logits_per_char": -1.7682098150253296, "num_chars": 8}, {"sum_logits": -20.20633316040039, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.558530807495117, "logits_per_token": -10.103166580200195, "logits_per_char": -2.020633316040039, "num_chars": 10}, {"sum_logits": -10.01041316986084, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.811065673828125, "logits_per_token": -3.3368043899536133, "logits_per_char": -0.7150295121329171, "num_chars": 14}, {"sum_logits": -21.818370819091797, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -21.329147338867188, "logits_per_token": -10.909185409545898, "logits_per_char": -1.9834882562810725, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 949, "native_id": "e216381e9f0ddd1d248ee25fccca2b1f", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.9032278060913086, "incorrect_loss_raw": 14.661683559417725, "correct_loss_per_char": 0.3629034757614136, "incorrect_loss_per_char": 1.4275187799940894, "correct_loss_per_token": 2.9032278060913086, "incorrect_loss_per_token": 8.379097143809, "correct_loss_uncond": -10.227487564086914, "incorrect_loss_uncond": -3.383894681930542}, "model_output": [{"sum_logits": -9.38961410522461, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.660371780395508, "logits_per_token": -9.38961410522461, "logits_per_char": -0.938961410522461, "num_chars": 10}, {"sum_logits": -7.770044326782227, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.692679405212402, "logits_per_token": -7.770044326782227, "logits_per_char": -1.2950073877970378, "num_chars": 6}, {"sum_logits": -15.166229248046875, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -22.820125579833984, "logits_per_token": -7.5831146240234375, "logits_per_char": -1.0833020891462053, "num_chars": 14}, {"sum_logits": -26.320846557617188, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -24.009136199951172, "logits_per_token": -8.773615519205729, "logits_per_char": -2.3928042325106533, "num_chars": 11}, {"sum_logits": -2.9032278060913086, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.130715370178223, "logits_per_token": -2.9032278060913086, "logits_per_char": -0.3629034757614136, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 950, "native_id": "b1fba9ad6193c6751ddb3f58f7f39b35", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.8494160175323486, "incorrect_loss_raw": 10.457131385803223, "correct_loss_per_char": 0.2566277345021566, "incorrect_loss_per_char": 0.828377464413643, "correct_loss_per_token": 1.9247080087661743, "incorrect_loss_per_token": 5.026227871576944, "correct_loss_uncond": -14.158709287643433, "incorrect_loss_uncond": -9.690058946609497}, "model_output": [{"sum_logits": -3.8494160175323486, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.00812530517578, "logits_per_token": -1.9247080087661743, "logits_per_char": -0.2566277345021566, "num_chars": 15}, {"sum_logits": -9.100171089172363, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -23.26673126220703, "logits_per_token": -3.0333903630574546, "logits_per_char": -0.5687606930732727, "num_chars": 16}, {"sum_logits": -18.090784072875977, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -26.330224990844727, "logits_per_token": -6.030261357625325, "logits_per_char": -0.9045392036437988, "num_chars": 20}, {"sum_logits": -7.192621231079102, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.296344757080078, "logits_per_token": -3.596310615539551, "logits_per_char": -0.5993851025899252, "num_chars": 12}, {"sum_logits": -7.444949150085449, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.695460319519043, "logits_per_token": -7.444949150085449, "logits_per_char": -1.240824858347575, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 951, "native_id": "3ceae7a18073050bd2c0448abef1f393", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.8354758024215698, "incorrect_loss_raw": 11.449578523635864, "correct_loss_per_char": 0.13110541445868357, "incorrect_loss_per_char": 1.8094927413122994, "correct_loss_per_token": 1.8354758024215698, "incorrect_loss_per_token": 8.787814378738403, "correct_loss_uncond": -12.122758030891418, "incorrect_loss_uncond": -4.204593896865845}, "model_output": [{"sum_logits": -11.64822006225586, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.36113166809082, "logits_per_token": -11.64822006225586, "logits_per_char": -1.4560275077819824, "num_chars": 8}, {"sum_logits": -1.8354758024215698, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.958233833312988, "logits_per_token": -1.8354758024215698, "logits_per_char": -0.13110541445868357, "num_chars": 14}, {"sum_logits": -4.424633979797363, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.445282936096191, "logits_per_token": -4.424633979797363, "logits_per_char": -0.6320905685424805, "num_chars": 7}, {"sum_logits": -8.431346893310547, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.39013957977295, "logits_per_token": -8.431346893310547, "logits_per_char": -2.1078367233276367, "num_chars": 4}, {"sum_logits": -21.294113159179688, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -23.420135498046875, "logits_per_token": -10.647056579589844, "logits_per_char": -3.042016165597098, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 952, "native_id": "f1182e3a070f5a1be529843aa6e5c20c", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.665412902832031, "incorrect_loss_raw": 9.235480546951294, "correct_loss_per_char": 0.5183792114257812, "incorrect_loss_per_char": 1.1095572013359565, "correct_loss_per_token": 4.665412902832031, "incorrect_loss_per_token": 8.141435503959656, "correct_loss_uncond": -9.674410820007324, "incorrect_loss_uncond": -5.1674628257751465}, "model_output": [{"sum_logits": -8.752360343933105, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.604297637939453, "logits_per_token": -4.376180171966553, "logits_per_char": -1.2503371919904436, "num_chars": 7}, {"sum_logits": -9.968433380126953, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.60936164855957, "logits_per_token": -9.968433380126953, "logits_per_char": -0.9062212163751776, "num_chars": 11}, {"sum_logits": -9.968433380126953, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.60936164855957, "logits_per_token": -9.968433380126953, "logits_per_char": -0.9062212163751776, "num_chars": 11}, {"sum_logits": -4.665412902832031, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.339823722839355, "logits_per_token": -4.665412902832031, "logits_per_char": -0.5183792114257812, "num_chars": 9}, {"sum_logits": -8.252695083618164, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.788752555847168, "logits_per_token": -8.252695083618164, "logits_per_char": -1.3754491806030273, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 953, "native_id": "5799089c131e26473697afc54d5f6964", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.960265636444092, "incorrect_loss_raw": 12.275340795516968, "correct_loss_per_char": 0.3600241487676447, "incorrect_loss_per_char": 1.2915217106802421, "correct_loss_per_token": 1.980132818222046, "incorrect_loss_per_token": 7.239487369855245, "correct_loss_uncond": -9.959855556488037, "incorrect_loss_uncond": -5.655624866485596}, "model_output": [{"sum_logits": -7.762824058532715, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.640544891357422, "logits_per_token": -3.8814120292663574, "logits_per_char": -0.5175216039021809, "num_chars": 15}, {"sum_logits": -14.705604553222656, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.225759506225586, "logits_per_token": -4.901868184407552, "logits_per_char": -1.225467046101888, "num_chars": 12}, {"sum_logits": -13.71640396118164, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.0188627243042, "logits_per_token": -13.71640396118164, "logits_per_char": -2.743280792236328, "num_chars": 5}, {"sum_logits": -12.91653060913086, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.838695526123047, "logits_per_token": -6.45826530456543, "logits_per_char": -0.6798174004805716, "num_chars": 19}, {"sum_logits": -3.960265636444092, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.920121192932129, "logits_per_token": -1.980132818222046, "logits_per_char": -0.3600241487676447, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 954, "native_id": "7ce1f99e8185489a7113e6d18c71abb0", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.838388442993164, "incorrect_loss_raw": 10.355803370475769, "correct_loss_per_char": 1.1676776885986329, "incorrect_loss_per_char": 1.4004668309575037, "correct_loss_per_token": 5.838388442993164, "incorrect_loss_per_token": 7.812584221363068, "correct_loss_uncond": -8.070406913757324, "incorrect_loss_uncond": -5.406575560569763}, "model_output": [{"sum_logits": -11.929244041442871, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -12.562989234924316, "logits_per_token": -11.929244041442871, "logits_per_char": -1.7041777202061243, "num_chars": 7}, {"sum_logits": -6.259124279022217, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -16.68304443359375, "logits_per_token": -3.1295621395111084, "logits_per_char": -0.8941606112888881, "num_chars": 7}, {"sum_logits": -9.148216247558594, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.687225341796875, "logits_per_token": -9.148216247558594, "logits_per_char": -1.8296432495117188, "num_chars": 5}, {"sum_logits": -5.838388442993164, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.908795356750488, "logits_per_token": -5.838388442993164, "logits_per_char": -1.1676776885986329, "num_chars": 5}, {"sum_logits": -14.086628913879395, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -20.116256713867188, "logits_per_token": -7.043314456939697, "logits_per_char": -1.173885742823283, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 955, "native_id": "69425fb4cd2dc034e9ff223d2d5676ec", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.6631340980529785, "incorrect_loss_raw": 13.058701992034912, "correct_loss_per_char": 0.6385945081710815, "incorrect_loss_per_char": 1.1245406507666595, "correct_loss_per_token": 3.8315670490264893, "incorrect_loss_per_token": 7.756582736968994, "correct_loss_uncond": -8.995133876800537, "incorrect_loss_uncond": -5.232298851013184}, "model_output": [{"sum_logits": -14.439481735229492, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -19.409805297851562, "logits_per_token": -7.219740867614746, "logits_per_char": -1.2032901446024578, "num_chars": 12}, {"sum_logits": -7.6631340980529785, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -16.658267974853516, "logits_per_token": -3.8315670490264893, "logits_per_char": -0.6385945081710815, "num_chars": 12}, {"sum_logits": -9.817853927612305, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -14.463623046875, "logits_per_token": -9.817853927612305, "logits_per_char": -1.0908726586235895, "num_chars": 9}, {"sum_logits": -17.30063247680664, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -19.001684188842773, "logits_per_token": -8.65031623840332, "logits_per_char": -1.017684263341567, "num_chars": 17}, {"sum_logits": -10.676839828491211, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -20.288890838623047, "logits_per_token": -5.3384199142456055, "logits_per_char": -1.1863155364990234, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 956, "native_id": "f75b22d5b88ac56ae7df030c1ebeded5", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.16052770614624, "incorrect_loss_raw": 7.013317346572876, "correct_loss_per_char": 0.7372182437351772, "incorrect_loss_per_char": 0.8335235526164373, "correct_loss_per_token": 5.16052770614624, "incorrect_loss_per_token": 6.0220160484313965, "correct_loss_uncond": -8.021368503570557, "incorrect_loss_uncond": -8.516424417495728}, "model_output": [{"sum_logits": -7.930410385131836, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -19.095705032348633, "logits_per_token": -3.965205192565918, "logits_per_char": -0.6608675320943197, "num_chars": 12}, {"sum_logits": -7.528234004974365, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.307762145996094, "logits_per_token": -7.528234004974365, "logits_per_char": -0.9410292506217957, "num_chars": 8}, {"sum_logits": -7.091023921966553, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.303086280822754, "logits_per_token": -7.091023921966553, "logits_per_char": -1.1818373203277588, "num_chars": 6}, {"sum_logits": -5.50360107421875, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.412413597106934, "logits_per_token": -5.50360107421875, "logits_per_char": -0.550360107421875, "num_chars": 10}, {"sum_logits": -5.16052770614624, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.181896209716797, "logits_per_token": -5.16052770614624, "logits_per_char": -0.7372182437351772, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 957, "native_id": "4eb3e69c0d42a2287692d2b9d2cb5979", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.163445472717285, "incorrect_loss_raw": 11.307649612426758, "correct_loss_per_char": 1.0272409121195476, "incorrect_loss_per_char": 1.5782098571459453, "correct_loss_per_token": 6.163445472717285, "incorrect_loss_per_token": 9.839438080787659, "correct_loss_uncond": -8.545639038085938, "incorrect_loss_uncond": -1.5354480743408203}, "model_output": [{"sum_logits": -12.43978500366211, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.893023490905762, "logits_per_token": -12.43978500366211, "logits_per_char": -1.3821983337402344, "num_chars": 9}, {"sum_logits": -6.163445472717285, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.709084510803223, "logits_per_token": -6.163445472717285, "logits_per_char": -1.0272409121195476, "num_chars": 6}, {"sum_logits": -13.32939624786377, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.254728317260742, "logits_per_token": -13.32939624786377, "logits_per_char": -2.665879249572754, "num_chars": 5}, {"sum_logits": -7.715724945068359, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.376425743103027, "logits_per_token": -7.715724945068359, "logits_per_char": -1.2859541575113933, "num_chars": 6}, {"sum_logits": -11.745692253112793, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.848213195800781, "logits_per_token": -5.8728461265563965, "logits_per_char": -0.9788076877593994, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 958, "native_id": "7d937233b4a9043da0b976dbd42d141b", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.2101545333862305, "incorrect_loss_raw": 6.677148222923279, "correct_loss_per_char": 0.24693496410663313, "incorrect_loss_per_char": 0.740629427962833, "correct_loss_per_token": 3.2101545333862305, "incorrect_loss_per_token": 6.677148222923279, "correct_loss_uncond": -10.8046875, "incorrect_loss_uncond": -7.711303353309631}, "model_output": [{"sum_logits": -10.64158821105957, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.284228324890137, "logits_per_token": -10.64158821105957, "logits_per_char": -1.1823986901177301, "num_chars": 9}, {"sum_logits": -5.007975101470947, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.457324981689453, "logits_per_token": -5.007975101470947, "logits_per_char": -0.3577125072479248, "num_chars": 14}, {"sum_logits": -6.009848594665527, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.886456489562988, "logits_per_token": -6.009848594665527, "logits_per_char": -1.0016414324442546, "num_chars": 6}, {"sum_logits": -3.2101545333862305, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.01484203338623, "logits_per_token": -3.2101545333862305, "logits_per_char": -0.24693496410663313, "num_chars": 13}, {"sum_logits": -5.04918098449707, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.925796508789062, "logits_per_token": -5.04918098449707, "logits_per_char": -0.42076508204142254, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 959, "native_id": "6bd176cc91a2a2088807ec446c008856", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.455725908279419, "incorrect_loss_raw": 10.5822172164917, "correct_loss_per_char": 0.20464382568995157, "incorrect_loss_per_char": 1.4588177939256033, "correct_loss_per_token": 2.455725908279419, "incorrect_loss_per_token": 8.640602231025696, "correct_loss_uncond": -12.517996549606323, "incorrect_loss_uncond": -4.304158449172974}, "model_output": [{"sum_logits": -2.455725908279419, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": true, "sum_logits_uncond": -14.973722457885742, "logits_per_token": -2.455725908279419, "logits_per_char": -0.20464382568995157, "num_chars": 12}, {"sum_logits": -6.457111358642578, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.891663551330566, "logits_per_token": -6.457111358642578, "logits_per_char": -0.8071389198303223, "num_chars": 8}, {"sum_logits": -10.206870079040527, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.307762145996094, "logits_per_token": -10.206870079040527, "logits_per_char": -1.275858759880066, "num_chars": 8}, {"sum_logits": -15.532919883728027, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.204669952392578, "logits_per_token": -7.766459941864014, "logits_per_char": -1.7258799870808919, "num_chars": 9}, {"sum_logits": -10.131967544555664, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.141407012939453, "logits_per_token": -10.131967544555664, "logits_per_char": -2.026393508911133, "num_chars": 5}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 960, "native_id": "c3890d43b84635d9e61c007ca2521d5b", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.265083312988281, "incorrect_loss_raw": 15.572766065597534, "correct_loss_per_char": 0.635775639460637, "incorrect_loss_per_char": 1.1655730313778567, "correct_loss_per_token": 4.132541656494141, "incorrect_loss_per_token": 6.277105708916982, "correct_loss_uncond": -10.723932266235352, "incorrect_loss_uncond": -4.906106233596802}, "model_output": [{"sum_logits": -13.197976112365723, "num_tokens": 4, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.955930709838867, "logits_per_token": -3.2994940280914307, "logits_per_char": -0.6946303217034591, "num_chars": 19}, {"sum_logits": -17.078704833984375, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -20.18935775756836, "logits_per_token": -8.539352416992188, "logits_per_char": -1.3137465256911058, "num_chars": 13}, {"sum_logits": -15.588691711425781, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -22.85982894897461, "logits_per_token": -7.794345855712891, "logits_per_char": -1.5588691711425782, "num_chars": 10}, {"sum_logits": -8.265083312988281, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -18.989015579223633, "logits_per_token": -4.132541656494141, "logits_per_char": -0.635775639460637, "num_chars": 13}, {"sum_logits": -16.425691604614258, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.910371780395508, "logits_per_token": -5.475230534871419, "logits_per_char": -1.0950461069742838, "num_chars": 15}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 961, "native_id": "6195ed74cf445cb5d991e1076a080dde", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.9172565937042236, "incorrect_loss_raw": 9.851784467697144, "correct_loss_per_char": 0.3013274302849403, "incorrect_loss_per_char": 0.7780265203862813, "correct_loss_per_token": 1.9586282968521118, "incorrect_loss_per_token": 4.254508376121521, "correct_loss_uncond": -12.597002744674683, "incorrect_loss_uncond": -7.364395380020142}, "model_output": [{"sum_logits": -6.07228946685791, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.393335342407227, "logits_per_token": -3.036144733428955, "logits_per_char": -0.43373496191842215, "num_chars": 14}, {"sum_logits": -16.11321258544922, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.333938598632812, "logits_per_token": -5.371070861816406, "logits_per_char": -1.4648375077681108, "num_chars": 11}, {"sum_logits": -8.180245399475098, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.134056091308594, "logits_per_token": -4.090122699737549, "logits_per_char": -0.6816871166229248, "num_chars": 12}, {"sum_logits": -3.9172565937042236, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.514259338378906, "logits_per_token": -1.9586282968521118, "logits_per_char": -0.3013274302849403, "num_chars": 13}, {"sum_logits": -9.041390419006348, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.003389358520508, "logits_per_token": -4.520695209503174, "logits_per_char": -0.5318464952356675, "num_chars": 17}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 962, "native_id": "37644422df4bcd28b3f54bbf3fc2c0f8", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.1534267663955688, "incorrect_loss_raw": 11.14342999458313, "correct_loss_per_char": 0.19223779439926147, "incorrect_loss_per_char": 1.290109759110671, "correct_loss_per_token": 0.5767133831977844, "incorrect_loss_per_token": 6.532185196876526, "correct_loss_uncond": -14.181610465049744, "incorrect_loss_uncond": -3.733198642730713}, "model_output": [{"sum_logits": -12.895007133483887, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.977118492126465, "logits_per_token": -6.447503566741943, "logits_per_char": -1.8421438762119837, "num_chars": 7}, {"sum_logits": -1.1534267663955688, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": true, "sum_logits_uncond": -15.335037231445312, "logits_per_token": -0.5767133831977844, "logits_per_char": -0.19223779439926147, "num_chars": 6}, {"sum_logits": -11.688520431518555, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.943564414978027, "logits_per_token": -5.844260215759277, "logits_per_char": -0.8348943165370396, "num_chars": 14}, {"sum_logits": -12.30643081665039, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.34054946899414, "logits_per_token": -6.153215408325195, "logits_per_char": -0.9466485243577224, "num_chars": 13}, {"sum_logits": -7.6837615966796875, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.245282173156738, "logits_per_token": -7.6837615966796875, "logits_per_char": -1.5367523193359376, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 963, "native_id": "23d97480fe45bace231503f8fc367a5b", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.418213844299316, "incorrect_loss_raw": 17.769777536392212, "correct_loss_per_char": 0.5298724174499512, "incorrect_loss_per_char": 1.0290801424355736, "correct_loss_per_token": 3.709106922149658, "incorrect_loss_per_token": 6.5900183234895975, "correct_loss_uncond": -13.164584159851074, "incorrect_loss_uncond": -7.8584864139556885}, "model_output": [{"sum_logits": -22.5364990234375, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -25.008155822753906, "logits_per_token": -11.26824951171875, "logits_per_char": -1.5024332682291666, "num_chars": 15}, {"sum_logits": -12.884891510009766, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -20.54024314880371, "logits_per_token": -6.442445755004883, "logits_per_char": -1.0737409591674805, "num_chars": 12}, {"sum_logits": -25.70254898071289, "num_tokens": 7, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -38.23808670043945, "logits_per_token": -3.6717927115304128, "logits_per_char": -0.9179481778826032, "num_chars": 28}, {"sum_logits": -9.955170631408691, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -18.72657012939453, "logits_per_token": -4.977585315704346, "logits_per_char": -0.6221981644630432, "num_chars": 16}, {"sum_logits": -7.418213844299316, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -20.58279800415039, "logits_per_token": -3.709106922149658, "logits_per_char": -0.5298724174499512, "num_chars": 14}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 964, "native_id": "15556e26feaa5a8a29c9f30896e535d4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.332159996032715, "incorrect_loss_raw": 9.689654350280762, "correct_loss_per_char": 0.3808685711451939, "incorrect_loss_per_char": 0.6031856265003326, "correct_loss_per_token": 2.6660799980163574, "incorrect_loss_per_token": 4.423436522483826, "correct_loss_uncond": -12.021187782287598, "incorrect_loss_uncond": -9.926848888397217}, "model_output": [{"sum_logits": -5.332159996032715, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.353347778320312, "logits_per_token": -2.6660799980163574, "logits_per_char": -0.3808685711451939, "num_chars": 14}, {"sum_logits": -9.023151397705078, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.001684188842773, "logits_per_token": -4.511575698852539, "logits_per_char": -0.5307736116297105, "num_chars": 17}, {"sum_logits": -8.99342155456543, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.660459518432617, "logits_per_token": -4.496710777282715, "logits_per_char": -0.6918016580434946, "num_chars": 13}, {"sum_logits": -10.628668785095215, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.881364822387695, "logits_per_token": -5.314334392547607, "logits_per_char": -0.7085779190063477, "num_chars": 15}, {"sum_logits": -10.113375663757324, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.922504425048828, "logits_per_token": -3.3711252212524414, "logits_per_char": -0.48158931732177734, "num_chars": 21}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 965, "native_id": "6be05d227f4f6fe727218fc8be9df340", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.219962120056152, "incorrect_loss_raw": 13.701293706893921, "correct_loss_per_char": 0.9349968433380127, "incorrect_loss_per_char": 1.2224339034822251, "correct_loss_per_token": 5.609981060028076, "incorrect_loss_per_token": 7.7299957275390625, "correct_loss_uncond": -6.37418270111084, "incorrect_loss_uncond": -3.0746541023254395}, "model_output": [{"sum_logits": -14.573925018310547, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.543140411376953, "logits_per_token": -4.857975006103516, "logits_per_char": -1.214493751525879, "num_chars": 12}, {"sum_logits": -13.426492691040039, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.310033798217773, "logits_per_token": -6.7132463455200195, "logits_per_char": -1.491832521226671, "num_chars": 9}, {"sum_logits": -11.892765998840332, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.918665885925293, "logits_per_token": -11.892765998840332, "logits_per_char": -1.1892765998840331, "num_chars": 10}, {"sum_logits": -14.911991119384766, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.331951141357422, "logits_per_token": -7.455995559692383, "logits_per_char": -0.9941327412923177, "num_chars": 15}, {"sum_logits": -11.219962120056152, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.594144821166992, "logits_per_token": -5.609981060028076, "logits_per_char": -0.9349968433380127, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 966, "native_id": "3f3ba1d9a3bfe63df11247a968eaddce", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 21.45301055908203, "incorrect_loss_raw": 5.107644140720367, "correct_loss_per_char": 1.340813159942627, "incorrect_loss_per_char": 0.699328057822727, "correct_loss_per_token": 7.151003519694011, "incorrect_loss_per_token": 3.3849308490753174, "correct_loss_uncond": -12.346687316894531, "incorrect_loss_uncond": -10.80210393667221}, "model_output": [{"sum_logits": -2.7550642490386963, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.827999114990234, "logits_per_token": -0.9183547496795654, "logits_per_char": -0.39358060700552805, "num_chars": 7}, {"sum_logits": -21.45301055908203, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -33.79969787597656, "logits_per_token": -7.151003519694011, "logits_per_char": -1.340813159942627, "num_chars": 16}, {"sum_logits": -7.567224979400635, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.149666786193848, "logits_per_token": -7.567224979400635, "logits_per_char": -0.9459031224250793, "num_chars": 8}, {"sum_logits": -4.663022041320801, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.788237571716309, "logits_per_token": -2.3315110206604004, "logits_per_char": -0.7771703402201334, "num_chars": 6}, {"sum_logits": -5.445265293121338, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.873088836669922, "logits_per_token": -2.722632646560669, "logits_per_char": -0.6806581616401672, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 967, "native_id": "ca9a3ccfb140aa66816f96ac983b6d9f_1", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 3.3680808544158936, "incorrect_loss_raw": 9.146589040756226, "correct_loss_per_char": 0.5613468090693156, "incorrect_loss_per_char": 0.8798962109618718, "correct_loss_per_token": 3.3680808544158936, "incorrect_loss_per_token": 7.406734824180603, "correct_loss_uncond": -10.323039293289185, "incorrect_loss_uncond": -6.568471670150757}, "model_output": [{"sum_logits": -4.530673980712891, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -14.412413597106934, "logits_per_token": -4.530673980712891, "logits_per_char": -0.45306739807128904, "num_chars": 10}, {"sum_logits": -13.91883373260498, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -19.153587341308594, "logits_per_token": -6.95941686630249, "logits_per_char": -0.927922248840332, "num_chars": 15}, {"sum_logits": -3.3680808544158936, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -13.691120147705078, "logits_per_token": -3.3680808544158936, "logits_per_char": -0.5613468090693156, "num_chars": 6}, {"sum_logits": -9.252781867980957, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -14.986479759216309, "logits_per_token": -9.252781867980957, "logits_per_char": -1.0280868742201064, "num_chars": 9}, {"sum_logits": -8.884066581726074, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -14.307762145996094, "logits_per_token": -8.884066581726074, "logits_per_char": -1.1105083227157593, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 968, "native_id": "487cabfcd776d89748ee7e7bb681ad59", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 16.14126968383789, "incorrect_loss_raw": 17.213780403137207, "correct_loss_per_char": 1.0760846455891928, "incorrect_loss_per_char": 1.1486914724613277, "correct_loss_per_token": 5.380423227945964, "incorrect_loss_per_token": 6.936222195625305, "correct_loss_uncond": -4.233682632446289, "incorrect_loss_uncond": -3.277806282043457}, "model_output": [{"sum_logits": -18.96617889404297, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.637561798095703, "logits_per_token": -6.322059631347656, "logits_per_char": -1.580514907836914, "num_chars": 12}, {"sum_logits": -12.623144149780273, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -17.529441833496094, "logits_per_token": -6.311572074890137, "logits_per_char": -1.1475585590709338, "num_chars": 11}, {"sum_logits": -16.14126968383789, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.37495231628418, "logits_per_token": -5.380423227945964, "logits_per_char": -1.0760846455891928, "num_chars": 15}, {"sum_logits": -7.726409912109375, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.535873413085938, "logits_per_token": -7.726409912109375, "logits_per_char": -0.7726409912109375, "num_chars": 10}, {"sum_logits": -29.53938865661621, "num_tokens": 4, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -30.263469696044922, "logits_per_token": -7.384847164154053, "logits_per_char": -1.0940514317265264, "num_chars": 27}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 969, "native_id": "6915dfdefe3b1cd5fd8886c8bb84929a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.8375244140625, "incorrect_loss_raw": 11.242835998535156, "correct_loss_per_char": 0.319793701171875, "incorrect_loss_per_char": 1.0931720813115438, "correct_loss_per_token": 3.8375244140625, "incorrect_loss_per_token": 8.388890385627747, "correct_loss_uncond": -10.347129821777344, "incorrect_loss_uncond": -5.0849809646606445}, "model_output": [{"sum_logits": -3.8375244140625, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.184654235839844, "logits_per_token": -3.8375244140625, "logits_per_char": -0.319793701171875, "num_chars": 12}, {"sum_logits": -10.61137866973877, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.330135345458984, "logits_per_token": -10.61137866973877, "logits_per_char": -1.51591123853411, "num_chars": 7}, {"sum_logits": -11.285269737243652, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -19.125717163085938, "logits_per_token": -5.642634868621826, "logits_per_char": -0.8060906955174038, "num_chars": 14}, {"sum_logits": -11.546295166015625, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -16.32412338256836, "logits_per_token": -5.7731475830078125, "logits_per_char": -0.7697530110677083, "num_chars": 15}, {"sum_logits": -11.528400421142578, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.531291961669922, "logits_per_token": -11.528400421142578, "logits_per_char": -1.2809333801269531, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 970, "native_id": "ec224c1dbfb569cce7ec317fe987ae68", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.469413757324219, "incorrect_loss_raw": 10.699596762657166, "correct_loss_per_char": 1.246941375732422, "incorrect_loss_per_char": 1.091219146220715, "correct_loss_per_token": 6.234706878662109, "incorrect_loss_per_token": 5.8296836614608765, "correct_loss_uncond": -5.964393615722656, "incorrect_loss_uncond": -6.452795147895813}, "model_output": [{"sum_logits": -17.125457763671875, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.547882080078125, "logits_per_token": -8.562728881835938, "logits_per_char": -1.7125457763671874, "num_chars": 10}, {"sum_logits": -12.469413757324219, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.433807373046875, "logits_per_token": -6.234706878662109, "logits_per_char": -1.246941375732422, "num_chars": 10}, {"sum_logits": -8.744363784790039, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.23485565185547, "logits_per_token": -4.3721818923950195, "logits_per_char": -0.7949421622536399, "num_chars": 11}, {"sum_logits": -13.089483261108398, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.25528335571289, "logits_per_token": -6.544741630554199, "logits_per_char": -1.3089483261108399, "num_chars": 10}, {"sum_logits": -3.8390822410583496, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -11.57154655456543, "logits_per_token": -3.8390822410583496, "logits_per_char": -0.5484403201511928, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 971, "native_id": "0cba8ddda21e29c8c53482e131d741cd", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.913640022277832, "incorrect_loss_raw": 12.8851797580719, "correct_loss_per_char": 0.909470001856486, "incorrect_loss_per_char": 1.2757952796088325, "correct_loss_per_token": 5.456820011138916, "incorrect_loss_per_token": 8.980133771896362, "correct_loss_uncond": -9.328001976013184, "incorrect_loss_uncond": -2.6822471618652344}, "model_output": [{"sum_logits": -10.962526321411133, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.214130401611328, "logits_per_token": -3.6541754404703775, "logits_per_char": -1.2180584801567926, "num_chars": 9}, {"sum_logits": -10.913640022277832, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -20.241641998291016, "logits_per_token": -5.456820011138916, "logits_per_char": -0.909470001856486, "num_chars": 12}, {"sum_logits": -12.46774959564209, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -19.83594512939453, "logits_per_token": -4.155916531880696, "logits_per_char": -0.6926527553134494, "num_chars": 18}, {"sum_logits": -12.85342025756836, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.73450756072998, "logits_per_token": -12.85342025756836, "logits_per_char": -1.285342025756836, "num_chars": 10}, {"sum_logits": -15.257022857666016, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.485124588012695, "logits_per_token": -15.257022857666016, "logits_per_char": -1.907127857208252, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 972, "native_id": "e65559cd9f5d96b577caeb78d9033502", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 4.516068935394287, "incorrect_loss_raw": 13.880987644195557, "correct_loss_per_char": 0.45160689353942873, "incorrect_loss_per_char": 1.1091349569021487, "correct_loss_per_token": 4.516068935394287, "incorrect_loss_per_token": 8.422183752059937, "correct_loss_uncond": -8.572556972503662, "incorrect_loss_uncond": -3.263115406036377}, "model_output": [{"sum_logits": -13.065990447998047, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.563986778259277, "logits_per_token": -13.065990447998047, "logits_per_char": -1.0888325373331706, "num_chars": 12}, {"sum_logits": -4.516068935394287, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.08862590789795, "logits_per_token": -4.516068935394287, "logits_per_char": -0.45160689353942873, "num_chars": 10}, {"sum_logits": -7.233089447021484, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.892474174499512, "logits_per_token": -7.233089447021484, "logits_per_char": -1.0332984924316406, "num_chars": 7}, {"sum_logits": -9.888189315795898, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.46664047241211, "logits_per_token": -4.944094657897949, "logits_per_char": -0.8240157763163248, "num_chars": 12}, {"sum_logits": -25.336681365966797, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.653310775756836, "logits_per_token": -8.445560455322266, "logits_per_char": -1.4903930215274586, "num_chars": 17}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 973, "native_id": "b8937a30f25093910c040f4e63e1d352", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.1114115715026855, "incorrect_loss_raw": 14.854820966720581, "correct_loss_per_char": 0.19446322321891785, "incorrect_loss_per_char": 1.4348453954780178, "correct_loss_per_token": 1.5557057857513428, "incorrect_loss_per_token": 7.771352370580037, "correct_loss_uncond": -20.72558832168579, "incorrect_loss_uncond": -3.4006388187408447}, "model_output": [{"sum_logits": -14.445621490478516, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.336376190185547, "logits_per_token": -7.222810745239258, "logits_per_char": -1.3132383173162288, "num_chars": 11}, {"sum_logits": -19.188980102539062, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.405588150024414, "logits_per_token": -9.594490051269531, "logits_per_char": -1.4760753925030048, "num_chars": 13}, {"sum_logits": -3.1114115715026855, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -23.836999893188477, "logits_per_token": -1.5557057857513428, "logits_per_char": -0.19446322321891785, "num_chars": 16}, {"sum_logits": -8.509821891784668, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -10.994718551635742, "logits_per_token": -8.509821891784668, "logits_per_char": -2.127455472946167, "num_chars": 4}, {"sum_logits": -17.274860382080078, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.28515625, "logits_per_token": -5.758286794026692, "logits_per_char": -0.8226123991466704, "num_chars": 21}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 974, "native_id": "aabe8eb218468fc63b6c9aa6d428c951", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.619819641113281, "incorrect_loss_raw": 9.600696086883545, "correct_loss_per_char": 1.4524774551391602, "incorrect_loss_per_char": 1.3403282290413265, "correct_loss_per_token": 5.809909820556641, "incorrect_loss_per_token": 8.127308368682861, "correct_loss_uncond": -3.7958221435546875, "incorrect_loss_uncond": -4.906806468963623}, "model_output": [{"sum_logits": -6.578591346740723, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -12.260344505310059, "logits_per_token": -6.578591346740723, "logits_per_char": -1.3157182693481446, "num_chars": 5}, {"sum_logits": -11.619819641113281, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -15.415641784667969, "logits_per_token": -5.809909820556641, "logits_per_char": -1.4524774551391602, "num_chars": 8}, {"sum_logits": -11.787101745605469, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -20.719802856445312, "logits_per_token": -5.893550872802734, "logits_per_char": -0.9822584788004557, "num_chars": 12}, {"sum_logits": -11.599519729614258, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -12.554266929626465, "logits_per_token": -11.599519729614258, "logits_per_char": -1.657074247087751, "num_chars": 7}, {"sum_logits": -8.43757152557373, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -12.495595932006836, "logits_per_token": -8.43757152557373, "logits_per_char": -1.406261920928955, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 975, "native_id": "43ba9669564217f2f909f33acbedaf95", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.88302993774414, "incorrect_loss_raw": 14.301896333694458, "correct_loss_per_char": 0.8487878526960101, "incorrect_loss_per_char": 1.7328495320045587, "correct_loss_per_token": 3.961009979248047, "incorrect_loss_per_token": 14.301896333694458, "correct_loss_uncond": -6.589147567749023, "incorrect_loss_uncond": -0.2580697536468506}, "model_output": [{"sum_logits": -12.631352424621582, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.305567741394043, "logits_per_token": -12.631352424621582, "logits_per_char": -2.105225404103597, "num_chars": 6}, {"sum_logits": -11.88302993774414, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.472177505493164, "logits_per_token": -3.961009979248047, "logits_per_char": -0.8487878526960101, "num_chars": 14}, {"sum_logits": -14.225863456726074, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.54829216003418, "logits_per_token": -14.225863456726074, "logits_per_char": -1.7782329320907593, "num_chars": 8}, {"sum_logits": -14.296357154846191, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.339823722839355, "logits_per_token": -14.296357154846191, "logits_per_char": -1.5884841283162434, "num_chars": 9}, {"sum_logits": -16.054012298583984, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.046180725097656, "logits_per_token": -16.054012298583984, "logits_per_char": -1.459455663507635, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 976, "native_id": "2b9b625c788584b8d41f1a74d740e126", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.190507888793945, "incorrect_loss_raw": 11.96716833114624, "correct_loss_per_char": 0.798945320977105, "incorrect_loss_per_char": 1.221454131879795, "correct_loss_per_token": 7.190507888793945, "incorrect_loss_per_token": 7.331257581710815, "correct_loss_uncond": -6.683239936828613, "incorrect_loss_uncond": -4.044760227203369}, "model_output": [{"sum_logits": -12.10031509399414, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -21.08599090576172, "logits_per_token": -6.05015754699707, "logits_per_char": -1.344479454888238, "num_chars": 9}, {"sum_logits": -10.781387329101562, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -12.564719200134277, "logits_per_token": -10.781387329101562, "logits_per_char": -1.5401981898716517, "num_chars": 7}, {"sum_logits": -12.652956008911133, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.169787406921387, "logits_per_token": -6.326478004455566, "logits_per_char": -0.9733043083777795, "num_chars": 13}, {"sum_logits": -7.190507888793945, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.873747825622559, "logits_per_token": -7.190507888793945, "logits_per_char": -0.798945320977105, "num_chars": 9}, {"sum_logits": -12.334014892578125, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.227216720581055, "logits_per_token": -6.1670074462890625, "logits_per_char": -1.0278345743815105, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 977, "native_id": "eb6807290df71b040e2c7bcc5d11fdea", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.03082275390625, "incorrect_loss_raw": 10.355419874191284, "correct_loss_per_char": 0.5515411376953125, "incorrect_loss_per_char": 1.3361068872305062, "correct_loss_per_token": 5.515411376953125, "incorrect_loss_per_token": 8.990183353424072, "correct_loss_uncond": -11.471929550170898, "incorrect_loss_uncond": -3.4887826442718506}, "model_output": [{"sum_logits": -11.03082275390625, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -22.50275230407715, "logits_per_token": -5.515411376953125, "logits_per_char": -0.5515411376953125, "num_chars": 20}, {"sum_logits": -8.65593433380127, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.023500442504883, "logits_per_token": -8.65593433380127, "logits_per_char": -1.4426557223002117, "num_chars": 6}, {"sum_logits": -13.896381378173828, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.023554801940918, "logits_per_token": -13.896381378173828, "logits_per_char": -1.7370476722717285, "num_chars": 8}, {"sum_logits": -10.921892166137695, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -17.047258377075195, "logits_per_token": -5.460946083068848, "logits_per_char": -0.8401455512413611, "num_chars": 13}, {"sum_logits": -7.947471618652344, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.282496452331543, "logits_per_token": -7.947471618652344, "logits_per_char": -1.3245786031087239, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 978, "native_id": "f06852fb4bb2764dc208a991d037f211", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 14.157819747924805, "incorrect_loss_raw": 11.902710318565369, "correct_loss_per_char": 3.539454936981201, "incorrect_loss_per_char": 0.7617896942081853, "correct_loss_per_token": 7.078909873962402, "incorrect_loss_per_token": 4.348643978436788, "correct_loss_uncond": 0.39421653747558594, "incorrect_loss_uncond": -7.9480961561203}, "model_output": [{"sum_logits": -7.166642665863037, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.82883071899414, "logits_per_token": -2.388880888621012, "logits_per_char": -0.35833213329315183, "num_chars": 20}, {"sum_logits": -8.281281471252441, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.789663314819336, "logits_per_token": -2.760427157084147, "logits_per_char": -0.4871342041913201, "num_chars": 17}, {"sum_logits": -14.157819747924805, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.763603210449219, "logits_per_token": -7.078909873962402, "logits_per_char": -3.539454936981201, "num_chars": 4}, {"sum_logits": -26.556865692138672, "num_tokens": 4, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -27.582622528076172, "logits_per_token": -6.639216423034668, "logits_per_char": -1.7704577128092447, "num_chars": 15}, {"sum_logits": -5.606051445007324, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.202109336853027, "logits_per_token": -5.606051445007324, "logits_per_char": -0.43123472653902495, "num_chars": 13}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 979, "native_id": "5efadabaf61b5174916e3ab659bcd283", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.879437446594238, "incorrect_loss_raw": 8.083395004272461, "correct_loss_per_char": 1.2088263829549153, "incorrect_loss_per_char": 0.8273498578075036, "correct_loss_per_token": 5.439718723297119, "incorrect_loss_per_token": 5.861638784408569, "correct_loss_uncond": -8.660529136657715, "incorrect_loss_uncond": -7.422417640686035}, "model_output": [{"sum_logits": -9.742819786071777, "num_tokens": 2, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -17.823257446289062, "logits_per_token": -4.871409893035889, "logits_per_char": -1.2178524732589722, "num_chars": 8}, {"sum_logits": -4.941976547241211, "num_tokens": 1, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -13.953621864318848, "logits_per_token": -4.941976547241211, "logits_per_char": -0.44927059520374646, "num_chars": 11}, {"sum_logits": -9.6175537109375, "num_tokens": 1, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -12.893023490905762, "logits_per_token": -9.6175537109375, "logits_per_char": -1.0686170789930556, "num_chars": 9}, {"sum_logits": -8.031229972839355, "num_tokens": 2, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -17.353347778320312, "logits_per_token": -4.015614986419678, "logits_per_char": -0.5736592837742397, "num_chars": 14}, {"sum_logits": -10.879437446594238, "num_tokens": 2, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -19.539966583251953, "logits_per_token": -5.439718723297119, "logits_per_char": -1.2088263829549153, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 980, "native_id": "e9d4c747018ff81b8c0aefb5abc3c539", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.380456924438477, "incorrect_loss_raw": 12.759446859359741, "correct_loss_per_char": 0.6700326374598912, "incorrect_loss_per_char": 0.9075604298758129, "correct_loss_per_token": 4.690228462219238, "incorrect_loss_per_token": 5.212778401374817, "correct_loss_uncond": -12.404546737670898, "incorrect_loss_uncond": -7.251906633377075}, "model_output": [{"sum_logits": -9.380456924438477, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.785003662109375, "logits_per_token": -4.690228462219238, "logits_per_char": -0.6700326374598912, "num_chars": 14}, {"sum_logits": -8.709850311279297, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -23.125892639160156, "logits_per_token": -4.354925155639648, "logits_per_char": -0.48388057284884983, "num_chars": 18}, {"sum_logits": -10.483269691467285, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.00286102294922, "logits_per_token": -5.241634845733643, "logits_per_char": -1.0483269691467285, "num_chars": 10}, {"sum_logits": -15.559267044067383, "num_tokens": 5, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.512964248657227, "logits_per_token": -3.1118534088134764, "logits_per_char": -0.740917478288923, "num_chars": 21}, {"sum_logits": -16.285400390625, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.403696060180664, "logits_per_token": -8.1427001953125, "logits_per_char": -1.35711669921875, "num_chars": 12}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 981, "native_id": "30a8cfd186f1aae5acd425a52d058863", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.010817527770996, "incorrect_loss_raw": 12.094839334487915, "correct_loss_per_char": 1.1684695879618328, "incorrect_loss_per_char": 1.1857995203563143, "correct_loss_per_token": 7.010817527770996, "incorrect_loss_per_token": 8.337816834449768, "correct_loss_uncond": -7.272580146789551, "incorrect_loss_uncond": -3.2212090492248535}, "model_output": [{"sum_logits": -14.363044738769531, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.62946891784668, "logits_per_token": -7.181522369384766, "logits_per_char": -1.436304473876953, "num_chars": 10}, {"sum_logits": -7.010817527770996, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.283397674560547, "logits_per_token": -7.010817527770996, "logits_per_char": -1.1684695879618328, "num_chars": 6}, {"sum_logits": -10.071632385253906, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.030601501464844, "logits_per_token": -10.071632385253906, "logits_per_char": -1.0071632385253906, "num_chars": 10}, {"sum_logits": -8.251544952392578, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.562989234924316, "logits_per_token": -8.251544952392578, "logits_per_char": -1.1787921360560827, "num_chars": 7}, {"sum_logits": -15.693135261535645, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.041133880615234, "logits_per_token": -7.846567630767822, "logits_per_char": -1.1209382329668318, "num_chars": 14}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 982, "native_id": "9e7805871c8a276300a89fe910a90949", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.470216274261475, "incorrect_loss_raw": 11.91250205039978, "correct_loss_per_char": 0.45585135618845624, "incorrect_loss_per_char": 1.3557997649366207, "correct_loss_per_token": 2.7351081371307373, "incorrect_loss_per_token": 8.066086053848267, "correct_loss_uncond": -9.790018558502197, "incorrect_loss_uncond": -2.097623825073242}, "model_output": [{"sum_logits": -5.470216274261475, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.260234832763672, "logits_per_token": -2.7351081371307373, "logits_per_char": -0.45585135618845624, "num_chars": 12}, {"sum_logits": -15.645429611206055, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -18.94527816772461, "logits_per_token": -7.822714805603027, "logits_per_char": -1.422311782836914, "num_chars": 11}, {"sum_logits": -15.125898361206055, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.37967586517334, "logits_per_token": -7.562949180603027, "logits_per_char": -1.5125898361206054, "num_chars": 10}, {"sum_logits": -5.995766639709473, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -8.624171257019043, "logits_per_token": -5.995766639709473, "logits_per_char": -1.4989416599273682, "num_chars": 4}, {"sum_logits": -10.882913589477539, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.091378211975098, "logits_per_token": -10.882913589477539, "logits_per_char": -0.9893557808615945, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 983, "native_id": "047c2d8c65d297b39aa42821c1ca76a9", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 13.741283416748047, "incorrect_loss_raw": 17.0752112865448, "correct_loss_per_char": 0.6870641708374023, "incorrect_loss_per_char": 1.1511519719392826, "correct_loss_per_token": 4.580427805582683, "incorrect_loss_per_token": 5.7392235199610395, "correct_loss_uncond": -16.191314697265625, "incorrect_loss_uncond": -6.434255361557007}, "model_output": [{"sum_logits": -16.018667221069336, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -24.27492904663086, "logits_per_token": -8.009333610534668, "logits_per_char": -1.3348889350891113, "num_chars": 12}, {"sum_logits": -13.741283416748047, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -29.932598114013672, "logits_per_token": -4.580427805582683, "logits_per_char": -0.6870641708374023, "num_chars": 20}, {"sum_logits": -8.984432220458984, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.86726951599121, "logits_per_token": -4.492216110229492, "logits_per_char": -1.123054027557373, "num_chars": 8}, {"sum_logits": -9.71716022491455, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.104385375976562, "logits_per_token": -4.858580112457275, "logits_per_char": -0.7474738634549655, "num_chars": 13}, {"sum_logits": -33.58058547973633, "num_tokens": 6, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -34.791282653808594, "logits_per_token": -5.596764246622722, "logits_per_char": -1.3991910616556804, "num_chars": 24}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 984, "native_id": "0bed77da54b6c54facd0ee6614aad72e", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 14.434764862060547, "incorrect_loss_raw": 10.640773057937622, "correct_loss_per_char": 1.0310546330043249, "incorrect_loss_per_char": 1.2895601954725053, "correct_loss_per_token": 4.811588287353516, "incorrect_loss_per_token": 9.018119931221008, "correct_loss_uncond": -7.213321685791016, "incorrect_loss_uncond": -4.871269464492798}, "model_output": [{"sum_logits": -14.434764862060547, "num_tokens": 3, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -21.648086547851562, "logits_per_token": -4.811588287353516, "logits_per_char": -1.0310546330043249, "num_chars": 14}, {"sum_logits": -8.399935722351074, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -14.098298072814941, "logits_per_token": -8.399935722351074, "logits_per_char": -1.0499919652938843, "num_chars": 8}, {"sum_logits": -11.343141555786133, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -15.317315101623535, "logits_per_token": -11.343141555786133, "logits_per_char": -1.2603490617540147, "num_chars": 9}, {"sum_logits": -12.98122501373291, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -17.63345718383789, "logits_per_token": -6.490612506866455, "logits_per_char": -1.4423583348592122, "num_chars": 9}, {"sum_logits": -9.838789939880371, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -14.999099731445312, "logits_per_token": -9.838789939880371, "logits_per_char": -1.4055414199829102, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 985, "native_id": "32e2adee67aace0a98c830fb39463015", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.242713451385498, "incorrect_loss_raw": 11.897514820098877, "correct_loss_per_char": 0.24919038348727757, "incorrect_loss_per_char": 1.5568396024029663, "correct_loss_per_token": 1.121356725692749, "incorrect_loss_per_token": 8.97221565246582, "correct_loss_uncond": -15.121984958648682, "incorrect_loss_uncond": -2.8977255821228027}, "model_output": [{"sum_logits": -2.9334640502929688, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.473599433898926, "logits_per_token": -2.9334640502929688, "logits_per_char": -0.26667855002663354, "num_chars": 11}, {"sum_logits": -11.872352600097656, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.663012504577637, "logits_per_token": -11.872352600097656, "logits_per_char": -1.484044075012207, "num_chars": 8}, {"sum_logits": -9.38184928894043, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.254728317260742, "logits_per_token": -9.38184928894043, "logits_per_char": -1.8763698577880858, "num_chars": 5}, {"sum_logits": -2.242713451385498, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.36469841003418, "logits_per_token": -1.121356725692749, "logits_per_char": -0.24919038348727757, "num_chars": 9}, {"sum_logits": -23.402393341064453, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.789621353149414, "logits_per_token": -11.701196670532227, "logits_per_char": -2.6002659267849393, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 986, "native_id": "8272f08792b873885f93d4c148e307e5", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 9.15167236328125, "incorrect_loss_raw": 14.449981927871704, "correct_loss_per_char": 0.915167236328125, "incorrect_loss_per_char": 2.0478768255029403, "correct_loss_per_token": 3.0505574544270835, "incorrect_loss_per_token": 8.152910590171814, "correct_loss_uncond": -7.186012268066406, "incorrect_loss_uncond": -2.3473455905914307}, "model_output": [{"sum_logits": -14.045985221862793, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.046030044555664, "logits_per_token": -7.0229926109313965, "logits_per_char": -1.7557481527328491, "num_chars": 8}, {"sum_logits": -9.15167236328125, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.337684631347656, "logits_per_token": -3.0505574544270835, "logits_per_char": -0.915167236328125, "num_chars": 10}, {"sum_logits": -22.946815490722656, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.32434844970703, "logits_per_token": -11.473407745361328, "logits_per_char": -3.2781164986746654, "num_chars": 7}, {"sum_logits": -10.037827491760254, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.67752456665039, "logits_per_token": -3.345942497253418, "logits_per_char": -1.0037827491760254, "num_chars": 10}, {"sum_logits": -10.769299507141113, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.141407012939453, "logits_per_token": -10.769299507141113, "logits_per_char": -2.1538599014282225, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 987, "native_id": "bc05bc6b4df7a3d25a361515fe8912ad", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.647318840026855, "incorrect_loss_raw": 12.412957429885864, "correct_loss_per_char": 0.9647318840026855, "incorrect_loss_per_char": 1.27226722240448, "correct_loss_per_token": 3.2157729466756186, "incorrect_loss_per_token": 5.7614950736363735, "correct_loss_uncond": -5.641225814819336, "incorrect_loss_uncond": -3.7529778480529785}, "model_output": [{"sum_logits": -10.97551441192627, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.39467716217041, "logits_per_token": -5.487757205963135, "logits_per_char": -1.219501601325141, "num_chars": 9}, {"sum_logits": -10.679607391357422, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.835020065307617, "logits_per_token": -3.559869130452474, "logits_per_char": -1.186623043484158, "num_chars": 9}, {"sum_logits": -12.595869064331055, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.27862548828125, "logits_per_token": -6.297934532165527, "logits_per_char": -1.3995410071478949, "num_chars": 9}, {"sum_logits": -9.647318840026855, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.288544654846191, "logits_per_token": -3.2157729466756186, "logits_per_char": -0.9647318840026855, "num_chars": 10}, {"sum_logits": -15.400838851928711, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.155418395996094, "logits_per_token": -7.7004194259643555, "logits_per_char": -1.2834032376607258, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 988, "native_id": "b893a6e7a2b172bd71f03c9dbee4f960", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.583981990814209, "incorrect_loss_raw": 9.039279460906982, "correct_loss_per_char": 0.5076347264376554, "incorrect_loss_per_char": 1.1335773851190294, "correct_loss_per_token": 5.583981990814209, "incorrect_loss_per_token": 7.920348525047302, "correct_loss_uncond": -9.34130334854126, "incorrect_loss_uncond": -5.600820064544678}, "model_output": [{"sum_logits": -8.951447486877441, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.718940734863281, "logits_per_token": -4.475723743438721, "logits_per_char": -1.1189309358596802, "num_chars": 8}, {"sum_logits": -11.433370590209961, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.34337329864502, "logits_per_token": -11.433370590209961, "logits_per_char": -1.4291713237762451, "num_chars": 8}, {"sum_logits": -7.362480163574219, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.977478981018066, "logits_per_token": -7.362480163574219, "logits_per_char": -1.0517828805106026, "num_chars": 7}, {"sum_logits": -5.583981990814209, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.925285339355469, "logits_per_token": -5.583981990814209, "logits_per_char": -0.5076347264376554, "num_chars": 11}, {"sum_logits": -8.409819602966309, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.520605087280273, "logits_per_token": -8.409819602966309, "logits_per_char": -0.9344244003295898, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 989, "native_id": "cf8e30dd6956d03e3f0f0397112a8696", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 8.874343872070312, "incorrect_loss_raw": 16.12219214439392, "correct_loss_per_char": 0.7395286560058594, "incorrect_loss_per_char": 1.2927196017333438, "correct_loss_per_token": 4.437171936035156, "incorrect_loss_per_token": 8.732826709747314, "correct_loss_uncond": -8.213550567626953, "incorrect_loss_uncond": -2.863807439804077}, "model_output": [{"sum_logits": -8.874343872070312, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.087894439697266, "logits_per_token": -4.437171936035156, "logits_per_char": -0.7395286560058594, "num_chars": 12}, {"sum_logits": -20.96220588684082, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.74852180480957, "logits_per_token": -10.48110294342041, "logits_per_char": -1.7468504905700684, "num_chars": 12}, {"sum_logits": -11.438080787658691, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.755424499511719, "logits_per_token": -11.438080787658691, "logits_per_char": -1.4297600984573364, "num_chars": 8}, {"sum_logits": -18.192707061767578, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.23810577392578, "logits_per_token": -6.064235687255859, "logits_per_char": -1.2994790758405412, "num_chars": 14}, {"sum_logits": -13.895774841308594, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.201946258544922, "logits_per_token": -6.947887420654297, "logits_per_char": -0.6947887420654297, "num_chars": 20}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 990, "native_id": "159d50e325b59c6d29ec371500e173b4", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.533177375793457, "incorrect_loss_raw": 12.16534686088562, "correct_loss_per_char": 1.6332943439483643, "incorrect_loss_per_char": 1.2568854044645261, "correct_loss_per_token": 6.533177375793457, "incorrect_loss_per_token": 7.0877416133880615, "correct_loss_uncond": -5.323800086975098, "incorrect_loss_uncond": -3.460773229598999}, "model_output": [{"sum_logits": -11.443790435791016, "num_tokens": 4, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.881448745727539, "logits_per_token": -2.860947608947754, "logits_per_char": -0.5721895217895507, "num_chars": 20}, {"sum_logits": -7.969158172607422, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.959211349487305, "logits_per_token": -3.984579086303711, "logits_per_char": -0.6130121671236478, "num_chars": 13}, {"sum_logits": -13.76244068145752, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -11.704168319702148, "logits_per_token": -13.76244068145752, "logits_per_char": -2.2937401135762534, "num_chars": 6}, {"sum_logits": -15.485998153686523, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.959651947021484, "logits_per_token": -7.742999076843262, "logits_per_char": -1.5485998153686524, "num_chars": 10}, {"sum_logits": -6.533177375793457, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -11.856977462768555, "logits_per_token": -6.533177375793457, "logits_per_char": -1.6332943439483643, "num_chars": 4}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 991, "native_id": "17eafc807b198236faf06a66f4c05313", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.8362511992454529, "incorrect_loss_raw": 14.59260630607605, "correct_loss_per_char": 0.0643270153265733, "incorrect_loss_per_char": 2.0601206018811182, "correct_loss_per_token": 0.41812559962272644, "incorrect_loss_per_token": 10.558530251185099, "correct_loss_uncond": -15.334778845310211, "incorrect_loss_uncond": -0.9683501720428467}, "model_output": [{"sum_logits": -14.321174621582031, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -11.596142768859863, "logits_per_token": -14.321174621582031, "logits_per_char": -2.8642349243164062, "num_chars": 5}, {"sum_logits": -5.074912071228027, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.235330581665039, "logits_per_token": -5.074912071228027, "logits_per_char": -0.8458186785380045, "num_chars": 6}, {"sum_logits": -0.8362511992454529, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": true, "sum_logits_uncond": -16.171030044555664, "logits_per_token": -0.41812559962272644, "logits_per_char": -0.0643270153265733, "num_chars": 13}, {"sum_logits": -24.204456329345703, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -22.434873580932617, "logits_per_token": -8.0681521097819, "logits_per_char": -2.4204456329345705, "num_chars": 10}, {"sum_logits": -14.769882202148438, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.977478981018066, "logits_per_token": -14.769882202148438, "logits_per_char": -2.109983171735491, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 992, "native_id": "24eebfa678112100803da16dde148b2d", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.496799468994141, "incorrect_loss_raw": 7.083312034606934, "correct_loss_per_char": 0.6423999241420201, "incorrect_loss_per_char": 1.284182554199582, "correct_loss_per_token": 4.496799468994141, "incorrect_loss_per_token": 6.379925966262817, "correct_loss_uncond": -11.13543701171875, "incorrect_loss_uncond": -6.652170896530151}, "model_output": [{"sum_logits": -5.62708854675293, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.974644660949707, "logits_per_token": -2.813544273376465, "logits_per_char": -0.8038697923932757, "num_chars": 7}, {"sum_logits": -4.925495147705078, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.691120147705078, "logits_per_token": -4.925495147705078, "logits_per_char": -0.8209158579508463, "num_chars": 6}, {"sum_logits": -9.070596694946289, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.094871520996094, "logits_per_token": -9.070596694946289, "logits_per_char": -2.2676491737365723, "num_chars": 4}, {"sum_logits": -8.710067749023438, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.181295394897461, "logits_per_token": -8.710067749023438, "logits_per_char": -1.244295392717634, "num_chars": 7}, {"sum_logits": -4.496799468994141, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.63223648071289, "logits_per_token": -4.496799468994141, "logits_per_char": -0.6423999241420201, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 993, "native_id": "ec882fc3a9bfaeae2a26fe31c2ef2c07", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 1.5019512176513672, "incorrect_loss_raw": 11.39293646812439, "correct_loss_per_char": 0.21456445966448104, "incorrect_loss_per_char": 0.8760918394190256, "correct_loss_per_token": 1.5019512176513672, "incorrect_loss_per_token": 6.214685042699179, "correct_loss_uncond": -11.06103801727295, "incorrect_loss_uncond": -6.829276084899902}, "model_output": [{"sum_logits": -8.96229362487793, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.074176788330078, "logits_per_token": -2.987431208292643, "logits_per_char": -0.5974862416585286, "num_chars": 15}, {"sum_logits": -1.5019512176513672, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": true, "sum_logits_uncond": -12.562989234924316, "logits_per_token": -1.5019512176513672, "logits_per_char": -0.21456445966448104, "num_chars": 7}, {"sum_logits": -14.230555534362793, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.978763580322266, "logits_per_token": -4.743518511454265, "logits_per_char": -0.8370915020213407, "num_chars": 17}, {"sum_logits": -10.502212524414062, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.801706314086914, "logits_per_token": -5.251106262207031, "logits_per_char": -0.7501580374581474, "num_chars": 14}, {"sum_logits": -11.876684188842773, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.03420352935791, "logits_per_token": -11.876684188842773, "logits_per_char": -1.319631576538086, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 994, "native_id": "0a006d16d9042e0c170935e5fbf7f9af", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.168941497802734, "incorrect_loss_raw": 8.878828048706055, "correct_loss_per_char": 0.5211176872253418, "incorrect_loss_per_char": 1.127230379316542, "correct_loss_per_token": 4.168941497802734, "incorrect_loss_per_token": 7.612582683563232, "correct_loss_uncond": -10.748686790466309, "incorrect_loss_uncond": -6.374743461608887}, "model_output": [{"sum_logits": -9.425216674804688, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.115584373474121, "logits_per_token": -9.425216674804688, "logits_per_char": -1.0472462972005208, "num_chars": 9}, {"sum_logits": -10.129962921142578, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -17.728057861328125, "logits_per_token": -5.064981460571289, "logits_per_char": -1.688327153523763, "num_chars": 6}, {"sum_logits": -9.23332405090332, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.447200775146484, "logits_per_token": -9.23332405090332, "logits_per_char": -1.0259248945448134, "num_chars": 9}, {"sum_logits": -6.726808547973633, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.723443031311035, "logits_per_token": -6.726808547973633, "logits_per_char": -0.7474231719970703, "num_chars": 9}, {"sum_logits": -4.168941497802734, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.917628288269043, "logits_per_token": -4.168941497802734, "logits_per_char": -0.5211176872253418, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 995, "native_id": "d33a81660058e570a18fb2eafa284a78", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.036788940429688, "incorrect_loss_raw": 11.749516487121582, "correct_loss_per_char": 0.7169134957449776, "incorrect_loss_per_char": 1.5599262775624816, "correct_loss_per_token": 5.018394470214844, "incorrect_loss_per_token": 10.160260558128357, "correct_loss_uncond": -8.830078125, "incorrect_loss_uncond": -3.2085180282592773}, "model_output": [{"sum_logits": -10.036788940429688, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.866867065429688, "logits_per_token": -5.018394470214844, "logits_per_char": -0.7169134957449776, "num_chars": 14}, {"sum_logits": -10.956000328063965, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.593781471252441, "logits_per_token": -10.956000328063965, "logits_per_char": -1.217333369784885, "num_chars": 9}, {"sum_logits": -13.983451843261719, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.188199996948242, "logits_per_token": -13.983451843261719, "logits_per_char": -1.997635977608817, "num_chars": 7}, {"sum_logits": -9.344566345214844, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.745972633361816, "logits_per_token": -9.344566345214844, "logits_per_char": -1.8689132690429688, "num_chars": 5}, {"sum_logits": -12.7140474319458, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.304183959960938, "logits_per_token": -6.3570237159729, "logits_per_char": -1.1558224938132546, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 996, "native_id": "1e09c3136a743b862e783700b7667028", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.891131401062012, "incorrect_loss_raw": 6.952783465385437, "correct_loss_per_char": 0.4446483091874556, "incorrect_loss_per_char": 0.9991690662172106, "correct_loss_per_token": 4.891131401062012, "incorrect_loss_per_token": 6.952783465385437, "correct_loss_uncond": -10.315669059753418, "incorrect_loss_uncond": -6.440060257911682}, "model_output": [{"sum_logits": -4.621897220611572, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.396528244018555, "logits_per_token": -4.621897220611572, "logits_per_char": -0.9243794441223144, "num_chars": 5}, {"sum_logits": -4.855743408203125, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.863301277160645, "logits_per_token": -4.855743408203125, "logits_per_char": -0.5395270453559028, "num_chars": 9}, {"sum_logits": -11.339288711547852, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.641867637634277, "logits_per_token": -11.339288711547852, "logits_per_char": -1.1339288711547852, "num_chars": 10}, {"sum_logits": -6.994204521179199, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.669677734375, "logits_per_token": -6.994204521179199, "logits_per_char": -1.3988409042358398, "num_chars": 5}, {"sum_logits": -4.891131401062012, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.20680046081543, "logits_per_token": -4.891131401062012, "logits_per_char": -0.4446483091874556, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 997, "native_id": "5e851c47682bdf79ec7c139ecf124c9a", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.005266189575195, "incorrect_loss_raw": 8.735061407089233, "correct_loss_per_char": 0.7005266189575196, "incorrect_loss_per_char": 1.015903635819753, "correct_loss_per_token": 7.005266189575195, "incorrect_loss_per_token": 6.276405215263367, "correct_loss_uncond": -5.628293037414551, "incorrect_loss_uncond": -7.004946231842041}, "model_output": [{"sum_logits": -12.657791137695312, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -18.280366897583008, "logits_per_token": -6.328895568847656, "logits_per_char": -1.2657791137695313, "num_chars": 10}, {"sum_logits": -7.958850860595703, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -14.434986114501953, "logits_per_token": -7.958850860595703, "logits_per_char": -0.9948563575744629, "num_chars": 8}, {"sum_logits": -7.005266189575195, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -12.633559226989746, "logits_per_token": -7.005266189575195, "logits_per_char": -0.7005266189575196, "num_chars": 10}, {"sum_logits": -7.011458396911621, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -16.658267974853516, "logits_per_token": -3.5057291984558105, "logits_per_char": -0.5842881997426351, "num_chars": 12}, {"sum_logits": -7.312145233154297, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -13.586409568786621, "logits_per_token": -7.312145233154297, "logits_per_char": -1.2186908721923828, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 998, "native_id": "b148f18fb8b5a504b67078ef6ac29717", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.88634967803955, "incorrect_loss_raw": 15.454719543457031, "correct_loss_per_char": 0.8078499707308683, "incorrect_loss_per_char": 1.0815179862228095, "correct_loss_per_token": 4.443174839019775, "incorrect_loss_per_token": 7.029407262802124, "correct_loss_uncond": -9.086848258972168, "incorrect_loss_uncond": -4.232234001159668}, "model_output": [{"sum_logits": -16.7508602142334, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.608497619628906, "logits_per_token": -5.583620071411133, "logits_per_char": -0.9853447184843176, "num_chars": 17}, {"sum_logits": -16.929590225219727, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -22.139081954956055, "logits_per_token": -8.464795112609863, "logits_per_char": -0.9958582485423368, "num_chars": 17}, {"sum_logits": -15.303550720214844, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.78986930847168, "logits_per_token": -7.651775360107422, "logits_per_char": -1.275295893351237, "num_chars": 12}, {"sum_logits": -8.88634967803955, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.97319793701172, "logits_per_token": -4.443174839019775, "logits_per_char": -0.8078499707308683, "num_chars": 11}, {"sum_logits": -12.834877014160156, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.210365295410156, "logits_per_token": -6.417438507080078, "logits_per_char": -1.0695730845133464, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 999, "native_id": "b6bbe013995fdb5def3d504319af0791", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.7315531969070435, "incorrect_loss_raw": 11.166053831577301, "correct_loss_per_char": 0.2473647424152919, "incorrect_loss_per_char": 0.8498703872745699, "correct_loss_per_token": 1.7315531969070435, "incorrect_loss_per_token": 6.203083902597427, "correct_loss_uncond": -13.159945368766785, "incorrect_loss_uncond": -7.411156594753265}, "model_output": [{"sum_logits": -16.75253677368164, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -17.399694442749023, "logits_per_token": -8.37626838684082, "logits_per_char": -0.8817124617727179, "num_chars": 19}, {"sum_logits": -19.829971313476562, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -24.8195743560791, "logits_per_token": -9.914985656738281, "logits_per_char": -1.5253824087289662, "num_chars": 13}, {"sum_logits": -1.7315531969070435, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": true, "sum_logits_uncond": -14.891498565673828, "logits_per_token": -1.7315531969070435, "logits_per_char": -0.2473647424152919, "num_chars": 7}, {"sum_logits": -3.121251344680786, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -14.896087646484375, "logits_per_token": -1.560625672340393, "logits_per_char": -0.2837501222437078, "num_chars": 11}, {"sum_logits": -4.960455894470215, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -17.193485260009766, "logits_per_token": -4.960455894470215, "logits_per_char": -0.7086365563528878, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1000, "native_id": "0c2fa15a02d0b6ca6707e98fac7589e4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.694693088531494, "incorrect_loss_raw": 15.462012529373169, "correct_loss_per_char": 0.29972068887007863, "incorrect_loss_per_char": 1.0706343124783229, "correct_loss_per_token": 2.847346544265747, "incorrect_loss_per_token": 7.7310062646865845, "correct_loss_uncond": -11.448190212249756, "incorrect_loss_uncond": -3.833150625228882}, "model_output": [{"sum_logits": -5.694693088531494, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.14288330078125, "logits_per_token": -2.847346544265747, "logits_per_char": -0.29972068887007863, "num_chars": 19}, {"sum_logits": -14.481613159179688, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.20990753173828, "logits_per_token": -7.240806579589844, "logits_per_char": -1.4481613159179687, "num_chars": 10}, {"sum_logits": -13.570542335510254, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.389366149902344, "logits_per_token": -6.785271167755127, "logits_per_char": -0.7539190186394585, "num_chars": 18}, {"sum_logits": -13.191600799560547, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.55258560180664, "logits_per_token": -6.595800399780273, "logits_per_char": -1.0993000666300456, "num_chars": 12}, {"sum_logits": -20.604293823242188, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -25.028793334960938, "logits_per_token": -10.302146911621094, "logits_per_char": -0.9811568487258184, "num_chars": 21}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1001, "native_id": "a656e74a943f9e2698a25bbcfb4e96db", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.985671043395996, "incorrect_loss_raw": 9.170961141586304, "correct_loss_per_char": 0.7488059202829996, "incorrect_loss_per_char": 1.0648777091313921, "correct_loss_per_token": 8.985671043395996, "incorrect_loss_per_token": 9.170961141586304, "correct_loss_uncond": -5.940125465393066, "incorrect_loss_uncond": -4.624818801879883}, "model_output": [{"sum_logits": -10.355865478515625, "num_tokens": 1, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -12.73450756072998, "logits_per_token": -10.355865478515625, "logits_per_char": -1.0355865478515625, "num_chars": 10}, {"sum_logits": -7.390376091003418, "num_tokens": 1, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -13.284228324890137, "logits_per_token": -7.390376091003418, "logits_per_char": -0.8211528990003798, "num_chars": 9}, {"sum_logits": -8.985671043395996, "num_tokens": 1, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -14.925796508789062, "logits_per_token": -8.985671043395996, "logits_per_char": -0.7488059202829996, "num_chars": 12}, {"sum_logits": -9.531914710998535, "num_tokens": 1, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -14.53042984008789, "logits_per_token": -9.531914710998535, "logits_per_char": -1.0591016345553927, "num_chars": 9}, {"sum_logits": -9.405688285827637, "num_tokens": 1, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -14.633954048156738, "logits_per_token": -9.405688285827637, "logits_per_char": -1.3436697551182337, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1002, "native_id": "8086f022f2d4a4888ae1f8c7e4541ab9", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.876201629638672, "incorrect_loss_raw": 9.783076405525208, "correct_loss_per_char": 0.554762601852417, "incorrect_loss_per_char": 1.2753130353414095, "correct_loss_per_token": 2.958733876546224, "incorrect_loss_per_token": 6.461662292480469, "correct_loss_uncond": -7.859685897827148, "incorrect_loss_uncond": -6.084232926368713}, "model_output": [{"sum_logits": -7.62269401550293, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.094871520996094, "logits_per_token": -7.62269401550293, "logits_per_char": -1.9056735038757324, "num_chars": 4}, {"sum_logits": -12.332772254943848, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.503150939941406, "logits_per_token": -6.166386127471924, "logits_per_char": -0.9486747888418344, "num_chars": 13}, {"sum_logits": -8.876201629638672, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.73588752746582, "logits_per_token": -2.958733876546224, "logits_per_char": -0.554762601852417, "num_chars": 16}, {"sum_logits": -4.93829870223999, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.685454368591309, "logits_per_token": -4.93829870223999, "logits_per_char": -0.823049783706665, "num_chars": 6}, {"sum_logits": -14.238540649414062, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.185760498046875, "logits_per_token": -7.119270324707031, "logits_per_char": -1.4238540649414062, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1003, "native_id": "5655a3002dd9a6b7dabede1dd26a5893", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.171273708343506, "incorrect_loss_raw": 6.653722524642944, "correct_loss_per_char": 0.6952122847239176, "incorrect_loss_per_char": 0.9896189002763658, "correct_loss_per_token": 4.171273708343506, "incorrect_loss_per_token": 6.653722524642944, "correct_loss_uncond": -8.58211088180542, "incorrect_loss_uncond": -6.59849214553833}, "model_output": [{"sum_logits": -3.521076202392578, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -9.888992309570312, "logits_per_token": -3.521076202392578, "logits_per_char": -0.5868460337320963, "num_chars": 6}, {"sum_logits": -3.045558452606201, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.071391105651855, "logits_per_token": -3.045558452606201, "logits_per_char": -0.5075930754343668, "num_chars": 6}, {"sum_logits": -12.752017974853516, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.63223648071289, "logits_per_token": -12.752017974853516, "logits_per_char": -1.8217168535505022, "num_chars": 7}, {"sum_logits": -7.296237468719482, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.416238784790039, "logits_per_token": -7.296237468719482, "logits_per_char": -1.0423196383884974, "num_chars": 7}, {"sum_logits": -4.171273708343506, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.753384590148926, "logits_per_token": -4.171273708343506, "logits_per_char": -0.6952122847239176, "num_chars": 6}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1004, "native_id": "17d9bfaee1efac51b1ca240125bc5977", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.293246269226074, "incorrect_loss_raw": 11.692775964736938, "correct_loss_per_char": 0.6862164179484049, "incorrect_loss_per_char": 0.9145067101433164, "correct_loss_per_token": 5.146623134613037, "incorrect_loss_per_token": 7.856002648671468, "correct_loss_uncond": -10.699624061584473, "incorrect_loss_uncond": -6.883573770523071}, "model_output": [{"sum_logits": -13.095505714416504, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.904881477355957, "logits_per_token": -13.095505714416504, "logits_per_char": -0.8730337142944335, "num_chars": 15}, {"sum_logits": -8.578224182128906, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.600096702575684, "logits_per_token": -8.578224182128906, "logits_per_char": -1.0722780227661133, "num_chars": 8}, {"sum_logits": -16.790437698364258, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.041921615600586, "logits_per_token": -5.596812566121419, "logits_per_char": -1.119362513224284, "num_chars": 15}, {"sum_logits": -10.293246269226074, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.992870330810547, "logits_per_token": -5.146623134613037, "logits_per_char": -0.6862164179484049, "num_chars": 15}, {"sum_logits": -8.306936264038086, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -22.758499145507812, "logits_per_token": -4.153468132019043, "logits_per_char": -0.5933525902884347, "num_chars": 14}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1005, "native_id": "801431167b8bff06b9870abe9721536b", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.608784675598145, "incorrect_loss_raw": 9.514163374900818, "correct_loss_per_char": 0.9565316306220161, "incorrect_loss_per_char": 0.9517608037361731, "correct_loss_per_token": 8.608784675598145, "incorrect_loss_per_token": 6.874968707561493, "correct_loss_uncond": -5.467319488525391, "incorrect_loss_uncond": -7.149298548698425}, "model_output": [{"sum_logits": -9.524126052856445, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.734291076660156, "logits_per_token": -9.524126052856445, "logits_per_char": -1.587354342142741, "num_chars": 6}, {"sum_logits": -7.691068172454834, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.206321716308594, "logits_per_token": -3.845534086227417, "logits_per_char": -0.5127378781636556, "num_chars": 15}, {"sum_logits": -13.422489166259766, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.473331451416016, "logits_per_token": -6.711244583129883, "logits_per_char": -1.0324991666353667, "num_chars": 13}, {"sum_logits": -8.608784675598145, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.076104164123535, "logits_per_token": -8.608784675598145, "logits_per_char": -0.9565316306220161, "num_chars": 9}, {"sum_logits": -7.418970108032227, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.239903450012207, "logits_per_token": -7.418970108032227, "logits_per_char": -0.6744518280029297, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1006, "native_id": "85ebdd4f1a3c2ac900eee8e75e48ccaa", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.067420959472656, "incorrect_loss_raw": 7.7046509981155396, "correct_loss_per_char": 0.3898016122671274, "incorrect_loss_per_char": 0.943239529832961, "correct_loss_per_token": 5.067420959472656, "incorrect_loss_per_token": 6.371386766433716, "correct_loss_uncond": -8.287753105163574, "incorrect_loss_uncond": -6.204753994941711}, "model_output": [{"sum_logits": -8.03211498260498, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.012441635131836, "logits_per_token": -8.03211498260498, "logits_per_char": -1.1474449975149972, "num_chars": 7}, {"sum_logits": -10.66611385345459, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.906187057495117, "logits_per_token": -5.333056926727295, "logits_per_char": -1.3332642316818237, "num_chars": 8}, {"sum_logits": -7.219023704528809, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.98448371887207, "logits_per_token": -7.219023704528809, "logits_per_char": -0.8021137449476454, "num_chars": 9}, {"sum_logits": -4.901351451873779, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.73450756072998, "logits_per_token": -4.901351451873779, "logits_per_char": -0.49013514518737794, "num_chars": 10}, {"sum_logits": -5.067420959472656, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.35517406463623, "logits_per_token": -5.067420959472656, "logits_per_char": -0.3898016122671274, "num_chars": 13}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1007, "native_id": "db1eb157671109bbb9113b0f71a6b957", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.491399765014648, "incorrect_loss_raw": 6.86737197637558, "correct_loss_per_char": 0.730107674231896, "incorrect_loss_per_char": 0.9952473888794582, "correct_loss_per_token": 9.491399765014648, "incorrect_loss_per_token": 4.233338296413422, "correct_loss_uncond": -3.710709571838379, "incorrect_loss_uncond": -8.49855524301529}, "model_output": [{"sum_logits": -9.491399765014648, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.202109336853027, "logits_per_token": -9.491399765014648, "logits_per_char": -0.730107674231896, "num_chars": 13}, {"sum_logits": -5.151028633117676, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.691120147705078, "logits_per_token": -5.151028633117676, "logits_per_char": -0.8585047721862793, "num_chars": 6}, {"sum_logits": -3.739522933959961, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.7302303314209, "logits_per_token": -1.2465076446533203, "logits_per_char": -0.23372018337249756, "num_chars": 16}, {"sum_logits": -2.4926974773406982, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.973722457885742, "logits_per_token": -2.4926974773406982, "logits_per_char": -0.2077247897783915, "num_chars": 12}, {"sum_logits": -16.086238861083984, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.068635940551758, "logits_per_token": -8.043119430541992, "logits_per_char": -2.681039810180664, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1008, "native_id": "c02a3c2d4f726b9e1be99533a24a6ab4", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.966553688049316, "incorrect_loss_raw": 11.962754964828491, "correct_loss_per_char": 1.1610922813415527, "incorrect_loss_per_char": 1.5870441073463077, "correct_loss_per_token": 6.966553688049316, "incorrect_loss_per_token": 9.617942810058594, "correct_loss_uncond": -4.923099517822266, "incorrect_loss_uncond": -2.8529326915740967}, "model_output": [{"sum_logits": -18.75849723815918, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.041595458984375, "logits_per_token": -9.37924861907959, "logits_per_char": -2.0842774709065757, "num_chars": 9}, {"sum_logits": -8.696550369262695, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.693686485290527, "logits_per_token": -8.696550369262695, "logits_per_char": -1.739310073852539, "num_chars": 5}, {"sum_logits": -11.316472053527832, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.071504592895508, "logits_per_token": -11.316472053527832, "logits_per_char": -1.6166388647896903, "num_chars": 7}, {"sum_logits": -6.966553688049316, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -11.889653205871582, "logits_per_token": -6.966553688049316, "logits_per_char": -1.1610922813415527, "num_chars": 6}, {"sum_logits": -9.079500198364258, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.455964088439941, "logits_per_token": -9.079500198364258, "logits_per_char": -0.9079500198364258, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1009, "native_id": "3ed6391c539e6daa5b5fdb1b6d5d8ace", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.050094604492188, "incorrect_loss_raw": 11.518612623214722, "correct_loss_per_char": 0.9269303541917068, "incorrect_loss_per_char": 1.0217705671603863, "correct_loss_per_token": 6.025047302246094, "incorrect_loss_per_token": 7.8025935888290405, "correct_loss_uncond": -7.4228057861328125, "incorrect_loss_uncond": -4.975318670272827}, "model_output": [{"sum_logits": -12.050094604492188, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -19.472900390625, "logits_per_token": -6.025047302246094, "logits_per_char": -0.9269303541917068, "num_chars": 13}, {"sum_logits": -15.622215270996094, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -21.235340118408203, "logits_per_token": -7.811107635498047, "logits_per_char": -1.2017088669996996, "num_chars": 13}, {"sum_logits": -14.105937004089355, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -19.590545654296875, "logits_per_token": -7.052968502044678, "logits_per_char": -0.9403958002726237, "num_chars": 15}, {"sum_logits": -2.317000389099121, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -10.810015678405762, "logits_per_token": -2.317000389099121, "logits_per_char": -0.3861667315165202, "num_chars": 6}, {"sum_logits": -14.029297828674316, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -14.339823722839355, "logits_per_token": -14.029297828674316, "logits_per_char": -1.558810869852702, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1010, "native_id": "1db19a32a3edbff9981976dc9ec800ce", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.893060684204102, "incorrect_loss_raw": 11.636718988418579, "correct_loss_per_char": 0.7410883903503418, "incorrect_loss_per_char": 2.015947537195115, "correct_loss_per_token": 2.964353561401367, "incorrect_loss_per_token": 10.466996550559998, "correct_loss_uncond": -9.385822296142578, "incorrect_loss_uncond": -2.393289566040039}, "model_output": [{"sum_logits": -8.893060684204102, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.27888298034668, "logits_per_token": -2.964353561401367, "logits_per_char": -0.7410883903503418, "num_chars": 12}, {"sum_logits": -8.274066925048828, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.82812213897705, "logits_per_token": -8.274066925048828, "logits_per_char": -1.6548133850097657, "num_chars": 5}, {"sum_logits": -11.83353042602539, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.450029373168945, "logits_per_token": -11.83353042602539, "logits_per_char": -1.6905043465750558, "num_chars": 7}, {"sum_logits": -9.357779502868652, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.420734405517578, "logits_per_token": -4.678889751434326, "logits_per_char": -1.8715559005737306, "num_chars": 5}, {"sum_logits": -17.081499099731445, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.421148300170898, "logits_per_token": -17.081499099731445, "logits_per_char": -2.8469165166219077, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1011, "native_id": "1e5a138b4c7d456c37abf4990b402bbe", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.073894500732422, "incorrect_loss_raw": 11.074375867843628, "correct_loss_per_char": 0.552172227339311, "incorrect_loss_per_char": 1.2641794261954602, "correct_loss_per_token": 6.073894500732422, "incorrect_loss_per_token": 7.759339412053427, "correct_loss_uncond": -7.104560852050781, "incorrect_loss_uncond": -4.538207769393921}, "model_output": [{"sum_logits": -6.073894500732422, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.178455352783203, "logits_per_token": -6.073894500732422, "logits_per_char": -0.552172227339311, "num_chars": 11}, {"sum_logits": -8.58100700378418, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -17.796329498291016, "logits_per_token": -2.86033566792806, "logits_per_char": -0.7800915457985618, "num_chars": 11}, {"sum_logits": -15.078948974609375, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -19.15285873413086, "logits_per_token": -7.5394744873046875, "logits_per_char": -1.1599191518930287, "num_chars": 13}, {"sum_logits": -7.076409339904785, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.137409210205078, "logits_per_token": -7.076409339904785, "logits_per_char": -1.1794015566507976, "num_chars": 6}, {"sum_logits": -13.561138153076172, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.363737106323242, "logits_per_token": -13.561138153076172, "logits_per_char": -1.9373054504394531, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1012, "native_id": "9402864beae075392d2ee6c10115fc21", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.290365219116211, "incorrect_loss_raw": 11.117516994476318, "correct_loss_per_char": 0.9493118013654437, "incorrect_loss_per_char": 1.1657435167403447, "correct_loss_per_token": 6.6451826095581055, "incorrect_loss_per_token": 7.927102327346802, "correct_loss_uncond": -8.553558349609375, "incorrect_loss_uncond": -5.813287734985352}, "model_output": [{"sum_logits": -11.684809684753418, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.858729362487793, "logits_per_token": -11.684809684753418, "logits_per_char": -1.6692585263933455, "num_chars": 7}, {"sum_logits": -15.909688949584961, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.818172454833984, "logits_per_token": -7.9548444747924805, "logits_per_char": -1.0606459299723308, "num_chars": 15}, {"sum_logits": -9.613628387451172, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -21.21819496154785, "logits_per_token": -4.806814193725586, "logits_per_char": -0.4806814193725586, "num_chars": 20}, {"sum_logits": -13.290365219116211, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -21.843923568725586, "logits_per_token": -6.6451826095581055, "logits_per_char": -0.9493118013654437, "num_chars": 14}, {"sum_logits": -7.261940956115723, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.82812213897705, "logits_per_token": -7.261940956115723, "logits_per_char": -1.4523881912231444, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1013, "native_id": "25136807f7b2e78b115698daa1677b4a", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.677957057952881, "incorrect_loss_raw": 12.29546856880188, "correct_loss_per_char": 0.4451971371968587, "incorrect_loss_per_char": 1.3171272622214423, "correct_loss_per_token": 3.3389785289764404, "incorrect_loss_per_token": 7.40434455871582, "correct_loss_uncond": -10.31443452835083, "incorrect_loss_uncond": -3.5030875205993652}, "model_output": [{"sum_logits": -16.682331085205078, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.90981101989746, "logits_per_token": -8.341165542602539, "logits_per_char": -1.8535923428005643, "num_chars": 9}, {"sum_logits": -13.472793579101562, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.47633171081543, "logits_per_token": -6.736396789550781, "logits_per_char": -0.8420495986938477, "num_chars": 16}, {"sum_logits": -8.973867416381836, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.918428421020508, "logits_per_token": -4.486933708190918, "logits_per_char": -0.8973867416381835, "num_chars": 10}, {"sum_logits": -6.677957057952881, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.99239158630371, "logits_per_token": -3.3389785289764404, "logits_per_char": -0.4451971371968587, "num_chars": 15}, {"sum_logits": -10.052882194519043, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.889653205871582, "logits_per_token": -10.052882194519043, "logits_per_char": -1.6754803657531738, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1014, "native_id": "bc10bf2bfae26a2226823d42956f6cf0", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.2590718269348145, "incorrect_loss_raw": 9.667657494544983, "correct_loss_per_char": 0.4839381217956543, "incorrect_loss_per_char": 0.9760941704114278, "correct_loss_per_token": 2.4196906089782715, "incorrect_loss_per_token": 7.219607949256897, "correct_loss_uncond": -13.815104961395264, "incorrect_loss_uncond": -6.2017844915390015}, "model_output": [{"sum_logits": -19.584396362304688, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -24.456161499023438, "logits_per_token": -9.792198181152344, "logits_per_char": -1.305626424153646, "num_chars": 15}, {"sum_logits": -7.2590718269348145, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -21.074176788330078, "logits_per_token": -2.4196906089782715, "logits_per_char": -0.4839381217956543, "num_chars": 15}, {"sum_logits": -7.826270580291748, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.465144157409668, "logits_per_token": -7.826270580291748, "logits_per_char": -1.1180386543273926, "num_chars": 7}, {"sum_logits": -3.5471529960632324, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -11.10049819946289, "logits_per_token": -3.5471529960632324, "logits_per_char": -0.7094305992126465, "num_chars": 5}, {"sum_logits": -7.712810039520264, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.455964088439941, "logits_per_token": -7.712810039520264, "logits_per_char": -0.7712810039520264, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1015, "native_id": "5a6559db6bae37e3a8af7350be212219", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.183656215667725, "incorrect_loss_raw": 10.420003652572632, "correct_loss_per_char": 0.43197135130564374, "incorrect_loss_per_char": 0.7733082230787099, "correct_loss_per_token": 2.5918281078338623, "incorrect_loss_per_token": 4.456124226252238, "correct_loss_uncond": -12.907934665679932, "incorrect_loss_uncond": -7.300051927566528}, "model_output": [{"sum_logits": -14.328652381896973, "num_tokens": 3, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -18.04734992980957, "logits_per_token": -4.776217460632324, "logits_per_char": -1.023475170135498, "num_chars": 14}, {"sum_logits": -5.896738052368164, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -13.730541229248047, "logits_per_token": -5.896738052368164, "logits_per_char": -0.8423911503383091, "num_chars": 7}, {"sum_logits": -12.647375106811523, "num_tokens": 3, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -19.71219253540039, "logits_per_token": -4.215791702270508, "logits_per_char": -0.5498858742091967, "num_chars": 23}, {"sum_logits": -5.183656215667725, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -18.091590881347656, "logits_per_token": -2.5918281078338623, "logits_per_char": -0.43197135130564374, "num_chars": 12}, {"sum_logits": -8.807249069213867, "num_tokens": 3, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -19.390138626098633, "logits_per_token": -2.9357496897379556, "logits_per_char": -0.6774806976318359, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1016, "native_id": "7ae17f5aecacf18c94a47cc48deb6c36", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.9392595291137695, "incorrect_loss_raw": 8.568456172943115, "correct_loss_per_char": 0.35280425207955496, "incorrect_loss_per_char": 1.152492464724041, "correct_loss_per_token": 1.646419843037923, "incorrect_loss_per_token": 6.495185613632202, "correct_loss_uncond": -11.58718204498291, "incorrect_loss_uncond": -6.745786666870117}, "model_output": [{"sum_logits": -8.788983345031738, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -18.767902374267578, "logits_per_token": -4.394491672515869, "logits_per_char": -0.7324152787526449, "num_chars": 12}, {"sum_logits": -7.694672584533691, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -13.62524700164795, "logits_per_token": -7.694672584533691, "logits_per_char": -1.0992389406476701, "num_chars": 7}, {"sum_logits": -7.797181129455566, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -15.300804138183594, "logits_per_token": -3.898590564727783, "logits_per_char": -0.7797181129455566, "num_chars": 10}, {"sum_logits": -4.9392595291137695, "num_tokens": 3, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -16.52644157409668, "logits_per_token": -1.646419843037923, "logits_per_char": -0.35280425207955496, "num_chars": 14}, {"sum_logits": -9.992987632751465, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -13.563017845153809, "logits_per_token": -9.992987632751465, "logits_per_char": -1.9985975265502929, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1017, "native_id": "5d809e0ee19badc66071653630ea7c51", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.012821197509766, "incorrect_loss_raw": 8.91850996017456, "correct_loss_per_char": 0.45571101795543323, "incorrect_loss_per_char": 0.7648570201233433, "correct_loss_per_token": 2.506410598754883, "incorrect_loss_per_token": 5.2611260414123535, "correct_loss_uncond": -10.955278396606445, "incorrect_loss_uncond": -6.261874437332153}, "model_output": [{"sum_logits": -5.012821197509766, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.968099594116211, "logits_per_token": -2.506410598754883, "logits_per_char": -0.45571101795543323, "num_chars": 11}, {"sum_logits": -6.414968490600586, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.865673065185547, "logits_per_token": -6.414968490600586, "logits_per_char": -0.5831789536909624, "num_chars": 11}, {"sum_logits": -4.313288688659668, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.576176643371582, "logits_per_token": -2.156644344329834, "logits_per_char": -0.28755257924397787, "num_chars": 15}, {"sum_logits": -12.101419448852539, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.879993438720703, "logits_per_token": -6.0507097244262695, "logits_per_char": -1.5126774311065674, "num_chars": 8}, {"sum_logits": -12.84436321258545, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -17.399694442749023, "logits_per_token": -6.422181606292725, "logits_per_char": -0.6760191164518657, "num_chars": 19}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1018, "native_id": "ad0943fc37034cd2b7e485021f8b1b8c", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.807898998260498, "incorrect_loss_raw": 11.563309669494629, "correct_loss_per_char": 0.13464983304341635, "incorrect_loss_per_char": 1.210614194653251, "correct_loss_per_token": 0.807898998260498, "incorrect_loss_per_token": 7.155024290084839, "correct_loss_uncond": -11.081754207611084, "incorrect_loss_uncond": -4.445864677429199}, "model_output": [{"sum_logits": -5.073869705200195, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.386576652526855, "logits_per_token": -5.073869705200195, "logits_per_char": -0.6342337131500244, "num_chars": 8}, {"sum_logits": -17.94449806213379, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.691415786743164, "logits_per_token": -8.972249031066895, "logits_per_char": -1.6313180056485264, "num_chars": 11}, {"sum_logits": -5.9130859375, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.657292366027832, "logits_per_token": -5.9130859375, "logits_per_char": -0.8447265625, "num_chars": 7}, {"sum_logits": -0.807898998260498, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": true, "sum_logits_uncond": -11.889653205871582, "logits_per_token": -0.807898998260498, "logits_per_char": -0.13464983304341635, "num_chars": 6}, {"sum_logits": -17.32178497314453, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.30141258239746, "logits_per_token": -8.660892486572266, "logits_per_char": -1.7321784973144532, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1019, "native_id": "c2a8c6814ed3e207771cfc23b3b42cf1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.032880783081055, "incorrect_loss_raw": 8.818833708763123, "correct_loss_per_char": 0.8594914845057896, "incorrect_loss_per_char": 1.2373023617501353, "correct_loss_per_token": 6.016440391540527, "incorrect_loss_per_token": 5.903260747591654, "correct_loss_uncond": -7.809164047241211, "incorrect_loss_uncond": -7.863120198249817}, "model_output": [{"sum_logits": -14.775796890258789, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.580894470214844, "logits_per_token": -7.3878984451293945, "logits_per_char": -2.4626328150431314, "num_chars": 6}, {"sum_logits": -12.032880783081055, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.842044830322266, "logits_per_token": -6.016440391540527, "logits_per_char": -0.8594914845057896, "num_chars": 14}, {"sum_logits": -6.411590099334717, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.148038864135742, "logits_per_token": -2.137196699778239, "logits_per_char": -0.3771523587843951, "num_chars": 17}, {"sum_logits": -5.725608825683594, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.307762145996094, "logits_per_token": -5.725608825683594, "logits_per_char": -0.7157011032104492, "num_chars": 8}, {"sum_logits": -8.36233901977539, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.691120147705078, "logits_per_token": -8.36233901977539, "logits_per_char": -1.3937231699625652, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1020, "native_id": "0b52cc905fff0ca69a45e6353d10e401", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.652987241744995, "incorrect_loss_raw": 11.62761640548706, "correct_loss_per_char": 0.521855320249285, "incorrect_loss_per_char": 1.0438195376368293, "correct_loss_per_token": 3.652987241744995, "incorrect_loss_per_token": 4.240963617960611, "correct_loss_uncond": -9.528908967971802, "incorrect_loss_uncond": -8.414414405822754}, "model_output": [{"sum_logits": -8.762195587158203, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -20.407630920410156, "logits_per_token": -4.381097793579102, "logits_per_char": -0.730182965596517, "num_chars": 12}, {"sum_logits": -19.667556762695312, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -23.87965965270996, "logits_per_token": -6.5558522542317705, "logits_per_char": -2.1852840847439237, "num_chars": 9}, {"sum_logits": -10.029129028320312, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -18.64649200439453, "logits_per_token": -3.343043009440104, "logits_per_char": -0.5278488962273848, "num_chars": 19}, {"sum_logits": -3.652987241744995, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.181896209716797, "logits_per_token": -3.652987241744995, "logits_per_char": -0.521855320249285, "num_chars": 7}, {"sum_logits": -8.051584243774414, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -17.23434066772461, "logits_per_token": -2.683861414591471, "logits_per_char": -0.7319622039794922, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1021, "native_id": "30d0c2006613eec41ae814d76c17a798", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 1.0754115581512451, "incorrect_loss_raw": 10.001094549894333, "correct_loss_per_char": 0.13442644476890564, "incorrect_loss_per_char": 0.8571435029308001, "correct_loss_per_token": 1.0754115581512451, "incorrect_loss_per_token": 4.177814707159996, "correct_loss_uncond": -12.501434564590454, "incorrect_loss_uncond": -7.723146229982376}, "model_output": [{"sum_logits": -7.536023139953613, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -11.10049819946289, "logits_per_token": -7.536023139953613, "logits_per_char": -1.5072046279907227, "num_chars": 5}, {"sum_logits": -28.235767364501953, "num_tokens": 4, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -28.20943832397461, "logits_per_token": -7.058941841125488, "logits_per_char": -1.5686537424723308, "num_chars": 18}, {"sum_logits": -1.9872485399246216, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.92875862121582, "logits_per_token": -0.9936242699623108, "logits_per_char": -0.16560404499371847, "num_chars": 12}, {"sum_logits": -2.2453391551971436, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.658267974853516, "logits_per_token": -1.1226695775985718, "logits_per_char": -0.18711159626642862, "num_chars": 12}, {"sum_logits": -1.0754115581512451, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": true, "sum_logits_uncond": -13.5768461227417, "logits_per_token": -1.0754115581512451, "logits_per_char": -0.13442644476890564, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1022, "native_id": "f7a6d0d816d14210f3af5dabe21bf804", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.649401664733887, "incorrect_loss_raw": 6.7151588797569275, "correct_loss_per_char": 0.5166001849704318, "incorrect_loss_per_char": 0.7483201354742051, "correct_loss_per_token": 4.649401664733887, "incorrect_loss_per_token": 3.5524839758872986, "correct_loss_uncond": -8.384801864624023, "incorrect_loss_uncond": -8.473075687885284}, "model_output": [{"sum_logits": -4.649401664733887, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -13.03420352935791, "logits_per_token": -4.649401664733887, "logits_per_char": -0.5166001849704318, "num_chars": 9}, {"sum_logits": -9.61855697631836, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -16.287925720214844, "logits_per_token": -4.80927848815918, "logits_per_char": -1.202319622039795, "num_chars": 8}, {"sum_logits": -9.833209037780762, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -17.092269897460938, "logits_per_token": -4.916604518890381, "logits_per_char": -0.9833209037780761, "num_chars": 10}, {"sum_logits": -1.5592362880706787, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": true, "sum_logits_uncond": -11.587641716003418, "logits_per_token": -1.5592362880706787, "logits_per_char": -0.3898090720176697, "num_chars": 4}, {"sum_logits": -5.84963321685791, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -15.785100936889648, "logits_per_token": -2.924816608428955, "logits_per_char": -0.4178309440612793, "num_chars": 14}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1023, "native_id": "c306ab28498b67c53decb9dde1d78bd5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.552179336547852, "incorrect_loss_raw": 9.189063310623169, "correct_loss_per_char": 2.138044834136963, "incorrect_loss_per_char": 0.8994674288309538, "correct_loss_per_token": 8.552179336547852, "incorrect_loss_per_token": 7.252528190612793, "correct_loss_uncond": -5.485659599304199, "incorrect_loss_uncond": -6.931469917297363}, "model_output": [{"sum_logits": -6.128820419311523, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.341278076171875, "logits_per_token": -3.0644102096557617, "logits_per_char": -0.40858802795410154, "num_chars": 15}, {"sum_logits": -11.54393196105957, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.434986114501953, "logits_per_token": -11.54393196105957, "logits_per_char": -1.4429914951324463, "num_chars": 8}, {"sum_logits": -7.022595405578613, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.24990463256836, "logits_per_token": -2.340865135192871, "logits_per_char": -0.5401996465829703, "num_chars": 13}, {"sum_logits": -12.060905456542969, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.455964088439941, "logits_per_token": -12.060905456542969, "logits_per_char": -1.206090545654297, "num_chars": 10}, {"sum_logits": -8.552179336547852, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.03783893585205, "logits_per_token": -8.552179336547852, "logits_per_char": -2.138044834136963, "num_chars": 4}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1024, "native_id": "637c710ec9582fd9b9e8eaa3f3fe83bb", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.306078910827637, "incorrect_loss_raw": 10.226498126983643, "correct_loss_per_char": 0.7006754345364041, "incorrect_loss_per_char": 0.9561778737802936, "correct_loss_per_token": 3.1530394554138184, "incorrect_loss_per_token": 6.584113955497742, "correct_loss_uncond": -13.233887672424316, "incorrect_loss_uncond": -5.871657133102417}, "model_output": [{"sum_logits": -9.310260772705078, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.549616813659668, "logits_per_token": -4.655130386352539, "logits_per_char": -1.0344734191894531, "num_chars": 9}, {"sum_logits": -6.306078910827637, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.539966583251953, "logits_per_token": -3.1530394554138184, "logits_per_char": -0.7006754345364041, "num_chars": 9}, {"sum_logits": -7.73330020904541, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.43128490447998, "logits_per_token": -3.866650104522705, "logits_per_char": -0.5523785863603864, "num_chars": 14}, {"sum_logits": -12.095512390136719, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.42523956298828, "logits_per_token": -6.047756195068359, "logits_per_char": -0.9304240300105169, "num_chars": 13}, {"sum_logits": -11.766919136047363, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.986479759216309, "logits_per_token": -11.766919136047363, "logits_per_char": -1.3074354595608182, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1025, "native_id": "9ae52783d8fdb5cc2e8caa01542c3341", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.06863021850586, "incorrect_loss_raw": 12.94540810585022, "correct_loss_per_char": 0.4318395342145647, "incorrect_loss_per_char": 0.7766617902945647, "correct_loss_per_token": 1.813726043701172, "incorrect_loss_per_token": 5.427886962890625, "correct_loss_uncond": -11.444334030151367, "incorrect_loss_uncond": -6.680122375488281}, "model_output": [{"sum_logits": -8.044645309448242, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.687851905822754, "logits_per_token": -4.022322654724121, "logits_per_char": -0.6703871091206869, "num_chars": 12}, {"sum_logits": -15.976847648620605, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -23.125892639160156, "logits_per_token": -7.988423824310303, "logits_per_char": -0.8876026471455892, "num_chars": 18}, {"sum_logits": -11.043066024780273, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.785003662109375, "logits_per_token": -5.521533012390137, "logits_per_char": -0.7887904303414481, "num_chars": 14}, {"sum_logits": -16.717073440551758, "num_tokens": 4, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.90337371826172, "logits_per_token": -4.1792683601379395, "logits_per_char": -0.7598669745705344, "num_chars": 22}, {"sum_logits": -9.06863021850586, "num_tokens": 5, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.512964248657227, "logits_per_token": -1.813726043701172, "logits_per_char": -0.4318395342145647, "num_chars": 21}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1026, "native_id": "4f23829b96b38b5633ecc3325281726d", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.052662372589111, "incorrect_loss_raw": 10.756516575813293, "correct_loss_per_char": 1.0087770620981853, "incorrect_loss_per_char": 1.5066573090023465, "correct_loss_per_token": 6.052662372589111, "incorrect_loss_per_token": 9.29895830154419, "correct_loss_uncond": -7.752304553985596, "incorrect_loss_uncond": -6.406103730201721}, "model_output": [{"sum_logits": -6.606313228607178, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -12.70644760131836, "logits_per_token": -6.606313228607178, "logits_per_char": -0.7340348031785753, "num_chars": 9}, {"sum_logits": -13.579473495483398, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -14.980780601501465, "logits_per_token": -13.579473495483398, "logits_per_char": -2.2632455825805664, "num_chars": 6}, {"sum_logits": -11.660466194152832, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -24.780929565429688, "logits_per_token": -5.830233097076416, "logits_per_char": -1.1660466194152832, "num_chars": 10}, {"sum_logits": -6.052662372589111, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -13.804966926574707, "logits_per_token": -6.052662372589111, "logits_per_char": -1.0087770620981853, "num_chars": 6}, {"sum_logits": -11.179813385009766, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -16.182323455810547, "logits_per_token": -11.179813385009766, "logits_per_char": -1.863302230834961, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1027, "native_id": "3fcdc0b03e3c8b10692d642676931f4b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.7417054176330566, "incorrect_loss_raw": 8.348170280456543, "correct_loss_per_char": 0.3427131772041321, "incorrect_loss_per_char": 0.8107772689877134, "correct_loss_per_token": 2.7417054176330566, "incorrect_loss_per_token": 4.9642510414123535, "correct_loss_uncond": -12.095274448394775, "incorrect_loss_uncond": -7.855320692062378}, "model_output": [{"sum_logits": -2.7417054176330566, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.836979866027832, "logits_per_token": -2.7417054176330566, "logits_per_char": -0.3427131772041321, "num_chars": 8}, {"sum_logits": -5.788941383361816, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.893854141235352, "logits_per_token": -5.788941383361816, "logits_per_char": -0.9648235638936361, "num_chars": 6}, {"sum_logits": -4.649005889892578, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -11.018102645874023, "logits_per_token": -4.649005889892578, "logits_per_char": -0.9298011779785156, "num_chars": 5}, {"sum_logits": -4.907164573669434, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.349780082702637, "logits_per_token": -4.907164573669434, "logits_per_char": -0.4461058703335849, "num_chars": 11}, {"sum_logits": -18.047569274902344, "num_tokens": 4, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -26.552227020263672, "logits_per_token": -4.511892318725586, "logits_per_char": -0.9023784637451172, "num_chars": 20}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1028, "native_id": "ddd606743cf71679438a85280f64593a", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.9034318923950195, "incorrect_loss_raw": 7.483185052871704, "correct_loss_per_char": 1.1505719820658367, "incorrect_loss_per_char": 0.9611471516745431, "correct_loss_per_token": 6.9034318923950195, "incorrect_loss_per_token": 6.2245934009552, "correct_loss_uncond": -6.386122703552246, "incorrect_loss_uncond": -7.1870646476745605}, "model_output": [{"sum_logits": -6.9034318923950195, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.289554595947266, "logits_per_token": -6.9034318923950195, "logits_per_char": -1.1505719820658367, "num_chars": 6}, {"sum_logits": -10.068733215332031, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.414268493652344, "logits_per_token": -5.034366607666016, "logits_per_char": -1.006873321533203, "num_chars": 10}, {"sum_logits": -7.228046417236328, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.071504592895508, "logits_per_token": -7.228046417236328, "logits_per_char": -1.0325780596051897, "num_chars": 7}, {"sum_logits": -5.858791351318359, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.63223648071289, "logits_per_token": -5.858791351318359, "logits_per_char": -0.8369701930454799, "num_chars": 7}, {"sum_logits": -6.777169227600098, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.562989234924316, "logits_per_token": -6.777169227600098, "logits_per_char": -0.9681670325142997, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1029, "native_id": "420641003ba20b966887dfac684efb17", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.1109232902526855, "incorrect_loss_raw": 15.658647060394287, "correct_loss_per_char": 0.4567692544725206, "incorrect_loss_per_char": 1.301256224305872, "correct_loss_per_token": 2.0554616451263428, "incorrect_loss_per_token": 8.638193329175312, "correct_loss_uncond": -13.921801090240479, "incorrect_loss_uncond": -1.4440648555755615}, "model_output": [{"sum_logits": -10.562417030334473, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.308795928955078, "logits_per_token": -5.281208515167236, "logits_per_char": -1.0562417030334472, "num_chars": 10}, {"sum_logits": -4.1109232902526855, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.032724380493164, "logits_per_token": -2.0554616451263428, "logits_per_char": -0.4567692544725206, "num_chars": 9}, {"sum_logits": -12.548983573913574, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.08215045928955, "logits_per_token": -12.548983573913574, "logits_per_char": -1.1408166885375977, "num_chars": 11}, {"sum_logits": -18.23407554626465, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.684635162353516, "logits_per_token": -6.078025182088216, "logits_per_char": -1.0725926791920382, "num_chars": 17}, {"sum_logits": -21.289112091064453, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -21.33526611328125, "logits_per_token": -10.644556045532227, "logits_per_char": -1.9353738264604048, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1030, "native_id": "064c3074a682893d49c3c5b4f1e89984", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.205521583557129, "incorrect_loss_raw": 11.256683588027954, "correct_loss_per_char": 1.2756901979446411, "incorrect_loss_per_char": 1.177360217619424, "correct_loss_per_token": 10.205521583557129, "incorrect_loss_per_token": 9.070350885391235, "correct_loss_uncond": -3.744077682495117, "incorrect_loss_uncond": -3.3281779289245605}, "model_output": [{"sum_logits": -5.155317306518555, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -11.40125560760498, "logits_per_token": -5.155317306518555, "logits_per_char": -1.0310634613037108, "num_chars": 5}, {"sum_logits": -8.037155151367188, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.261580467224121, "logits_per_token": -8.037155151367188, "logits_per_char": -0.8930172390407987, "num_chars": 9}, {"sum_logits": -10.205521583557129, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.949599266052246, "logits_per_token": -10.205521583557129, "logits_per_char": -1.2756901979446411, "num_chars": 8}, {"sum_logits": -17.49066162109375, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.183486938476562, "logits_per_token": -8.745330810546875, "logits_per_char": -1.5900601473721592, "num_chars": 11}, {"sum_logits": -14.343600273132324, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.493123054504395, "logits_per_token": -14.343600273132324, "logits_per_char": -1.195300022761027, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1031, "native_id": "c640116ca6905d5256edadb616b3f76e", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.7221789360046387, "incorrect_loss_raw": 8.918896079063416, "correct_loss_per_char": 0.3024643262227376, "incorrect_loss_per_char": 1.201802232199245, "correct_loss_per_token": 2.7221789360046387, "incorrect_loss_per_token": 6.188102364540101, "correct_loss_uncond": -11.426915645599365, "incorrect_loss_uncond": -7.748835206031799}, "model_output": [{"sum_logits": -11.449012756347656, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.146428108215332, "logits_per_token": -11.449012756347656, "logits_per_char": -1.9081687927246094, "num_chars": 6}, {"sum_logits": -7.076508045196533, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.043468475341797, "logits_per_token": -2.358836015065511, "logits_per_char": -0.8845635056495667, "num_chars": 8}, {"sum_logits": -7.841809272766113, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.496792793273926, "logits_per_token": -7.841809272766113, "logits_per_char": -0.9802261590957642, "num_chars": 8}, {"sum_logits": -2.7221789360046387, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.149094581604004, "logits_per_token": -2.7221789360046387, "logits_per_char": -0.3024643262227376, "num_chars": 9}, {"sum_logits": -9.30825424194336, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.984235763549805, "logits_per_token": -3.1027514139811196, "logits_per_char": -1.03425047132704, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1032, "native_id": "35ad89c198d5d6311a71c993bb7b6cba", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 2.2006757259368896, "incorrect_loss_raw": 9.867404103279114, "correct_loss_per_char": 0.44013514518737795, "incorrect_loss_per_char": 1.2104276474360582, "correct_loss_per_token": 2.2006757259368896, "incorrect_loss_per_token": 8.827369332313538, "correct_loss_uncond": -10.932185411453247, "incorrect_loss_uncond": -5.5729697942733765}, "model_output": [{"sum_logits": -13.900111198425293, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.405874252319336, "logits_per_token": -13.900111198425293, "logits_per_char": -1.7375138998031616, "num_chars": 8}, {"sum_logits": -13.43086051940918, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.57154655456543, "logits_per_token": -13.43086051940918, "logits_per_char": -1.918694359915597, "num_chars": 7}, {"sum_logits": -3.818366527557373, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.188199996948242, "logits_per_token": -3.818366527557373, "logits_per_char": -0.5454809325081962, "num_chars": 7}, {"sum_logits": -8.32027816772461, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.595874786376953, "logits_per_token": -4.160139083862305, "logits_per_char": -0.6400213975172776, "num_chars": 13}, {"sum_logits": -2.2006757259368896, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": true, "sum_logits_uncond": -13.132861137390137, "logits_per_token": -2.2006757259368896, "logits_per_char": -0.44013514518737795, "num_chars": 5}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1033, "native_id": "916bbd27545446ca5d83d07c10d013ea", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.0161314010620117, "incorrect_loss_raw": 12.940938234329224, "correct_loss_per_char": 0.18328467282381924, "incorrect_loss_per_char": 1.554426936876206, "correct_loss_per_token": 2.0161314010620117, "incorrect_loss_per_token": 9.495222687721252, "correct_loss_uncond": -11.937490463256836, "incorrect_loss_uncond": -3.392529010772705}, "model_output": [{"sum_logits": -11.179298400878906, "num_tokens": 1, "num_tokens_all": 165, "is_greedy": false, "sum_logits_uncond": -14.434986114501953, "logits_per_token": -11.179298400878906, "logits_per_char": -1.3974123001098633, "num_chars": 8}, {"sum_logits": -13.018730163574219, "num_tokens": 1, "num_tokens_all": 165, "is_greedy": false, "sum_logits_uncond": -14.005568504333496, "logits_per_token": -13.018730163574219, "logits_per_char": -2.169788360595703, "num_chars": 6}, {"sum_logits": -10.391249656677246, "num_tokens": 2, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -17.353347778320312, "logits_per_token": -5.195624828338623, "logits_per_char": -0.742232118334089, "num_chars": 14}, {"sum_logits": -17.174474716186523, "num_tokens": 2, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -19.539966583251953, "logits_per_token": -8.587237358093262, "logits_per_char": -1.9082749684651692, "num_chars": 9}, {"sum_logits": -2.0161314010620117, "num_tokens": 1, "num_tokens_all": 165, "is_greedy": false, "sum_logits_uncond": -13.953621864318848, "logits_per_token": -2.0161314010620117, "logits_per_char": -0.18328467282381924, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1034, "native_id": "e40fd2c17fe2cde4bd4af540d35fd518", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.01803970336914, "incorrect_loss_raw": 11.542424201965332, "correct_loss_per_char": 0.9018039703369141, "incorrect_loss_per_char": 0.9893393615943115, "correct_loss_per_token": 4.50901985168457, "incorrect_loss_per_token": 6.595151305198669, "correct_loss_uncond": -9.349056243896484, "incorrect_loss_uncond": -4.670786619186401}, "model_output": [{"sum_logits": -6.591513633728027, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.254728317260742, "logits_per_token": -6.591513633728027, "logits_per_char": -1.3183027267456056, "num_chars": 5}, {"sum_logits": -19.84046173095703, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.399694442749023, "logits_per_token": -9.920230865478516, "logits_per_char": -1.0442348279451068, "num_chars": 19}, {"sum_logits": -11.441727638244629, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.928295135498047, "logits_per_token": -5.7208638191223145, "logits_per_char": -0.673042802249684, "num_chars": 17}, {"sum_logits": -8.29599380493164, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.270125389099121, "logits_per_token": -4.14799690246582, "logits_per_char": -0.921777089436849, "num_chars": 9}, {"sum_logits": -9.01803970336914, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.367095947265625, "logits_per_token": -4.50901985168457, "logits_per_char": -0.9018039703369141, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1035, "native_id": "98a04457025f18c2287d5c610ff8000d", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 13.079057693481445, "incorrect_loss_raw": 9.059880256652832, "correct_loss_per_char": 0.9342184066772461, "incorrect_loss_per_char": 1.4379222075144449, "correct_loss_per_token": 6.539528846740723, "incorrect_loss_per_token": 7.574958682060242, "correct_loss_uncond": -7.54185676574707, "incorrect_loss_uncond": -5.074236154556274}, "model_output": [{"sum_logits": -8.971927642822266, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.442961692810059, "logits_per_token": -8.971927642822266, "logits_per_char": -1.2817039489746094, "num_chars": 7}, {"sum_logits": -11.879372596740723, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.46359634399414, "logits_per_token": -5.939686298370361, "logits_per_char": -0.9899477163950602, "num_chars": 12}, {"sum_logits": -7.340360641479492, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.693686485290527, "logits_per_token": -7.340360641479492, "logits_per_char": -1.4680721282958984, "num_chars": 5}, {"sum_logits": -8.047860145568848, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.9362211227417, "logits_per_token": -8.047860145568848, "logits_per_char": -2.011965036392212, "num_chars": 4}, {"sum_logits": -13.079057693481445, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.620914459228516, "logits_per_token": -6.539528846740723, "logits_per_char": -0.9342184066772461, "num_chars": 14}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1036, "native_id": "f656a475f07d3adba9d1486eda8e834a", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.571191310882568, "incorrect_loss_raw": 13.924652099609375, "correct_loss_per_char": 0.630932609240214, "incorrect_loss_per_char": 1.4491723708776167, "correct_loss_per_token": 3.785595655441284, "incorrect_loss_per_token": 9.859830856323242, "correct_loss_uncond": -8.36702585220337, "incorrect_loss_uncond": -2.0767319202423096}, "model_output": [{"sum_logits": -14.891698837280273, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.306278228759766, "logits_per_token": -4.963899612426758, "logits_per_char": -1.0636927740914481, "num_chars": 14}, {"sum_logits": -13.507265090942383, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.07653522491455, "logits_per_token": -13.507265090942383, "logits_per_char": -2.251210848490397, "num_chars": 6}, {"sum_logits": -14.636672973632812, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.925285339355469, "logits_per_token": -14.636672973632812, "logits_per_char": -1.3306066339666194, "num_chars": 11}, {"sum_logits": -12.662971496582031, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.697437286376953, "logits_per_token": -6.331485748291016, "logits_per_char": -1.151179226962003, "num_chars": 11}, {"sum_logits": -7.571191310882568, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.938217163085938, "logits_per_token": -3.785595655441284, "logits_per_char": -0.630932609240214, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1037, "native_id": "c865b3547c2a2e3c3916d7be6ab25752", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.05986213684082, "incorrect_loss_raw": 12.49208927154541, "correct_loss_per_char": 0.6733180152045356, "incorrect_loss_per_char": 1.66489231018793, "correct_loss_per_token": 6.05986213684082, "incorrect_loss_per_token": 10.848340392112732, "correct_loss_uncond": -8.549199104309082, "incorrect_loss_uncond": -2.0128958225250244}, "model_output": [{"sum_logits": -11.692809104919434, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.672751426696777, "logits_per_token": -11.692809104919434, "logits_per_char": -1.9488015174865723, "num_chars": 6}, {"sum_logits": -14.84662914276123, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.465144157409668, "logits_per_token": -14.84662914276123, "logits_per_char": -2.1209470203944614, "num_chars": 7}, {"sum_logits": -6.05986213684082, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.609061241149902, "logits_per_token": -6.05986213684082, "logits_per_char": -0.6733180152045356, "num_chars": 9}, {"sum_logits": -13.149991035461426, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.99239158630371, "logits_per_token": -6.574995517730713, "logits_per_char": -0.8766660690307617, "num_chars": 15}, {"sum_logits": -10.27892780303955, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.889653205871582, "logits_per_token": -10.27892780303955, "logits_per_char": -1.713154633839925, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1038, "native_id": "abd30bab9b96f902fead5378d4f4a1e4", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.580450057983398, "incorrect_loss_raw": 14.36021113395691, "correct_loss_per_char": 0.4737781286239624, "incorrect_loss_per_char": 1.0443857798328648, "correct_loss_per_token": 2.5268166859944663, "incorrect_loss_per_token": 8.734108130137127, "correct_loss_uncond": -10.392295837402344, "incorrect_loss_uncond": -3.3298654556274414}, "model_output": [{"sum_logits": -17.474300384521484, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.99380111694336, "logits_per_token": -8.737150192260742, "logits_per_char": -1.0921437740325928, "num_chars": 16}, {"sum_logits": -15.413741111755371, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.904881477355957, "logits_per_token": -15.413741111755371, "logits_per_char": -1.0275827407836915, "num_chars": 15}, {"sum_logits": -7.580450057983398, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.972745895385742, "logits_per_token": -2.5268166859944663, "logits_per_char": -0.4737781286239624, "num_chars": 16}, {"sum_logits": -8.945161819458008, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.86711883544922, "logits_per_token": -2.9817206064860025, "logits_per_char": -0.6389401299612862, "num_chars": 14}, {"sum_logits": -15.607641220092773, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.994504928588867, "logits_per_token": -7.803820610046387, "logits_per_char": -1.4188764745538884, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1039, "native_id": "a4b44a986e7f9045432e20ea75611df4", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.343751907348633, "incorrect_loss_raw": 5.659007787704468, "correct_loss_per_char": 0.6418270697960486, "incorrect_loss_per_char": 0.6683143881105241, "correct_loss_per_token": 2.7812506357828775, "incorrect_loss_per_token": 4.372415125370026, "correct_loss_uncond": -9.204967498779297, "incorrect_loss_uncond": -8.743803024291992}, "model_output": [{"sum_logits": -5.700716018676758, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.21335506439209, "logits_per_token": -2.850358009338379, "logits_per_char": -0.40719400133405415, "num_chars": 14}, {"sum_logits": -4.171684741973877, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -12.848284721374512, "logits_per_token": -4.171684741973877, "logits_per_char": -0.5214605927467346, "num_chars": 8}, {"sum_logits": -8.343751907348633, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.54871940612793, "logits_per_token": -2.7812506357828775, "logits_per_char": -0.6418270697960486, "num_chars": 13}, {"sum_logits": -4.592025279998779, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.990900039672852, "logits_per_token": -2.2960126399993896, "logits_per_char": -0.3826687733332316, "num_chars": 12}, {"sum_logits": -8.171605110168457, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.558703422546387, "logits_per_token": -8.171605110168457, "logits_per_char": -1.3619341850280762, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1040, "native_id": "1f492f556fae64f72ce36b6caa242dd0", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.7208170890808105, "incorrect_loss_raw": 11.026390790939331, "correct_loss_per_char": 0.30231300989786786, "incorrect_loss_per_char": 1.4633951116175878, "correct_loss_per_token": 2.7208170890808105, "incorrect_loss_per_token": 6.676481604576111, "correct_loss_uncond": -10.446861743927002, "incorrect_loss_uncond": -4.602693796157837}, "model_output": [{"sum_logits": -13.330634117126465, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.227184295654297, "logits_per_token": -6.665317058563232, "logits_per_char": -2.2217723528544107, "num_chars": 6}, {"sum_logits": -9.306289672851562, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.416067123413086, "logits_per_token": -9.306289672851562, "logits_per_char": -1.329469953264509, "num_chars": 7}, {"sum_logits": -2.7208170890808105, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.167678833007812, "logits_per_token": -2.7208170890808105, "logits_per_char": -0.30231300989786786, "num_chars": 9}, {"sum_logits": -7.116513252258301, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.293058395385742, "logits_per_token": -3.5582566261291504, "logits_per_char": -0.5083223751613072, "num_chars": 14}, {"sum_logits": -14.352126121520996, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.580028533935547, "logits_per_token": -7.176063060760498, "logits_per_char": -1.7940157651901245, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1041, "native_id": "d0c67c7ae6f2361fe237110455127866", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.9072389602661133, "incorrect_loss_raw": 14.429847478866577, "correct_loss_per_char": 0.43413766225179035, "incorrect_loss_per_char": 1.8355694157736642, "correct_loss_per_token": 3.9072389602661133, "incorrect_loss_per_token": 6.412984728813171, "correct_loss_uncond": -10.988659858703613, "incorrect_loss_uncond": -2.763244152069092}, "model_output": [{"sum_logits": -10.26901626586914, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.837525367736816, "logits_per_token": -5.13450813293457, "logits_per_char": -0.9335469332608309, "num_chars": 11}, {"sum_logits": -14.457172393798828, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.593149185180664, "logits_per_token": -7.228586196899414, "logits_per_char": -2.4095287322998047, "num_chars": 6}, {"sum_logits": -3.9072389602661133, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.895898818969727, "logits_per_token": -3.9072389602661133, "logits_per_char": -0.43413766225179035, "num_chars": 9}, {"sum_logits": -19.246536254882812, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.665525436401367, "logits_per_token": -6.4155120849609375, "logits_per_char": -2.749505179268973, "num_chars": 7}, {"sum_logits": -13.746665000915527, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.676166534423828, "logits_per_token": -6.873332500457764, "logits_per_char": -1.2496968182650479, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1042, "native_id": "7bb279e38a1c9eb47a0c7af979a131a2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.111861228942871, "incorrect_loss_raw": 12.408584117889404, "correct_loss_per_char": 0.5470662483802209, "incorrect_loss_per_char": 0.9000235040079463, "correct_loss_per_token": 3.5559306144714355, "incorrect_loss_per_token": 8.072135766347248, "correct_loss_uncond": -8.057926177978516, "incorrect_loss_uncond": -4.930665493011475}, "model_output": [{"sum_logits": -6.465910911560059, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.797895431518555, "logits_per_token": -6.465910911560059, "logits_per_char": -0.5878100828690962, "num_chars": 11}, {"sum_logits": -14.74356746673584, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.042802810668945, "logits_per_token": -4.914522488911946, "logits_per_char": -0.92147296667099, "num_chars": 16}, {"sum_logits": -13.391361236572266, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.31719970703125, "logits_per_token": -13.391361236572266, "logits_per_char": -1.3391361236572266, "num_chars": 10}, {"sum_logits": -7.111861228942871, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.169787406921387, "logits_per_token": -3.5559306144714355, "logits_per_char": -0.5470662483802209, "num_chars": 13}, {"sum_logits": -15.033496856689453, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.199100494384766, "logits_per_token": -7.516748428344727, "logits_per_char": -0.7516748428344726, "num_chars": 20}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1043, "native_id": "3095078e4771053d9d5fa8d4f5f3dc38", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.344500541687012, "incorrect_loss_raw": 11.097997188568115, "correct_loss_per_char": 0.4344500541687012, "incorrect_loss_per_char": 1.0744244992733, "correct_loss_per_token": 4.344500541687012, "incorrect_loss_per_token": 8.01724123954773, "correct_loss_uncond": -8.390007019042969, "incorrect_loss_uncond": -3.518010377883911}, "model_output": [{"sum_logits": -12.323023796081543, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.21495819091797, "logits_per_token": -6.1615118980407715, "logits_per_char": -0.7701889872550964, "num_chars": 16}, {"sum_logits": -7.827259063720703, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.132861137390137, "logits_per_token": -7.827259063720703, "logits_per_char": -1.5654518127441406, "num_chars": 5}, {"sum_logits": -4.344500541687012, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.73450756072998, "logits_per_token": -4.344500541687012, "logits_per_char": -0.4344500541687012, "num_chars": 10}, {"sum_logits": -12.323023796081543, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.21495819091797, "logits_per_token": -6.1615118980407715, "logits_per_char": -0.7701889872550964, "num_chars": 16}, {"sum_logits": -11.918682098388672, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.901252746582031, "logits_per_token": -11.918682098388672, "logits_per_char": -1.191868209838867, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1044, "native_id": "b23edb651e623e5d1e03e8ed3937e8fc", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.7417964935302734, "incorrect_loss_raw": 10.34000539779663, "correct_loss_per_char": 0.6854491233825684, "incorrect_loss_per_char": 1.5901543753487724, "correct_loss_per_token": 2.7417964935302734, "incorrect_loss_per_token": 7.226066470146179, "correct_loss_uncond": -10.743060111999512, "incorrect_loss_uncond": -3.7951698303222656}, "model_output": [{"sum_logits": -5.055835723876953, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.62524700164795, "logits_per_token": -5.055835723876953, "logits_per_char": -0.7222622462681362, "num_chars": 7}, {"sum_logits": -2.7417964935302734, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.484856605529785, "logits_per_token": -2.7417964935302734, "logits_per_char": -0.6854491233825684, "num_chars": 4}, {"sum_logits": -11.392674446105957, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -11.758121490478516, "logits_per_token": -11.392674446105957, "logits_per_char": -2.2785348892211914, "num_chars": 5}, {"sum_logits": -10.653743743896484, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.367711067199707, "logits_per_token": -5.326871871948242, "logits_per_char": -1.7756239573160808, "num_chars": 6}, {"sum_logits": -14.257767677307129, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.789621353149414, "logits_per_token": -7.1288838386535645, "logits_per_char": -1.584196408589681, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1045, "native_id": "acf6b667e9353b1743b7c4f60a6a9017", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.323046684265137, "incorrect_loss_raw": 11.673657655715942, "correct_loss_per_char": 0.48820311228434243, "incorrect_loss_per_char": 1.2004891035625336, "correct_loss_per_token": 1.8307616710662842, "incorrect_loss_per_token": 6.614873170852661, "correct_loss_uncond": -14.26882266998291, "incorrect_loss_uncond": -6.836499214172363}, "model_output": [{"sum_logits": -6.2243547439575195, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.282496452331543, "logits_per_token": -6.2243547439575195, "logits_per_char": -1.0373924573262532, "num_chars": 6}, {"sum_logits": -15.803367614746094, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -22.47273826599121, "logits_per_token": -7.901683807373047, "logits_per_char": -1.2156436626727765, "num_chars": 13}, {"sum_logits": -7.323046684265137, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.591869354248047, "logits_per_token": -1.8307616710662842, "logits_per_char": -0.48820311228434243, "num_chars": 15}, {"sum_logits": -15.170467376708984, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -22.511512756347656, "logits_per_token": -7.585233688354492, "logits_per_char": -1.6856074863009982, "num_chars": 9}, {"sum_logits": -9.496440887451172, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.773880004882812, "logits_per_token": -4.748220443725586, "logits_per_char": -0.8633128079501066, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1046, "native_id": "15b090801256085ad465e74af47cbee9", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.89390230178833, "incorrect_loss_raw": 12.410861134529114, "correct_loss_per_char": 0.6267183910716664, "incorrect_loss_per_char": 1.066763622764213, "correct_loss_per_token": 3.446951150894165, "incorrect_loss_per_token": 8.645143628120422, "correct_loss_uncond": -12.968076229095459, "incorrect_loss_uncond": -3.113824963569641}, "model_output": [{"sum_logits": -12.71843147277832, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.905193328857422, "logits_per_token": -12.71843147277832, "logits_per_char": -1.1562210429798474, "num_chars": 11}, {"sum_logits": -6.7992730140686035, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.691494941711426, "logits_per_token": -6.7992730140686035, "logits_per_char": -0.755474779340956, "num_chars": 9}, {"sum_logits": -6.89390230178833, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.86197853088379, "logits_per_token": -3.446951150894165, "logits_per_char": -0.6267183910716664, "num_chars": 11}, {"sum_logits": -19.678375244140625, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.553241729736328, "logits_per_token": -9.839187622070312, "logits_per_char": -1.4055982317243303, "num_chars": 14}, {"sum_logits": -10.447364807128906, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.948814392089844, "logits_per_token": -5.223682403564453, "logits_per_char": -0.9497604370117188, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1047, "native_id": "790b3f583e9bc9424c771691ecc70c20", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.741326332092285, "incorrect_loss_raw": 11.368828415870667, "correct_loss_per_char": 0.4416404870840219, "incorrect_loss_per_char": 1.2497978977959088, "correct_loss_per_token": 2.8706631660461426, "incorrect_loss_per_token": 6.1229512095451355, "correct_loss_uncond": -14.221627235412598, "incorrect_loss_uncond": -5.218324542045593}, "model_output": [{"sum_logits": -8.620198249816895, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.782123565673828, "logits_per_token": -8.620198249816895, "logits_per_char": -1.724039649963379, "num_chars": 5}, {"sum_logits": -14.071215629577637, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.27862548828125, "logits_per_token": -7.035607814788818, "logits_per_char": -1.5634684032864041, "num_chars": 9}, {"sum_logits": -7.448193073272705, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.714458465576172, "logits_per_token": -3.7240965366363525, "logits_per_char": -0.5320137909480503, "num_chars": 14}, {"sum_logits": -5.741326332092285, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.962953567504883, "logits_per_token": -2.8706631660461426, "logits_per_char": -0.4416404870840219, "num_chars": 13}, {"sum_logits": -15.33570671081543, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -22.57340431213379, "logits_per_token": -5.111902236938477, "logits_per_char": -1.1796697469858022, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1048, "native_id": "22b8219d43a38a1130e0a35ece152337", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.016063690185547, "incorrect_loss_raw": 8.020673155784607, "correct_loss_per_char": 0.5026772816975912, "incorrect_loss_per_char": 1.0637264456067765, "correct_loss_per_token": 3.016063690185547, "incorrect_loss_per_token": 8.020673155784607, "correct_loss_uncond": -8.787105560302734, "incorrect_loss_uncond": -5.894306063652039}, "model_output": [{"sum_logits": -3.016063690185547, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -11.803169250488281, "logits_per_token": -3.016063690185547, "logits_per_char": -0.5026772816975912, "num_chars": 6}, {"sum_logits": -7.617529392242432, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.071504592895508, "logits_per_token": -7.617529392242432, "logits_per_char": -1.0882184846060616, "num_chars": 7}, {"sum_logits": -5.184234619140625, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.5768461227417, "logits_per_token": -5.184234619140625, "logits_per_char": -0.6480293273925781, "num_chars": 8}, {"sum_logits": -8.858476638793945, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.235330581665039, "logits_per_token": -8.858476638793945, "logits_per_char": -1.4764127731323242, "num_chars": 6}, {"sum_logits": -10.422451972961426, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.776235580444336, "logits_per_token": -10.422451972961426, "logits_per_char": -1.0422451972961426, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1049, "native_id": "5d4233146435ab0ca211e8ac9bfce76f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.247676372528076, "incorrect_loss_raw": 12.885308742523193, "correct_loss_per_char": 0.27063969771067303, "incorrect_loss_per_char": 1.231652447912428, "correct_loss_per_token": 3.247676372528076, "incorrect_loss_per_token": 7.659240961074829, "correct_loss_uncond": -11.726046085357666, "incorrect_loss_uncond": -6.557513236999512}, "model_output": [{"sum_logits": -3.247676372528076, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.973722457885742, "logits_per_token": -3.247676372528076, "logits_per_char": -0.27063969771067303, "num_chars": 12}, {"sum_logits": -13.962356567382812, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -20.354293823242188, "logits_per_token": -6.981178283691406, "logits_per_char": -1.1635297139485676, "num_chars": 12}, {"sum_logits": -15.526824951171875, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -23.66086196899414, "logits_per_token": -7.7634124755859375, "logits_per_char": -1.7252027723524306, "num_chars": 9}, {"sum_logits": -12.319360733032227, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -19.32114601135254, "logits_per_token": -6.159680366516113, "logits_per_char": -0.8212907155354817, "num_chars": 15}, {"sum_logits": -9.73269271850586, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.434986114501953, "logits_per_token": -9.73269271850586, "logits_per_char": -1.2165865898132324, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1050, "native_id": "be737cd4db844574ef594442ce6c9453", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.9732513427734375, "incorrect_loss_raw": 8.890872240066528, "correct_loss_per_char": 1.1946502685546876, "incorrect_loss_per_char": 1.2745677368981496, "correct_loss_per_token": 5.9732513427734375, "incorrect_loss_per_token": 7.504713296890259, "correct_loss_uncond": -6.975790977478027, "incorrect_loss_uncond": -5.518694162368774}, "model_output": [{"sum_logits": -5.9732513427734375, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.949042320251465, "logits_per_token": -5.9732513427734375, "logits_per_char": -1.1946502685546876, "num_chars": 5}, {"sum_logits": -13.807541847229004, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.699138641357422, "logits_per_token": -13.807541847229004, "logits_per_char": -1.3807541847229003, "num_chars": 10}, {"sum_logits": -5.459321022033691, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.207840919494629, "logits_per_token": -5.459321022033691, "logits_per_char": -1.0918642044067384, "num_chars": 5}, {"sum_logits": -5.207354545593262, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.04824161529541, "logits_per_token": -5.207354545593262, "logits_per_char": -1.0414709091186523, "num_chars": 5}, {"sum_logits": -11.089271545410156, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.68304443359375, "logits_per_token": -5.544635772705078, "logits_per_char": -1.584181649344308, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1051, "native_id": "550164b7cf4e03153484136f10122c70", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.031489372253418, "incorrect_loss_raw": 8.759827733039856, "correct_loss_per_char": 0.4394680857658386, "incorrect_loss_per_char": 0.609343519475725, "correct_loss_per_token": 2.343829790751139, "incorrect_loss_per_token": 3.7013190189997354, "correct_loss_uncond": -12.551840782165527, "incorrect_loss_uncond": -10.913256049156189}, "model_output": [{"sum_logits": -9.407105445861816, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -23.484886169433594, "logits_per_token": -4.703552722930908, "logits_per_char": -0.783925453821818, "num_chars": 12}, {"sum_logits": -6.330578327178955, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -17.824214935302734, "logits_per_token": -2.1101927757263184, "logits_per_char": -0.6330578327178955, "num_chars": 10}, {"sum_logits": -9.955698013305664, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -20.566043853759766, "logits_per_token": -3.318566004435221, "logits_per_char": -0.5530943340725369, "num_chars": 18}, {"sum_logits": -7.031489372253418, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -19.583330154418945, "logits_per_token": -2.343829790751139, "logits_per_char": -0.4394680857658386, "num_chars": 16}, {"sum_logits": -9.345929145812988, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -16.817190170288086, "logits_per_token": -4.672964572906494, "logits_per_char": -0.4672964572906494, "num_chars": 20}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1052, "native_id": "a617eb4d27edea93e7fd630ce00c8219", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.9192976951599121, "incorrect_loss_raw": 15.297071933746338, "correct_loss_per_char": 0.153216282526652, "incorrect_loss_per_char": 1.2773499200632283, "correct_loss_per_token": 0.9192976951599121, "incorrect_loss_per_token": 7.648535966873169, "correct_loss_uncond": -12.75571584701538, "incorrect_loss_uncond": -4.601862907409668}, "model_output": [{"sum_logits": -20.732860565185547, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -22.983531951904297, "logits_per_token": -10.366430282592773, "logits_per_char": -1.727738380432129, "num_chars": 12}, {"sum_logits": -15.176115036010742, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.051952362060547, "logits_per_token": -7.588057518005371, "logits_per_char": -1.5176115036010742, "num_chars": 10}, {"sum_logits": -0.9192976951599121, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": true, "sum_logits_uncond": -13.675013542175293, "logits_per_token": -0.9192976951599121, "logits_per_char": -0.153216282526652, "num_chars": 6}, {"sum_logits": -10.626005172729492, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -22.683271408081055, "logits_per_token": -5.313002586364746, "logits_per_char": -0.8173850132868841, "num_chars": 13}, {"sum_logits": -14.65330696105957, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.876983642578125, "logits_per_token": -7.326653480529785, "logits_per_char": -1.0466647829328264, "num_chars": 14}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1053, "native_id": "bd47827418d5b8d7fb3502a398644435", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.028032302856445, "incorrect_loss_raw": 15.87478756904602, "correct_loss_per_char": 0.9028032302856446, "incorrect_loss_per_char": 1.1796251370793298, "correct_loss_per_token": 4.514016151428223, "incorrect_loss_per_token": 9.519938468933105, "correct_loss_uncond": -12.087043762207031, "incorrect_loss_uncond": -5.392409086227417}, "model_output": [{"sum_logits": -18.412046432495117, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -21.145263671875, "logits_per_token": -9.206023216247559, "logits_per_char": -1.416311264038086, "num_chars": 13}, {"sum_logits": -14.497701644897461, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.901641845703125, "logits_per_token": -7.2488508224487305, "logits_per_char": -0.9665134429931641, "num_chars": 15}, {"sum_logits": -12.660357475280762, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.28743553161621, "logits_per_token": -12.660357475280762, "logits_per_char": -1.0550297896067302, "num_chars": 12}, {"sum_logits": -9.028032302856445, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -21.115076065063477, "logits_per_token": -4.514016151428223, "logits_per_char": -0.9028032302856446, "num_chars": 10}, {"sum_logits": -17.929044723510742, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -25.734445571899414, "logits_per_token": -8.964522361755371, "logits_per_char": -1.2806460516793388, "num_chars": 14}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1054, "native_id": "31487ab8b1e8f12e252590cc58bd19c2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 4.867556571960449, "incorrect_loss_raw": 10.392736673355103, "correct_loss_per_char": 0.6084445714950562, "incorrect_loss_per_char": 1.598115148809221, "correct_loss_per_token": 4.867556571960449, "incorrect_loss_per_token": 8.743278503417969, "correct_loss_uncond": -9.440205574035645, "incorrect_loss_uncond": -4.831791400909424}, "model_output": [{"sum_logits": -4.867556571960449, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.307762145996094, "logits_per_token": -4.867556571960449, "logits_per_char": -0.6084445714950562, "num_chars": 8}, {"sum_logits": -6.418120384216309, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.973722457885742, "logits_per_token": -6.418120384216309, "logits_per_char": -0.534843365351359, "num_chars": 12}, {"sum_logits": -10.6358642578125, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.71352767944336, "logits_per_token": -10.6358642578125, "logits_per_char": -2.1271728515625, "num_chars": 5}, {"sum_logits": -11.321296691894531, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.006192207336426, "logits_per_token": -11.321296691894531, "logits_per_char": -2.2642593383789062, "num_chars": 5}, {"sum_logits": -13.19566535949707, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.204669952392578, "logits_per_token": -6.597832679748535, "logits_per_char": -1.466185039944119, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1055, "native_id": "ce2fd94212243f843b3f357046051f57", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.548700332641602, "incorrect_loss_raw": 11.595630407333374, "correct_loss_per_char": 0.8185875415802002, "incorrect_loss_per_char": 1.4342302943720961, "correct_loss_per_token": 6.548700332641602, "incorrect_loss_per_token": 8.082636952400208, "correct_loss_uncond": -7.6837310791015625, "incorrect_loss_uncond": -3.883554458618164}, "model_output": [{"sum_logits": -6.548700332641602, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.232431411743164, "logits_per_token": -6.548700332641602, "logits_per_char": -0.8185875415802002, "num_chars": 8}, {"sum_logits": -7.830996513366699, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.421607971191406, "logits_per_token": -7.830996513366699, "logits_per_char": -1.3051660855611165, "num_chars": 6}, {"sum_logits": -10.447577476501465, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.452558517456055, "logits_per_token": -10.447577476501465, "logits_per_char": -1.7412629127502441, "num_chars": 6}, {"sum_logits": -13.189284324645996, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.79358959197998, "logits_per_token": -6.594642162322998, "logits_per_char": -1.1990258476950906, "num_chars": 11}, {"sum_logits": -14.914663314819336, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -21.24898338317871, "logits_per_token": -7.457331657409668, "logits_per_char": -1.4914663314819336, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1056, "native_id": "f87f40db71a56b5beda3194550202dc9_1", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.986907958984375, "incorrect_loss_raw": 11.070649862289429, "correct_loss_per_char": 0.33224232991536456, "incorrect_loss_per_char": 1.3436749709977043, "correct_loss_per_token": 1.9934539794921875, "incorrect_loss_per_token": 7.534523129463196, "correct_loss_uncond": -15.108797073364258, "incorrect_loss_uncond": -5.110724687576294}, "model_output": [{"sum_logits": -8.62825870513916, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.986479759216309, "logits_per_token": -8.62825870513916, "logits_per_char": -0.9586954116821289, "num_chars": 9}, {"sum_logits": -7.365326881408691, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -11.641695976257324, "logits_per_token": -7.365326881408691, "logits_per_char": -1.4730653762817383, "num_chars": 5}, {"sum_logits": -3.986907958984375, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.095705032348633, "logits_per_token": -1.9934539794921875, "logits_per_char": -0.33224232991536456, "num_chars": 12}, {"sum_logits": -21.078765869140625, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.789621353149414, "logits_per_token": -10.539382934570312, "logits_per_char": -2.3420850965711804, "num_chars": 9}, {"sum_logits": -7.210247993469238, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.307701110839844, "logits_per_token": -3.605123996734619, "logits_per_char": -0.6008539994557699, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1057, "native_id": "0b25bbd9e9aa976655e1975e31331709", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 8.495598793029785, "incorrect_loss_raw": 13.353911638259888, "correct_loss_per_char": 0.8495598793029785, "incorrect_loss_per_char": 1.0630452553431193, "correct_loss_per_token": 4.247799396514893, "incorrect_loss_per_token": 6.466539899508159, "correct_loss_uncond": -10.075617790222168, "incorrect_loss_uncond": -7.17426061630249}, "model_output": [{"sum_logits": -17.776561737060547, "num_tokens": 3, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -26.973499298095703, "logits_per_token": -5.925520579020183, "logits_per_char": -0.9875867631700304, "num_chars": 18}, {"sum_logits": -8.495598793029785, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -18.571216583251953, "logits_per_token": -4.247799396514893, "logits_per_char": -0.8495598793029785, "num_chars": 10}, {"sum_logits": -4.242193222045898, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -11.818755149841309, "logits_per_token": -4.242193222045898, "logits_per_char": -0.8484386444091797, "num_chars": 5}, {"sum_logits": -15.117386817932129, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -21.693859100341797, "logits_per_token": -7.5586934089660645, "logits_per_char": -1.5117386817932128, "num_chars": 10}, {"sum_logits": -16.279504776000977, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -21.626575469970703, "logits_per_token": -8.139752388000488, "logits_per_char": -0.9044169320000542, "num_chars": 18}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1058, "native_id": "925232b4c9bba945a38ac7ef0f15f8d0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.965830326080322, "incorrect_loss_raw": 12.034658074378967, "correct_loss_per_char": 0.49715252717336017, "incorrect_loss_per_char": 1.2003993696636623, "correct_loss_per_token": 5.965830326080322, "incorrect_loss_per_token": 6.560708045959473, "correct_loss_uncond": -9.598156452178955, "incorrect_loss_uncond": -3.707051157951355}, "model_output": [{"sum_logits": -4.347032070159912, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -12.254728317260742, "logits_per_token": -4.347032070159912, "logits_per_char": -0.8694064140319824, "num_chars": 5}, {"sum_logits": -17.075000762939453, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -16.279895782470703, "logits_per_token": -8.537500381469727, "logits_per_char": -1.5522727966308594, "num_chars": 11}, {"sum_logits": -5.965830326080322, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -15.563986778259277, "logits_per_token": -5.965830326080322, "logits_per_char": -0.49715252717336017, "num_chars": 12}, {"sum_logits": -13.243337631225586, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -19.153587341308594, "logits_per_token": -6.621668815612793, "logits_per_char": -0.8828891754150391, "num_chars": 15}, {"sum_logits": -13.473261833190918, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -15.27862548828125, "logits_per_token": -6.736630916595459, "logits_per_char": -1.4970290925767686, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1059, "native_id": "3338109fcafaaa370c8900a53e1b3ed8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.485672950744629, "incorrect_loss_raw": 15.221028089523315, "correct_loss_per_char": 0.3918337821960449, "incorrect_loss_per_char": 1.1343121360046695, "correct_loss_per_token": 2.7428364753723145, "incorrect_loss_per_token": 5.604572852452596, "correct_loss_uncond": -13.078391075134277, "incorrect_loss_uncond": -7.482534408569336}, "model_output": [{"sum_logits": -2.8522844314575195, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.968234062194824, "logits_per_token": -2.8522844314575195, "logits_per_char": -0.2592985846779563, "num_chars": 11}, {"sum_logits": -5.485672950744629, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.564064025878906, "logits_per_token": -2.7428364753723145, "logits_per_char": -0.3918337821960449, "num_chars": 14}, {"sum_logits": -20.750225067138672, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -21.55420684814453, "logits_per_token": -10.375112533569336, "logits_per_char": -1.729185422261556, "num_chars": 12}, {"sum_logits": -24.27229881286621, "num_tokens": 5, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -33.48750686645508, "logits_per_token": -4.854459762573242, "logits_per_char": -1.1032863096757368, "num_chars": 22}, {"sum_logits": -13.00930404663086, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -21.804302215576172, "logits_per_token": -4.336434682210286, "logits_per_char": -1.4454782274034288, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1060, "native_id": "e172a93c72d305ee8262a8deb00d9fc3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.231347560882568, "incorrect_loss_raw": 11.118751883506775, "correct_loss_per_char": 0.8034830623202853, "incorrect_loss_per_char": 1.027639449110218, "correct_loss_per_token": 3.615673780441284, "incorrect_loss_per_token": 7.188904523849487, "correct_loss_uncond": -10.86232614517212, "incorrect_loss_uncond": -6.690303206443787}, "model_output": [{"sum_logits": -7.231347560882568, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.093673706054688, "logits_per_token": -3.615673780441284, "logits_per_char": -0.8034830623202853, "num_chars": 9}, {"sum_logits": -5.814899444580078, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.11971378326416, "logits_per_token": -5.814899444580078, "logits_per_char": -0.9691499074300131, "num_chars": 6}, {"sum_logits": -20.15336036682129, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -23.44028091430664, "logits_per_token": -10.076680183410645, "logits_per_char": -1.1854917862836052, "num_chars": 17}, {"sum_logits": -7.221329212188721, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.481456756591797, "logits_per_token": -7.221329212188721, "logits_per_char": -1.2035548686981201, "num_chars": 6}, {"sum_logits": -11.285418510437012, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.19476890563965, "logits_per_token": -5.642709255218506, "logits_per_char": -0.7523612340291341, "num_chars": 15}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1061, "native_id": "f1c2e37abf17d9e4ad16eb40f966c79f", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.3112993240356445, "incorrect_loss_raw": 13.436456203460693, "correct_loss_per_char": 0.5259416103363037, "incorrect_loss_per_char": 0.9350292054089633, "correct_loss_per_token": 3.1556496620178223, "incorrect_loss_per_token": 4.6458622217178345, "correct_loss_uncond": -13.54686450958252, "incorrect_loss_uncond": -9.962625980377197}, "model_output": [{"sum_logits": -6.522717475891113, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.822158813476562, "logits_per_token": -3.2613587379455566, "logits_per_char": -0.5929743159901012, "num_chars": 11}, {"sum_logits": -9.796266555786133, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.087093353271484, "logits_per_token": -4.898133277893066, "logits_per_char": -0.9796266555786133, "num_chars": 10}, {"sum_logits": -31.50336456298828, "num_tokens": 7, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -40.47215270996094, "logits_per_token": -4.500480651855469, "logits_per_char": -1.5751682281494142, "num_chars": 20}, {"sum_logits": -5.923476219177246, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.214923858642578, "logits_per_token": -5.923476219177246, "logits_per_char": -0.5923476219177246, "num_chars": 10}, {"sum_logits": -6.3112993240356445, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.858163833618164, "logits_per_token": -3.1556496620178223, "logits_per_char": -0.5259416103363037, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1062, "native_id": "d29252ddaf7c7ef491abcce342d7bb98", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.975556373596191, "incorrect_loss_raw": 11.919814586639404, "correct_loss_per_char": 1.3975556373596192, "incorrect_loss_per_char": 1.3962313652038574, "correct_loss_per_token": 6.987778186798096, "incorrect_loss_per_token": 6.256431043148041, "correct_loss_uncond": -5.497939109802246, "incorrect_loss_uncond": -5.762321710586548}, "model_output": [{"sum_logits": -17.888389587402344, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.86726951599121, "logits_per_token": -8.944194793701172, "logits_per_char": -2.236048698425293, "num_chars": 8}, {"sum_logits": -13.975556373596191, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.473495483398438, "logits_per_token": -6.987778186798096, "logits_per_char": -1.3975556373596192, "num_chars": 10}, {"sum_logits": -9.74413776397705, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -11.758121490478516, "logits_per_token": -9.74413776397705, "logits_per_char": -1.9488275527954102, "num_chars": 5}, {"sum_logits": -5.302835464477539, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.097567558288574, "logits_per_token": -2.6514177322387695, "logits_per_char": -0.6628544330596924, "num_chars": 8}, {"sum_logits": -14.743895530700684, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -26.005586624145508, "logits_per_token": -3.685973882675171, "logits_per_char": -0.7371947765350342, "num_chars": 20}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1063, "native_id": "8c3c6b34bdb650a6517bca3786406c99", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.251842498779297, "incorrect_loss_raw": 9.500421047210693, "correct_loss_per_char": 0.5578340383676382, "incorrect_loss_per_char": 1.1524157544486542, "correct_loss_per_token": 3.6259212493896484, "incorrect_loss_per_token": 6.6169657707214355, "correct_loss_uncond": -14.007686614990234, "incorrect_loss_uncond": -6.070185422897339}, "model_output": [{"sum_logits": -7.672553062438965, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.793344497680664, "logits_per_token": -7.672553062438965, "logits_per_char": -0.6393794218699137, "num_chars": 12}, {"sum_logits": -11.128962516784668, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -20.062950134277344, "logits_per_token": -5.564481258392334, "logits_per_char": -1.236551390753852, "num_chars": 9}, {"sum_logits": -11.938679695129395, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.502153396606445, "logits_per_token": -5.969339847564697, "logits_per_char": -0.918359976548415, "num_chars": 13}, {"sum_logits": -7.261488914489746, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.923977851867676, "logits_per_token": -7.261488914489746, "logits_per_char": -1.8153722286224365, "num_chars": 4}, {"sum_logits": -7.251842498779297, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -21.25952911376953, "logits_per_token": -3.6259212493896484, "logits_per_char": -0.5578340383676382, "num_chars": 13}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1064, "native_id": "ff1bf2ec835c9df8695ae0cfb5281646", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.581874370574951, "incorrect_loss_raw": 12.135460615158081, "correct_loss_per_char": 0.6545534815107074, "incorrect_loss_per_char": 1.2538193543752034, "correct_loss_per_token": 2.2909371852874756, "incorrect_loss_per_token": 7.213754892349243, "correct_loss_uncond": -15.080603122711182, "incorrect_loss_uncond": -4.723593235015869}, "model_output": [{"sum_logits": -17.104511260986328, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.498720169067383, "logits_per_token": -8.552255630493164, "logits_per_char": -1.1403007507324219, "num_chars": 15}, {"sum_logits": -4.169938087463379, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.818624496459961, "logits_per_token": -4.169938087463379, "logits_per_char": -0.8339876174926758, "num_chars": 5}, {"sum_logits": -16.70185089111328, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.282243728637695, "logits_per_token": -5.567283630371094, "logits_per_char": -0.927880605061849, "num_chars": 18}, {"sum_logits": -4.581874370574951, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.662477493286133, "logits_per_token": -2.2909371852874756, "logits_per_char": -0.6545534815107074, "num_chars": 7}, {"sum_logits": -10.565542221069336, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.836627006530762, "logits_per_token": -10.565542221069336, "logits_per_char": -2.113108444213867, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1065, "native_id": "c7526b682e64f355384631b35cd78fc9", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.424424171447754, "incorrect_loss_raw": 12.717911720275879, "correct_loss_per_char": 1.0530530214309692, "incorrect_loss_per_char": 1.5506379329538964, "correct_loss_per_token": 4.212212085723877, "incorrect_loss_per_token": 10.854236960411072, "correct_loss_uncond": -12.969817161560059, "incorrect_loss_uncond": -1.8844850063323975}, "model_output": [{"sum_logits": -12.354511260986328, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.5768461227417, "logits_per_token": -12.354511260986328, "logits_per_char": -1.544313907623291, "num_chars": 8}, {"sum_logits": -8.424424171447754, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -21.394241333007812, "logits_per_token": -4.212212085723877, "logits_per_char": -1.0530530214309692, "num_chars": 8}, {"sum_logits": -14.909398078918457, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.24630355834961, "logits_per_token": -7.4546990394592285, "logits_per_char": -2.129914011274065, "num_chars": 7}, {"sum_logits": -12.397539138793945, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.953621864318848, "logits_per_token": -12.397539138793945, "logits_per_char": -1.1270490126176314, "num_chars": 11}, {"sum_logits": -11.210198402404785, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.63281536102295, "logits_per_token": -11.210198402404785, "logits_per_char": -1.4012748003005981, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1066, "native_id": "0fba83d3997f048adcc31937221af77e", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.745063781738281, "incorrect_loss_raw": 11.71711540222168, "correct_loss_per_char": 0.9635805402483258, "incorrect_loss_per_char": 1.5895577013492583, "correct_loss_per_token": 3.3725318908691406, "incorrect_loss_per_token": 7.216839869817098, "correct_loss_uncond": -12.639015197753906, "incorrect_loss_uncond": -3.6488020420074463}, "model_output": [{"sum_logits": -12.690056800842285, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.230504989624023, "logits_per_token": -4.230018933614095, "logits_per_char": -1.5862571001052856, "num_chars": 8}, {"sum_logits": -6.745063781738281, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.384078979492188, "logits_per_token": -3.3725318908691406, "logits_per_char": -0.9635805402483258, "num_chars": 7}, {"sum_logits": -10.43144416809082, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.322247505187988, "logits_per_token": -10.43144416809082, "logits_per_char": -2.086288833618164, "num_chars": 5}, {"sum_logits": -19.082128524780273, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.276350021362305, "logits_per_token": -9.541064262390137, "logits_per_char": -1.9082128524780273, "num_chars": 10}, {"sum_logits": -4.66483211517334, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -11.634567260742188, "logits_per_token": -4.66483211517334, "logits_per_char": -0.7774720191955566, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1067, "native_id": "a5456dc611aa93b81d7ab6ed8e160f85", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.386767864227295, "incorrect_loss_raw": 11.674206495285034, "correct_loss_per_char": 0.7096408738030328, "incorrect_loss_per_char": 1.6504612021976046, "correct_loss_per_token": 6.386767864227295, "incorrect_loss_per_token": 8.926267981529236, "correct_loss_uncond": -9.36051321029663, "incorrect_loss_uncond": -3.1772310733795166}, "model_output": [{"sum_logits": -12.519840240478516, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.85129165649414, "logits_per_token": -6.259920120239258, "logits_per_char": -2.503968048095703, "num_chars": 5}, {"sum_logits": -9.463667869567871, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.545339584350586, "logits_per_token": -4.7318339347839355, "logits_per_char": -1.3519525527954102, "num_chars": 7}, {"sum_logits": -13.445877075195312, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.552549362182617, "logits_per_token": -13.445877075195312, "logits_per_char": -1.493986341688368, "num_chars": 9}, {"sum_logits": -6.386767864227295, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.747281074523926, "logits_per_token": -6.386767864227295, "logits_per_char": -0.7096408738030328, "num_chars": 9}, {"sum_logits": -11.267440795898438, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.45656967163086, "logits_per_token": -11.267440795898438, "logits_per_char": -1.2519378662109375, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1068, "native_id": "11416df796f63d2f0dddc846b9c139d3", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.410016059875488, "incorrect_loss_raw": 8.813942432403564, "correct_loss_per_char": 0.9508346716562907, "incorrect_loss_per_char": 1.6065329313278198, "correct_loss_per_token": 5.705008029937744, "incorrect_loss_per_token": 6.663222551345825, "correct_loss_uncond": -9.248095512390137, "incorrect_loss_uncond": -4.99557900428772}, "model_output": [{"sum_logits": -8.333629608154297, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.474409103393555, "logits_per_token": -4.166814804077148, "logits_per_char": -1.041703701019287, "num_chars": 8}, {"sum_logits": -10.404386520385742, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.82812213897705, "logits_per_token": -10.404386520385742, "logits_per_char": -2.0808773040771484, "num_chars": 5}, {"sum_logits": -7.645624160766602, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -11.296318054199219, "logits_per_token": -7.645624160766602, "logits_per_char": -1.5291248321533204, "num_chars": 5}, {"sum_logits": -11.410016059875488, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.658111572265625, "logits_per_token": -5.705008029937744, "logits_per_char": -0.9508346716562907, "num_chars": 12}, {"sum_logits": -8.872129440307617, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.639236450195312, "logits_per_token": -4.436064720153809, "logits_per_char": -1.7744258880615233, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1069, "native_id": "c908d7c4633c5e6add9463bdd47cb27e", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.0121355056762695, "incorrect_loss_raw": 12.869610667228699, "correct_loss_per_char": 0.7012135505676269, "incorrect_loss_per_char": 0.9486998779433113, "correct_loss_per_token": 3.5060677528381348, "incorrect_loss_per_token": 7.266181826591492, "correct_loss_uncond": -10.995903968811035, "incorrect_loss_uncond": -2.842821478843689}, "model_output": [{"sum_logits": -6.790458679199219, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.906187057495117, "logits_per_token": -3.3952293395996094, "logits_per_char": -0.8488073348999023, "num_chars": 8}, {"sum_logits": -6.651011943817139, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.73450756072998, "logits_per_token": -6.651011943817139, "logits_per_char": -0.6651011943817139, "num_chars": 10}, {"sum_logits": -20.34832763671875, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.441818237304688, "logits_per_token": -10.174163818359375, "logits_per_char": -1.0174163818359374, "num_chars": 20}, {"sum_logits": -17.688644409179688, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.767215728759766, "logits_per_token": -8.844322204589844, "logits_per_char": -1.263474600655692, "num_chars": 14}, {"sum_logits": -7.0121355056762695, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.008039474487305, "logits_per_token": -3.5060677528381348, "logits_per_char": -0.7012135505676269, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1070, "native_id": "7e522a60756f854c5331125f998bc36b", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.280501365661621, "incorrect_loss_raw": 12.679665565490723, "correct_loss_per_char": 0.6280501365661622, "incorrect_loss_per_char": 1.4992838804683988, "correct_loss_per_token": 6.280501365661621, "incorrect_loss_per_token": 9.102346738179524, "correct_loss_uncond": -8.092981338500977, "incorrect_loss_uncond": -2.527790069580078}, "model_output": [{"sum_logits": -13.637781143188477, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.782123565673828, "logits_per_token": -13.637781143188477, "logits_per_char": -2.7275562286376953, "num_chars": 5}, {"sum_logits": -21.463912963867188, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.202394485473633, "logits_per_token": -7.1546376546223955, "logits_per_char": -1.1924396091037326, "num_chars": 18}, {"sum_logits": -7.001045227050781, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.268458366394043, "logits_per_token": -7.001045227050781, "logits_per_char": -1.0001493181501115, "num_chars": 7}, {"sum_logits": -6.280501365661621, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.373482704162598, "logits_per_token": -6.280501365661621, "logits_per_char": -0.6280501365661622, "num_chars": 10}, {"sum_logits": -8.615922927856445, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.5768461227417, "logits_per_token": -8.615922927856445, "logits_per_char": -1.0769903659820557, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1071, "native_id": "f4a75bf3f115b826a8097edfd0ff2781", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.954371929168701, "incorrect_loss_raw": 7.379197001457214, "correct_loss_per_char": 0.5302914619445801, "incorrect_loss_per_char": 0.8273133705059688, "correct_loss_per_token": 3.9771859645843506, "incorrect_loss_per_token": 4.2204607129096985, "correct_loss_uncond": -11.697330951690674, "incorrect_loss_uncond": -8.899728655815125}, "model_output": [{"sum_logits": -7.954371929168701, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.651702880859375, "logits_per_token": -3.9771859645843506, "logits_per_char": -0.5302914619445801, "num_chars": 15}, {"sum_logits": -4.2468976974487305, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.841452598571777, "logits_per_token": -4.2468976974487305, "logits_per_char": -0.7078162829081217, "num_chars": 6}, {"sum_logits": -6.287943363189697, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.065664291381836, "logits_per_token": -3.1439716815948486, "logits_per_char": -0.7859929203987122, "num_chars": 8}, {"sum_logits": -4.965024948120117, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.121492385864258, "logits_per_token": -2.4825124740600586, "logits_per_char": -0.41375207901000977, "num_chars": 12}, {"sum_logits": -14.016921997070312, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.087093353271484, "logits_per_token": -7.008460998535156, "logits_per_char": -1.4016921997070313, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1072, "native_id": "02f43014a135cbd39f23b044c99de96e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.426119804382324, "incorrect_loss_raw": 9.844187021255493, "correct_loss_per_char": 0.6426119804382324, "incorrect_loss_per_char": 1.0334944535815527, "correct_loss_per_token": 3.213059902191162, "incorrect_loss_per_token": 7.117597937583923, "correct_loss_uncond": -14.992022514343262, "incorrect_loss_uncond": -7.034489393234253}, "model_output": [{"sum_logits": -6.426119804382324, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.418142318725586, "logits_per_token": -3.213059902191162, "logits_per_char": -0.6426119804382324, "num_chars": 10}, {"sum_logits": -9.310781478881836, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.63223648071289, "logits_per_token": -9.310781478881836, "logits_per_char": -1.3301116398402624, "num_chars": 7}, {"sum_logits": -8.253253936767578, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.753023147583008, "logits_per_token": -8.253253936767578, "logits_per_char": -0.9170282151963975, "num_chars": 9}, {"sum_logits": -12.68994140625, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.11046600341797, "logits_per_token": -6.344970703125, "logits_per_char": -1.0574951171875, "num_chars": 12}, {"sum_logits": -9.122771263122559, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.018980026245117, "logits_per_token": -4.561385631561279, "logits_per_char": -0.8293428421020508, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1073, "native_id": "8cf478192696744b3427f7c109019af5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.420557022094727, "incorrect_loss_raw": 8.3619544506073, "correct_loss_per_char": 0.8015813093919021, "incorrect_loss_per_char": 0.9238528923554854, "correct_loss_per_token": 5.210278511047363, "incorrect_loss_per_token": 4.803290009498596, "correct_loss_uncond": -7.659612655639648, "incorrect_loss_uncond": -8.526020526885986}, "model_output": [{"sum_logits": -6.761471748352051, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.092327117919922, "logits_per_token": -3.3807358741760254, "logits_per_char": -0.6146792498501864, "num_chars": 11}, {"sum_logits": -8.183378219604492, "num_tokens": 4, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.811277389526367, "logits_per_token": -2.045844554901123, "logits_per_char": -0.4091689109802246, "num_chars": 20}, {"sum_logits": -9.070191383361816, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.027201652526855, "logits_per_token": -9.070191383361816, "logits_per_char": -1.8140382766723633, "num_chars": 5}, {"sum_logits": -9.43277645111084, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.62109375, "logits_per_token": -4.71638822555542, "logits_per_char": -0.8575251319191672, "num_chars": 11}, {"sum_logits": -10.420557022094727, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.080169677734375, "logits_per_token": -5.210278511047363, "logits_per_char": -0.8015813093919021, "num_chars": 13}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1074, "native_id": "4ccd43cdff044bc4c644dadff1ff1e0b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.856359481811523, "incorrect_loss_raw": 13.285860776901245, "correct_loss_per_char": 1.0856359481811524, "incorrect_loss_per_char": 1.443363282415602, "correct_loss_per_token": 10.856359481811523, "incorrect_loss_per_token": 8.85496973991394, "correct_loss_uncond": -4.697579383850098, "incorrect_loss_uncond": -3.473926067352295}, "model_output": [{"sum_logits": -6.292942047119141, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.178605079650879, "logits_per_token": -6.292942047119141, "logits_per_char": -0.6992157830132378, "num_chars": 9}, {"sum_logits": -11.403372764587402, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.481456756591797, "logits_per_token": -11.403372764587402, "logits_per_char": -1.9005621274312336, "num_chars": 6}, {"sum_logits": -10.856359481811523, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.553938865661621, "logits_per_token": -10.856359481811523, "logits_per_char": -1.0856359481811524, "num_chars": 10}, {"sum_logits": -22.262256622314453, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -22.685270309448242, "logits_per_token": -11.131128311157227, "logits_per_char": -1.8551880518595378, "num_chars": 12}, {"sum_logits": -13.184871673583984, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.693815231323242, "logits_per_token": -6.592435836791992, "logits_per_char": -1.3184871673583984, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1075, "native_id": "7b7941b883328ad39048d4dfb1eb5623", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.70576000213623, "incorrect_loss_raw": 11.056618571281433, "correct_loss_per_char": 1.617626667022705, "incorrect_loss_per_char": 1.0739550007370127, "correct_loss_per_token": 9.70576000213623, "incorrect_loss_per_token": 7.607793927192688, "correct_loss_uncond": -1.9796943664550781, "incorrect_loss_uncond": -6.093969941139221}, "model_output": [{"sum_logits": -6.690457820892334, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.494048118591309, "logits_per_token": -6.690457820892334, "logits_per_char": -0.7433842023213705, "num_chars": 9}, {"sum_logits": -10.214189529418945, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.693439483642578, "logits_per_token": -5.107094764709473, "logits_per_char": -0.7295849663870675, "num_chars": 14}, {"sum_logits": -17.376407623291016, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -22.963191986083984, "logits_per_token": -8.688203811645508, "logits_per_char": -1.5796734202991833, "num_chars": 11}, {"sum_logits": -9.945419311523438, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.451674461364746, "logits_per_token": -9.945419311523438, "logits_per_char": -1.2431774139404297, "num_chars": 8}, {"sum_logits": -9.70576000213623, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -11.685454368591309, "logits_per_token": -9.70576000213623, "logits_per_char": -1.617626667022705, "num_chars": 6}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1076, "native_id": "008b7ba0c039f6d0d542c6c90aae173c", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 8.862936019897461, "incorrect_loss_raw": 10.995080709457397, "correct_loss_per_char": 0.8057214563543146, "incorrect_loss_per_char": 0.9441995153315047, "correct_loss_per_token": 4.4314680099487305, "incorrect_loss_per_token": 5.497540354728699, "correct_loss_uncond": -11.789033889770508, "incorrect_loss_uncond": -7.379141569137573}, "model_output": [{"sum_logits": -12.81190299987793, "num_tokens": 2, "num_tokens_all": 161, "is_greedy": false, "sum_logits_uncond": -16.144250869750977, "logits_per_token": -6.405951499938965, "logits_per_char": -1.0676585833231609, "num_chars": 12}, {"sum_logits": -7.225229263305664, "num_tokens": 2, "num_tokens_all": 161, "is_greedy": false, "sum_logits_uncond": -18.06551170349121, "logits_per_token": -3.612614631652832, "logits_per_char": -0.555786866408128, "num_chars": 13}, {"sum_logits": -8.862936019897461, "num_tokens": 2, "num_tokens_all": 161, "is_greedy": false, "sum_logits_uncond": -20.65196990966797, "logits_per_token": -4.4314680099487305, "logits_per_char": -0.8057214563543146, "num_chars": 11}, {"sum_logits": -13.513437271118164, "num_tokens": 2, "num_tokens_all": 161, "is_greedy": false, "sum_logits_uncond": -19.539966583251953, "logits_per_token": -6.756718635559082, "logits_per_char": -1.5014930301242404, "num_chars": 9}, {"sum_logits": -10.429753303527832, "num_tokens": 2, "num_tokens_all": 161, "is_greedy": false, "sum_logits_uncond": -19.747159957885742, "logits_per_token": -5.214876651763916, "logits_per_char": -0.6518595814704895, "num_chars": 16}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1077, "native_id": "4c968fa73699a38639ba3ffa1745bc21", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.649550437927246, "incorrect_loss_raw": 11.058051824569702, "correct_loss_per_char": 0.9299100875854492, "incorrect_loss_per_char": 1.407904018055309, "correct_loss_per_token": 4.649550437927246, "incorrect_loss_per_token": 7.61931312084198, "correct_loss_uncond": -6.368552207946777, "incorrect_loss_uncond": -4.373691082000732}, "model_output": [{"sum_logits": -10.104729652404785, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.82812213897705, "logits_per_token": -10.104729652404785, "logits_per_char": -2.020945930480957, "num_chars": 5}, {"sum_logits": -4.649550437927246, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.018102645874023, "logits_per_token": -4.649550437927246, "logits_per_char": -0.9299100875854492, "num_chars": 5}, {"sum_logits": -9.425644874572754, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.686844825744629, "logits_per_token": -4.712822437286377, "logits_per_char": -0.8568768067793413, "num_chars": 11}, {"sum_logits": -6.617568016052246, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.465144157409668, "logits_per_token": -6.617568016052246, "logits_per_char": -0.9453668594360352, "num_chars": 7}, {"sum_logits": -18.084264755249023, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.74686050415039, "logits_per_token": -9.042132377624512, "logits_per_char": -1.8084264755249024, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1078, "native_id": "b1d5cdbf8ef7b3954a6a352bd4df5866", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 4.478484153747559, "incorrect_loss_raw": 9.657222747802734, "correct_loss_per_char": 0.8956968307495117, "incorrect_loss_per_char": 1.3395631216993236, "correct_loss_per_token": 4.478484153747559, "incorrect_loss_per_token": 7.704760432243347, "correct_loss_uncond": -9.909666061401367, "incorrect_loss_uncond": -5.2388646602630615}, "model_output": [{"sum_logits": -4.478484153747559, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.388150215148926, "logits_per_token": -4.478484153747559, "logits_per_char": -0.8956968307495117, "num_chars": 5}, {"sum_logits": -5.0794525146484375, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.312996864318848, "logits_per_token": -5.0794525146484375, "logits_per_char": -0.5643836127387153, "num_chars": 9}, {"sum_logits": -6.361551284790039, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.691120147705078, "logits_per_token": -6.361551284790039, "logits_per_char": -1.0602585474650066, "num_chars": 6}, {"sum_logits": -11.568188667297363, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.567468643188477, "logits_per_token": -11.568188667297363, "logits_per_char": -2.3136377334594727, "num_chars": 5}, {"sum_logits": -15.619698524475098, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -20.01276397705078, "logits_per_token": -7.809849262237549, "logits_per_char": -1.4199725931340998, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1079, "native_id": "c3bc395561113c96ec43afd715da5061", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.6228537559509277, "incorrect_loss_raw": 13.124820947647095, "correct_loss_per_char": 0.3746933937072754, "incorrect_loss_per_char": 1.679402550061544, "correct_loss_per_token": 2.6228537559509277, "incorrect_loss_per_token": 9.729065418243408, "correct_loss_uncond": -11.448050022125244, "incorrect_loss_uncond": -2.678300619125366}, "model_output": [{"sum_logits": -2.6228537559509277, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.070903778076172, "logits_per_token": -2.6228537559509277, "logits_per_char": -0.3746933937072754, "num_chars": 7}, {"sum_logits": -13.77104377746582, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.180763244628906, "logits_per_token": -6.88552188873291, "logits_per_char": -2.29517396291097, "num_chars": 6}, {"sum_logits": -14.53654956817627, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.703721046447754, "logits_per_token": -14.53654956817627, "logits_per_char": -1.8170686960220337, "num_chars": 8}, {"sum_logits": -14.145440101623535, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.321730613708496, "logits_per_token": -14.145440101623535, "logits_per_char": -1.768180012702942, "num_chars": 8}, {"sum_logits": -10.046250343322754, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.006271362304688, "logits_per_token": -3.348750114440918, "logits_per_char": -0.8371875286102295, "num_chars": 12}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1080, "native_id": "d0bd5b5ee7319d1c4727e38d429dd54e", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.157339572906494, "incorrect_loss_raw": 12.70052433013916, "correct_loss_per_char": 0.3595565954844157, "incorrect_loss_per_char": 1.3154245711336232, "correct_loss_per_token": 2.157339572906494, "incorrect_loss_per_token": 9.014330506324768, "correct_loss_uncond": -12.077991008758545, "incorrect_loss_uncond": -2.1034457683563232}, "model_output": [{"sum_logits": -14.860017776489258, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.023900032043457, "logits_per_token": -14.860017776489258, "logits_per_char": -1.8575022220611572, "num_chars": 8}, {"sum_logits": -11.002288818359375, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.786282539367676, "logits_per_token": -11.002288818359375, "logits_per_char": -1.2224765353732638, "num_chars": 9}, {"sum_logits": -11.290511131286621, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.665518760681152, "logits_per_token": -5.6452555656433105, "logits_per_char": -0.9408759276072184, "num_chars": 12}, {"sum_logits": -13.649279594421387, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.74017906188965, "logits_per_token": -4.549759864807129, "logits_per_char": -1.2408435994928533, "num_chars": 11}, {"sum_logits": -2.157339572906494, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": true, "sum_logits_uncond": -14.235330581665039, "logits_per_token": -2.157339572906494, "logits_per_char": -0.3595565954844157, "num_chars": 6}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1081, "native_id": "81f5e741d970578867495ceea5a0c848", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.45694351196289, "incorrect_loss_raw": 12.607923746109009, "correct_loss_per_char": 0.6739378536448759, "incorrect_loss_per_char": 1.499820105613224, "correct_loss_per_token": 3.818981170654297, "incorrect_loss_per_token": 6.898539463678996, "correct_loss_uncond": -10.447303771972656, "incorrect_loss_uncond": -4.138176918029785}, "model_output": [{"sum_logits": -18.8599853515625, "num_tokens": 4, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -25.819990158081055, "logits_per_token": -4.714996337890625, "logits_per_char": -1.0477769639756944, "num_chars": 18}, {"sum_logits": -11.3482084274292, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.79052734375, "logits_per_token": -11.3482084274292, "logits_per_char": -1.8913680712382, "num_chars": 6}, {"sum_logits": -11.45694351196289, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -21.904247283935547, "logits_per_token": -3.818981170654297, "logits_per_char": -0.6739378536448759, "num_chars": 17}, {"sum_logits": -7.18467903137207, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.858872413635254, "logits_per_token": -7.18467903137207, "logits_per_char": -1.1974465052286785, "num_chars": 6}, {"sum_logits": -13.038822174072266, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.515012741088867, "logits_per_token": -4.346274058024089, "logits_per_char": -1.8626888820103236, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1082, "native_id": "6714593a8d1f8ae39930c1f0316e9ffc", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.6111733913421631, "incorrect_loss_raw": 13.010164737701416, "correct_loss_per_char": 0.10186223189036052, "incorrect_loss_per_char": 1.6465614352907454, "correct_loss_per_token": 0.6111733913421631, "incorrect_loss_per_token": 10.814368963241577, "correct_loss_uncond": -12.887178659439087, "incorrect_loss_uncond": -1.5909655094146729}, "model_output": [{"sum_logits": -12.274595260620117, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -15.150565147399902, "logits_per_token": -12.274595260620117, "logits_per_char": -2.0457658767700195, "num_chars": 6}, {"sum_logits": -9.941043853759766, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.944777488708496, "logits_per_token": -9.941043853759766, "logits_per_char": -1.2426304817199707, "num_chars": 8}, {"sum_logits": -17.56636619567871, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -17.06789207458496, "logits_per_token": -8.783183097839355, "logits_per_char": -1.2547404425484794, "num_chars": 14}, {"sum_logits": -0.6111733913421631, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": true, "sum_logits_uncond": -13.49835205078125, "logits_per_token": -0.6111733913421631, "logits_per_char": -0.10186223189036052, "num_chars": 6}, {"sum_logits": -12.25865364074707, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -12.241286277770996, "logits_per_token": -12.25865364074707, "logits_per_char": -2.0431089401245117, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1083, "native_id": "75cb55aec7e64f592c01eee5d4578dcd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.2739205360412598, "incorrect_loss_raw": 11.209311246871948, "correct_loss_per_char": 0.10616004467010498, "incorrect_loss_per_char": 1.7172097444534302, "correct_loss_per_token": 1.2739205360412598, "incorrect_loss_per_token": 11.209311246871948, "correct_loss_uncond": -12.822922229766846, "incorrect_loss_uncond": -1.1362383365631104}, "model_output": [{"sum_logits": -1.2739205360412598, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": true, "sum_logits_uncond": -14.096842765808105, "logits_per_token": -1.2739205360412598, "logits_per_char": -0.10616004467010498, "num_chars": 12}, {"sum_logits": -7.493107795715332, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -11.964545249938965, "logits_per_token": -7.493107795715332, "logits_per_char": -0.7493107795715332, "num_chars": 10}, {"sum_logits": -13.492992401123047, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.699138641357422, "logits_per_token": -13.492992401123047, "logits_per_char": -1.3492992401123047, "num_chars": 10}, {"sum_logits": -9.415504455566406, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -9.09980583190918, "logits_per_token": -9.415504455566406, "logits_per_char": -1.8831008911132812, "num_chars": 5}, {"sum_logits": -14.435640335083008, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.618708610534668, "logits_per_token": -14.435640335083008, "logits_per_char": -2.8871280670166017, "num_chars": 5}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1084, "native_id": "0b30831fb1862bc62339bdf930cbc447", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 20.614898681640625, "incorrect_loss_raw": 11.359752416610718, "correct_loss_per_char": 1.717908223470052, "incorrect_loss_per_char": 1.201457346810235, "correct_loss_per_token": 6.871632893880208, "incorrect_loss_per_token": 8.205074071884155, "correct_loss_uncond": -9.613649368286133, "incorrect_loss_uncond": -5.299681901931763}, "model_output": [{"sum_logits": -14.987228393554688, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.218246459960938, "logits_per_token": -7.493614196777344, "logits_per_char": -1.4987228393554688, "num_chars": 10}, {"sum_logits": -20.614898681640625, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -30.228548049926758, "logits_per_token": -6.871632893880208, "logits_per_char": -1.717908223470052, "num_chars": 12}, {"sum_logits": -10.250198364257812, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -22.247764587402344, "logits_per_token": -5.125099182128906, "logits_per_char": -0.7321570260184151, "num_chars": 14}, {"sum_logits": -9.796213150024414, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.312996864318848, "logits_per_token": -9.796213150024414, "logits_per_char": -1.0884681277804904, "num_chars": 9}, {"sum_logits": -10.405369758605957, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.858729362487793, "logits_per_token": -10.405369758605957, "logits_per_char": -1.4864813940865653, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1085, "native_id": "29c194d032a266a7160bff6f546a4d9d", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.83375358581543, "incorrect_loss_raw": 9.694681286811829, "correct_loss_per_char": 1.2084383964538574, "incorrect_loss_per_char": 1.1859935110523587, "correct_loss_per_token": 4.83375358581543, "incorrect_loss_per_token": 6.347804546356201, "correct_loss_uncond": -6.718132019042969, "incorrect_loss_uncond": -4.639686942100525}, "model_output": [{"sum_logits": -7.169957637786865, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.973722457885742, "logits_per_token": -7.169957637786865, "logits_per_char": -0.5974964698155721, "num_chars": 12}, {"sum_logits": -11.874038696289062, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.974644660949707, "logits_per_token": -5.937019348144531, "logits_per_char": -1.696291242327009, "num_chars": 7}, {"sum_logits": -14.900975227355957, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.837220191955566, "logits_per_token": -7.4504876136779785, "logits_per_char": -1.2417479356129963, "num_chars": 12}, {"sum_logits": -4.83375358581543, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.551885604858398, "logits_per_token": -4.83375358581543, "logits_per_char": -1.2084383964538574, "num_chars": 4}, {"sum_logits": -4.83375358581543, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.551885604858398, "logits_per_token": -4.83375358581543, "logits_per_char": -1.2084383964538574, "num_chars": 4}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1086, "native_id": "ea33206992fb7ad1c3476e9673bb4a9c", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 4, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.430071830749512, "incorrect_loss_raw": 9.31001877784729, "correct_loss_per_char": 0.6191726525624593, "incorrect_loss_per_char": 1.1071805440462552, "correct_loss_per_token": 3.715035915374756, "incorrect_loss_per_token": 6.630401849746704, "correct_loss_uncond": -9.091872215270996, "incorrect_loss_uncond": -6.024583101272583}, "model_output": [{"sum_logits": -11.901252746582031, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.956235885620117, "logits_per_token": -5.950626373291016, "logits_per_char": -0.9154809805063101, "num_chars": 13}, {"sum_logits": -9.79358959197998, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.099899291992188, "logits_per_token": -9.79358959197998, "logits_per_char": -1.9587179183959962, "num_chars": 5}, {"sum_logits": -9.535682678222656, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.959651947021484, "logits_per_token": -4.767841339111328, "logits_per_char": -0.9535682678222657, "num_chars": 10}, {"sum_logits": -7.430071830749512, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.521944046020508, "logits_per_token": -3.715035915374756, "logits_per_char": -0.6191726525624593, "num_chars": 12}, {"sum_logits": -6.009550094604492, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.322620391845703, "logits_per_token": -6.009550094604492, "logits_per_char": -0.6009550094604492, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1087, "native_id": "2b7dd91da5dde1560ace2cd82af926de", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.705089569091797, "incorrect_loss_raw": 14.515364646911621, "correct_loss_per_char": 0.3087574640909831, "incorrect_loss_per_char": 1.183772872639941, "correct_loss_per_token": 1.8525447845458984, "incorrect_loss_per_token": 6.029363989830017, "correct_loss_uncond": -15.076883316040039, "incorrect_loss_uncond": -5.8677849769592285}, "model_output": [{"sum_logits": -10.720908164978027, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.44711685180664, "logits_per_token": -5.360454082489014, "logits_per_char": -1.0720908164978027, "num_chars": 10}, {"sum_logits": -13.945475578308105, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -22.34377670288086, "logits_per_token": -4.648491859436035, "logits_per_char": -0.929698371887207, "num_chars": 15}, {"sum_logits": -17.860910415649414, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -22.997364044189453, "logits_per_token": -8.930455207824707, "logits_per_char": -1.6237191286954014, "num_chars": 11}, {"sum_logits": -15.534164428710938, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.744340896606445, "logits_per_token": -5.1780548095703125, "logits_per_char": -1.1095831734793526, "num_chars": 14}, {"sum_logits": -3.705089569091797, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.781972885131836, "logits_per_token": -1.8525447845458984, "logits_per_char": -0.3087574640909831, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1088, "native_id": "eb50f536830ba18ab987c7ff652e2aba", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.302632331848145, "incorrect_loss_raw": 14.69739556312561, "correct_loss_per_char": 0.5151316165924072, "incorrect_loss_per_char": 1.3246911037535893, "correct_loss_per_token": 5.151316165924072, "incorrect_loss_per_token": 10.086343606313068, "correct_loss_uncond": -8.208765983581543, "incorrect_loss_uncond": -2.784071207046509}, "model_output": [{"sum_logits": -10.302632331848145, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.511398315429688, "logits_per_token": -5.151316165924072, "logits_per_char": -0.5151316165924072, "num_chars": 20}, {"sum_logits": -16.150922775268555, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.971073150634766, "logits_per_token": -8.075461387634277, "logits_per_char": -1.3459102312723796, "num_chars": 12}, {"sum_logits": -15.553119659423828, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.739477157592773, "logits_per_token": -5.184373219807942, "logits_per_char": -1.1109371185302734, "num_chars": 14}, {"sum_logits": -13.600314140319824, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.31043529510498, "logits_per_token": -13.600314140319824, "logits_per_char": -1.9429020200456892, "num_chars": 7}, {"sum_logits": -13.485225677490234, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.904881477355957, "logits_per_token": -13.485225677490234, "logits_per_char": -0.8990150451660156, "num_chars": 15}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1089, "native_id": "6bc3ebcfd04965c25bde71339955746c", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.9961490631103516, "incorrect_loss_raw": 10.039045095443726, "correct_loss_per_char": 0.33290545145670575, "incorrect_loss_per_char": 1.3876792575631822, "correct_loss_per_token": 2.9961490631103516, "incorrect_loss_per_token": 10.039045095443726, "correct_loss_uncond": -9.59763240814209, "incorrect_loss_uncond": -3.511929988861084}, "model_output": [{"sum_logits": -9.215362548828125, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.751547813415527, "logits_per_token": -9.215362548828125, "logits_per_char": -1.1519203186035156, "num_chars": 8}, {"sum_logits": -2.9961490631103516, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.593781471252441, "logits_per_token": -2.9961490631103516, "logits_per_char": -0.33290545145670575, "num_chars": 9}, {"sum_logits": -9.883465766906738, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.920620918273926, "logits_per_token": -9.883465766906738, "logits_per_char": -1.4119236809866769, "num_chars": 7}, {"sum_logits": -12.544455528259277, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.485124588012695, "logits_per_token": -12.544455528259277, "logits_per_char": -1.5680569410324097, "num_chars": 8}, {"sum_logits": -8.512896537780762, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.04660701751709, "logits_per_token": -8.512896537780762, "logits_per_char": -1.418816089630127, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1090, "native_id": "163898952cb6baf3a6440696e1352e86", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.04820442199707, "incorrect_loss_raw": 10.173940181732178, "correct_loss_per_char": 0.3780127763748169, "incorrect_loss_per_char": 1.2415808250033666, "correct_loss_per_token": 3.024102210998535, "incorrect_loss_per_token": 8.424927552541098, "correct_loss_uncond": -12.843193054199219, "incorrect_loss_uncond": -4.362260103225708}, "model_output": [{"sum_logits": -10.494075775146484, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -18.256317138671875, "logits_per_token": -3.4980252583821616, "logits_per_char": -0.5830042097303603, "num_chars": 18}, {"sum_logits": -6.04820442199707, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.89139747619629, "logits_per_token": -3.024102210998535, "logits_per_char": -0.3780127763748169, "num_chars": 16}, {"sum_logits": -10.157800674438477, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.23062801361084, "logits_per_token": -10.157800674438477, "logits_per_char": -1.128644519382053, "num_chars": 9}, {"sum_logits": -9.835260391235352, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.465568542480469, "logits_per_token": -9.835260391235352, "logits_per_char": -0.7025185993739537, "num_chars": 14}, {"sum_logits": -10.208623886108398, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.19228744506836, "logits_per_token": -10.208623886108398, "logits_per_char": -2.5521559715270996, "num_chars": 4}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1091, "native_id": "aa984e2b487d08889bc0c73bab5ac945", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.185202717781067, "incorrect_loss_raw": 8.720227479934692, "correct_loss_per_char": 0.19753378629684448, "incorrect_loss_per_char": 1.2679928225098234, "correct_loss_per_token": 1.185202717781067, "incorrect_loss_per_token": 8.720227479934692, "correct_loss_uncond": -12.839601397514343, "incorrect_loss_uncond": -5.39374852180481}, "model_output": [{"sum_logits": -5.76262903213501, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.76987361907959, "logits_per_token": -5.76262903213501, "logits_per_char": -1.4406572580337524, "num_chars": 4}, {"sum_logits": -11.778083801269531, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.481456756591797, "logits_per_token": -11.778083801269531, "logits_per_char": -1.9630139668782551, "num_chars": 6}, {"sum_logits": -12.790230751037598, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.530681610107422, "logits_per_token": -12.790230751037598, "logits_per_char": -1.1627482500943271, "num_chars": 11}, {"sum_logits": -1.185202717781067, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": true, "sum_logits_uncond": -14.02480411529541, "logits_per_token": -1.185202717781067, "logits_per_char": -0.19753378629684448, "num_chars": 6}, {"sum_logits": -4.549966335296631, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.6738920211792, "logits_per_token": -4.549966335296631, "logits_per_char": -0.505551815032959, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1092, "native_id": "d78baca23e0a636a8961e17119047e63", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.004430770874023, "incorrect_loss_raw": 8.615419387817383, "correct_loss_per_char": 1.0008861541748046, "incorrect_loss_per_char": 1.5053266207377116, "correct_loss_per_token": 5.004430770874023, "incorrect_loss_per_token": 8.615419387817383, "correct_loss_uncond": -8.823691368103027, "incorrect_loss_uncond": -3.8956825733184814}, "model_output": [{"sum_logits": -8.395450592041016, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -12.506343841552734, "logits_per_token": -8.395450592041016, "logits_per_char": -1.6790901184082032, "num_chars": 5}, {"sum_logits": -7.048554420471191, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -13.289554595947266, "logits_per_token": -7.048554420471191, "logits_per_char": -1.174759070078532, "num_chars": 6}, {"sum_logits": -5.004430770874023, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -13.82812213897705, "logits_per_token": -5.004430770874023, "logits_per_char": -1.0008861541748046, "num_chars": 5}, {"sum_logits": -10.536643028259277, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -12.326245307922363, "logits_per_token": -10.536643028259277, "logits_per_char": -2.1073286056518556, "num_chars": 5}, {"sum_logits": -8.481029510498047, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -11.922264099121094, "logits_per_token": -8.481029510498047, "logits_per_char": -1.0601286888122559, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1093, "native_id": "ac6378b5e8462dc1bde1155d706213d8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.16236686706543, "incorrect_loss_raw": 10.266832709312439, "correct_loss_per_char": 0.9301972389221191, "incorrect_loss_per_char": 0.8296655742895036, "correct_loss_per_token": 3.7207889556884766, "incorrect_loss_per_token": 3.826375901699066, "correct_loss_uncond": -7.433027267456055, "incorrect_loss_uncond": -8.041947960853577}, "model_output": [{"sum_logits": -3.9167160987854004, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.848213195800781, "logits_per_token": -1.9583580493927002, "logits_per_char": -0.3263930082321167, "num_chars": 12}, {"sum_logits": -8.264373779296875, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.38298225402832, "logits_per_token": -2.754791259765625, "logits_per_char": -1.0330467224121094, "num_chars": 8}, {"sum_logits": -11.16236686706543, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.595394134521484, "logits_per_token": -3.7207889556884766, "logits_per_char": -0.9301972389221191, "num_chars": 12}, {"sum_logits": -13.483176231384277, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.616514205932617, "logits_per_token": -6.741588115692139, "logits_per_char": -1.225743293762207, "num_chars": 11}, {"sum_logits": -15.403064727783203, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -22.387413024902344, "logits_per_token": -3.850766181945801, "logits_per_char": -0.7334792727515811, "num_chars": 21}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1094, "native_id": "c1aebf059c5102f4e773f7fe4afe13f0", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.5775227546691895, "incorrect_loss_raw": 12.35992980003357, "correct_loss_per_char": 0.657752275466919, "incorrect_loss_per_char": 0.9406465191369061, "correct_loss_per_token": 6.5775227546691895, "incorrect_loss_per_token": 8.996346592903137, "correct_loss_uncond": -6.077465534210205, "incorrect_loss_uncond": -3.3481969833374023}, "model_output": [{"sum_logits": -13.27999210357666, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.32223892211914, "logits_per_token": -6.63999605178833, "logits_per_char": -1.1066660086313884, "num_chars": 12}, {"sum_logits": -13.628673553466797, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.109146118164062, "logits_per_token": -6.814336776733398, "logits_per_char": -0.801686679615694, "num_chars": 17}, {"sum_logits": -8.656892776489258, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.96721363067627, "logits_per_token": -8.656892776489258, "logits_per_char": -0.7869902524081144, "num_chars": 11}, {"sum_logits": -6.5775227546691895, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.654988288879395, "logits_per_token": -6.5775227546691895, "logits_per_char": -0.657752275466919, "num_chars": 10}, {"sum_logits": -13.874160766601562, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.433908462524414, "logits_per_token": -13.874160766601562, "logits_per_char": -1.067243135892428, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1095, "native_id": "1017807310a25d3ea4a4ec305e91cba3", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.3022751808166504, "incorrect_loss_raw": 7.96835994720459, "correct_loss_per_char": 0.36691946453518337, "incorrect_loss_per_char": 1.1089944375885858, "correct_loss_per_token": 1.6511375904083252, "incorrect_loss_per_token": 6.356530070304871, "correct_loss_uncond": -12.164884090423584, "incorrect_loss_uncond": -6.0583696365356445}, "model_output": [{"sum_logits": -9.658729553222656, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.317315101623535, "logits_per_token": -9.658729553222656, "logits_per_char": -1.073192172580295, "num_chars": 9}, {"sum_logits": -8.596426010131836, "num_tokens": 4, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -16.90019416809082, "logits_per_token": -2.149106502532959, "logits_per_char": -0.8596426010131836, "num_chars": 10}, {"sum_logits": -3.3022751808166504, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.467159271240234, "logits_per_token": -1.6511375904083252, "logits_per_char": -0.36691946453518337, "num_chars": 9}, {"sum_logits": -6.615416049957275, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -11.481525421142578, "logits_per_token": -6.615416049957275, "logits_per_char": -1.102569341659546, "num_chars": 6}, {"sum_logits": -7.002868175506592, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.407883644104004, "logits_per_token": -7.002868175506592, "logits_per_char": -1.4005736351013183, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1096, "native_id": "7192c9f5c513aac9042bad595ff5af9f", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.138856887817383, "incorrect_loss_raw": 10.97992491722107, "correct_loss_per_char": 1.0154285430908203, "incorrect_loss_per_char": 1.0917481909959745, "correct_loss_per_token": 4.569428443908691, "incorrect_loss_per_token": 6.74465811252594, "correct_loss_uncond": -11.12745475769043, "incorrect_loss_uncond": -5.069015979766846}, "model_output": [{"sum_logits": -10.037565231323242, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.124846458435059, "logits_per_token": -10.037565231323242, "logits_per_char": -0.8364637692769369, "num_chars": 12}, {"sum_logits": -14.658546447753906, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.36722183227539, "logits_per_token": -7.329273223876953, "logits_per_char": -1.1275804959810698, "num_chars": 13}, {"sum_logits": -9.435113906860352, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.048652648925781, "logits_per_token": -4.717556953430176, "logits_per_char": -1.179389238357544, "num_chars": 8}, {"sum_logits": -9.788474082946777, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.65504264831543, "logits_per_token": -4.894237041473389, "logits_per_char": -1.2235592603683472, "num_chars": 8}, {"sum_logits": -9.138856887817383, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.266311645507812, "logits_per_token": -4.569428443908691, "logits_per_char": -1.0154285430908203, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1097, "native_id": "7c05e8d5a057085455eea243fbd1cd90", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.757291078567505, "incorrect_loss_raw": 16.571141481399536, "correct_loss_per_char": 0.26837793418339323, "incorrect_loss_per_char": 1.3646089951197307, "correct_loss_per_token": 1.8786455392837524, "incorrect_loss_per_token": 8.063116192817688, "correct_loss_uncond": -15.573818922042847, "incorrect_loss_uncond": -2.3095920085906982}, "model_output": [{"sum_logits": -26.014066696166992, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -25.42670440673828, "logits_per_token": -6.503516674041748, "logits_per_char": -1.2387650807698567, "num_chars": 21}, {"sum_logits": -14.219844818115234, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.339872360229492, "logits_per_token": -7.109922409057617, "logits_per_char": -1.4219844818115235, "num_chars": 10}, {"sum_logits": -11.227396965026855, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.080846786499023, "logits_per_token": -11.227396965026855, "logits_per_char": -1.8712328275044758, "num_chars": 6}, {"sum_logits": -3.757291078567505, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.33111000061035, "logits_per_token": -1.8786455392837524, "logits_per_char": -0.26837793418339323, "num_chars": 14}, {"sum_logits": -14.823257446289062, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.67551040649414, "logits_per_token": -7.411628723144531, "logits_per_char": -0.9264535903930664, "num_chars": 16}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1098, "native_id": "3cb91a71a6567da870eedf37becc97ef", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.82262659072876, "incorrect_loss_raw": 12.779125690460205, "correct_loss_per_char": 0.6518855492273966, "incorrect_loss_per_char": 1.5084816544947, "correct_loss_per_token": 3.91131329536438, "incorrect_loss_per_token": 11.023666858673096, "correct_loss_uncond": -8.540839672088623, "incorrect_loss_uncond": -3.375406265258789}, "model_output": [{"sum_logits": -7.82262659072876, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.363466262817383, "logits_per_token": -3.91131329536438, "logits_per_char": -0.6518855492273966, "num_chars": 12}, {"sum_logits": -14.043670654296875, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -22.07818603515625, "logits_per_token": -7.0218353271484375, "logits_per_char": -1.2766973322088069, "num_chars": 11}, {"sum_logits": -9.754256248474121, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.317315101623535, "logits_per_token": -9.754256248474121, "logits_per_char": -1.083806249830458, "num_chars": 9}, {"sum_logits": -14.123481750488281, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.071391105651855, "logits_per_token": -14.123481750488281, "logits_per_char": -2.3539136250813804, "num_chars": 6}, {"sum_logits": -13.195094108581543, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.151235580444336, "logits_per_token": -13.195094108581543, "logits_per_char": -1.3195094108581542, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1099, "native_id": "9b4bbf3c4d24ecdb4b27320afb706808", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.780584335327148, "incorrect_loss_raw": 11.790165901184082, "correct_loss_per_char": 1.1780584335327149, "incorrect_loss_per_char": 1.1939550755070707, "correct_loss_per_token": 3.9268614451090493, "incorrect_loss_per_token": 7.263315836588542, "correct_loss_uncond": -7.691047668457031, "incorrect_loss_uncond": -5.462536811828613}, "model_output": [{"sum_logits": -11.780584335327148, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.47163200378418, "logits_per_token": -3.9268614451090493, "logits_per_char": -1.1780584335327149, "num_chars": 10}, {"sum_logits": -8.936250686645508, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.87074089050293, "logits_per_token": -2.978750228881836, "logits_per_char": -0.744687557220459, "num_chars": 12}, {"sum_logits": -11.019296646118164, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.893854141235352, "logits_per_token": -11.019296646118164, "logits_per_char": -1.836549441019694, "num_chars": 6}, {"sum_logits": -18.224849700927734, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -22.004837036132812, "logits_per_token": -6.074949900309245, "logits_per_char": -1.0720499824075138, "num_chars": 17}, {"sum_logits": -8.980266571044922, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.241378784179688, "logits_per_token": -8.980266571044922, "logits_per_char": -1.1225333213806152, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1100, "native_id": "43df3a316880d8bab346c06bd43b94dd", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.3374111652374268, "incorrect_loss_raw": 13.640608549118042, "correct_loss_per_char": 0.5843527913093567, "incorrect_loss_per_char": 1.1983844741234075, "correct_loss_per_token": 2.3374111652374268, "incorrect_loss_per_token": 11.387033224105835, "correct_loss_uncond": -9.648789167404175, "incorrect_loss_uncond": -3.8050544261932373}, "model_output": [{"sum_logits": -9.17776107788086, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.270478248596191, "logits_per_token": -9.17776107788086, "logits_per_char": -1.5296268463134766, "num_chars": 6}, {"sum_logits": -18.028602600097656, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -26.371509552001953, "logits_per_token": -9.014301300048828, "logits_per_char": -1.060506035299862, "num_chars": 17}, {"sum_logits": -14.55353832244873, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.484378814697266, "logits_per_token": -14.55353832244873, "logits_per_char": -1.0395384516034807, "num_chars": 14}, {"sum_logits": -2.3374111652374268, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.986200332641602, "logits_per_token": -2.3374111652374268, "logits_per_char": -0.5843527913093567, "num_chars": 4}, {"sum_logits": -12.802532196044922, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.656285285949707, "logits_per_token": -12.802532196044922, "logits_per_char": -1.163866563276811, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1101, "native_id": "858a5eaa587fe0e266722228671a6bd1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.992656707763672, "incorrect_loss_raw": 11.472084283828735, "correct_loss_per_char": 0.635696064342152, "incorrect_loss_per_char": 1.2681946777162096, "correct_loss_per_token": 6.992656707763672, "incorrect_loss_per_token": 6.685869137446086, "correct_loss_uncond": -7.379673957824707, "incorrect_loss_uncond": -5.971617698669434}, "model_output": [{"sum_logits": -6.992656707763672, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.372330665588379, "logits_per_token": -6.992656707763672, "logits_per_char": -0.635696064342152, "num_chars": 11}, {"sum_logits": -10.377501487731934, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.455964088439941, "logits_per_token": -10.377501487731934, "logits_per_char": -1.0377501487731933, "num_chars": 10}, {"sum_logits": -15.739046096801758, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.903865814208984, "logits_per_token": -7.869523048400879, "logits_per_char": -2.2484351566859653, "num_chars": 7}, {"sum_logits": -11.43513298034668, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.15621566772461, "logits_per_token": -5.71756649017334, "logits_per_char": -0.9529277483622233, "num_chars": 12}, {"sum_logits": -8.33665657043457, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.25876235961914, "logits_per_token": -2.77888552347819, "logits_per_char": -0.8336656570434571, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1102, "native_id": "34005ef0caafefc8585c9fcd50e94557", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.092726707458496, "incorrect_loss_raw": 13.901575803756714, "correct_loss_per_char": 0.7923376219613212, "incorrect_loss_per_char": 1.437788447784999, "correct_loss_per_token": 5.546363353729248, "incorrect_loss_per_token": 8.684660911560059, "correct_loss_uncond": -7.969613075256348, "incorrect_loss_uncond": -1.460162878036499}, "model_output": [{"sum_logits": -11.477607727050781, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.375931739807129, "logits_per_token": -11.477607727050781, "logits_per_char": -2.295521545410156, "num_chars": 5}, {"sum_logits": -11.092726707458496, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -19.062339782714844, "logits_per_token": -5.546363353729248, "logits_per_char": -0.7923376219613212, "num_chars": 14}, {"sum_logits": -16.069900512695312, "num_tokens": 4, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.795400619506836, "logits_per_token": -4.017475128173828, "logits_per_char": -0.8927722507052951, "num_chars": 18}, {"sum_logits": -17.630468368530273, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -17.481952667236328, "logits_per_token": -8.815234184265137, "logits_per_char": -1.2593191691807337, "num_chars": 14}, {"sum_logits": -10.428326606750488, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -12.793669700622559, "logits_per_token": -10.428326606750488, "logits_per_char": -1.303540825843811, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1103, "native_id": "f61d83f90b92a8d537989e55ee70542d", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.986910820007324, "incorrect_loss_raw": 11.36432695388794, "correct_loss_per_char": 0.6351737109097567, "incorrect_loss_per_char": 1.4339038673073352, "correct_loss_per_token": 6.986910820007324, "incorrect_loss_per_token": 7.333270311355591, "correct_loss_uncond": -7.575228691101074, "incorrect_loss_uncond": -3.7560739517211914}, "model_output": [{"sum_logits": -14.718057632446289, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.055910110473633, "logits_per_token": -7.3590288162231445, "logits_per_char": -1.338005239313299, "num_chars": 11}, {"sum_logits": -5.759472846984863, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -11.129728317260742, "logits_per_token": -5.759472846984863, "logits_per_char": -0.9599121411641439, "num_chars": 6}, {"sum_logits": -17.5303955078125, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.789621353149414, "logits_per_token": -8.76519775390625, "logits_per_char": -1.9478217230902777, "num_chars": 9}, {"sum_logits": -6.986910820007324, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.562139511108398, "logits_per_token": -6.986910820007324, "logits_per_char": -0.6351737109097567, "num_chars": 11}, {"sum_logits": -7.4493818283081055, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.506343841552734, "logits_per_token": -7.4493818283081055, "logits_per_char": -1.4898763656616212, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1104, "native_id": "3bf06235a537adc9d85431846595b800", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.391163349151611, "incorrect_loss_raw": 11.647956609725952, "correct_loss_per_char": 1.0651938915252686, "incorrect_loss_per_char": 1.9582458813985189, "correct_loss_per_token": 6.391163349151611, "incorrect_loss_per_token": 11.647956609725952, "correct_loss_uncond": -7.322702884674072, "incorrect_loss_uncond": -2.3521475791931152}, "model_output": [{"sum_logits": -11.131836891174316, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.866124153137207, "logits_per_token": -11.131836891174316, "logits_per_char": -1.8553061485290527, "num_chars": 6}, {"sum_logits": -6.391163349151611, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.713866233825684, "logits_per_token": -6.391163349151611, "logits_per_char": -1.0651938915252686, "num_chars": 6}, {"sum_logits": -9.683233261108398, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.682783126831055, "logits_per_token": -9.683233261108398, "logits_per_char": -1.9366466522216796, "num_chars": 5}, {"sum_logits": -15.062752723693848, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.249432563781738, "logits_per_token": -15.062752723693848, "logits_per_char": -2.510458787282308, "num_chars": 6}, {"sum_logits": -10.714003562927246, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.20207691192627, "logits_per_token": -10.714003562927246, "logits_per_char": -1.5305719375610352, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1105, "native_id": "79ec11d8072ce42779adfe0a19bd5374", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.02652645111084, "incorrect_loss_raw": 12.206364631652832, "correct_loss_per_char": 1.00294738345676, "incorrect_loss_per_char": 0.9645708931816948, "correct_loss_per_token": 9.02652645111084, "incorrect_loss_per_token": 6.258426944414775, "correct_loss_uncond": -4.0164031982421875, "incorrect_loss_uncond": -4.487459421157837}, "model_output": [{"sum_logits": -15.31293773651123, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.601680755615234, "logits_per_token": -7.656468868255615, "logits_per_char": -1.0208625157674154, "num_chars": 15}, {"sum_logits": -10.314325332641602, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.50890350341797, "logits_per_token": -3.438108444213867, "logits_per_char": -1.03143253326416, "num_chars": 10}, {"sum_logits": -13.88859748840332, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.990819931030273, "logits_per_token": -4.62953249613444, "logits_per_char": -0.7715887493557401, "num_chars": 18}, {"sum_logits": -9.309597969055176, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.6738920211792, "logits_per_token": -9.309597969055176, "logits_per_char": -1.034399774339464, "num_chars": 9}, {"sum_logits": -9.02652645111084, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.042929649353027, "logits_per_token": -9.02652645111084, "logits_per_char": -1.00294738345676, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1106, "native_id": "2982d0eae1bf880f5930341af7665716", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.111048698425293, "incorrect_loss_raw": 11.49071979522705, "correct_loss_per_char": 0.6111048698425293, "incorrect_loss_per_char": 1.2065713736746047, "correct_loss_per_token": 6.111048698425293, "incorrect_loss_per_token": 6.081020633379619, "correct_loss_uncond": -7.692811012268066, "incorrect_loss_uncond": -4.035830497741699}, "model_output": [{"sum_logits": -13.59778118133545, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.27862548828125, "logits_per_token": -6.798890590667725, "logits_per_char": -1.5108645757039387, "num_chars": 9}, {"sum_logits": -11.129929542541504, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.835020065307617, "logits_per_token": -3.7099765141805015, "logits_per_char": -1.236658838060167, "num_chars": 9}, {"sum_logits": -11.129929542541504, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.835020065307617, "logits_per_token": -3.7099765141805015, "logits_per_char": -1.236658838060167, "num_chars": 9}, {"sum_logits": -6.111048698425293, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.80385971069336, "logits_per_token": -6.111048698425293, "logits_per_char": -0.6111048698425293, "num_chars": 10}, {"sum_logits": -10.105238914489746, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.157535552978516, "logits_per_token": -10.105238914489746, "logits_per_char": -0.8421032428741455, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1107, "native_id": "ba9132ebf2bc3ad21e6a0631dc4e0a77", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.238076210021973, "incorrect_loss_raw": 7.7356812953948975, "correct_loss_per_char": 0.5670978372747247, "incorrect_loss_per_char": 1.1372574784538962, "correct_loss_per_token": 3.1190381050109863, "incorrect_loss_per_token": 7.7356812953948975, "correct_loss_uncond": -13.236336708068848, "incorrect_loss_uncond": -5.580337285995483}, "model_output": [{"sum_logits": -11.332003593444824, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.641080856323242, "logits_per_token": -11.332003593444824, "logits_per_char": -1.8886672655741374, "num_chars": 6}, {"sum_logits": -7.208124160766602, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.953621864318848, "logits_per_token": -7.208124160766602, "logits_per_char": -0.6552840146151456, "num_chars": 11}, {"sum_logits": -9.797718048095703, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.321028709411621, "logits_per_token": -9.797718048095703, "logits_per_char": -1.6329530080159504, "num_chars": 6}, {"sum_logits": -2.604879379272461, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.348342895507812, "logits_per_token": -2.604879379272461, "logits_per_char": -0.37212562561035156, "num_chars": 7}, {"sum_logits": -6.238076210021973, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -19.47441291809082, "logits_per_token": -3.1190381050109863, "logits_per_char": -0.5670978372747247, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1108, "native_id": "d06de16a4aaeaef32b398c1213257b4a", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.644013404846191, "incorrect_loss_raw": 18.46912431716919, "correct_loss_per_char": 0.6261184355791878, "incorrect_loss_per_char": 1.385551724650643, "correct_loss_per_token": 3.548004468282064, "incorrect_loss_per_token": 8.316752672195435, "correct_loss_uncond": -11.67133617401123, "incorrect_loss_uncond": -0.7932572364807129}, "model_output": [{"sum_logits": -22.027427673339844, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.910371780395508, "logits_per_token": -7.342475891113281, "logits_per_char": -1.4684951782226563, "num_chars": 15}, {"sum_logits": -17.100528717041016, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -22.468902587890625, "logits_per_token": -8.550264358520508, "logits_per_char": -1.0687830448150635, "num_chars": 16}, {"sum_logits": -10.644013404846191, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -22.315349578857422, "logits_per_token": -3.548004468282064, "logits_per_char": -0.6261184355791878, "num_chars": 17}, {"sum_logits": -20.331905364990234, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.71221160888672, "logits_per_token": -10.165952682495117, "logits_per_char": -1.6943254470825195, "num_chars": 12}, {"sum_logits": -14.416635513305664, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.958040237426758, "logits_per_token": -7.208317756652832, "logits_per_char": -1.310603228482333, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1109, "native_id": "eee9476bf29498b7d74b043afe316fc6", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.392817497253418, "incorrect_loss_raw": 12.610625267028809, "correct_loss_per_char": 0.898802916208903, "incorrect_loss_per_char": 1.314063504210904, "correct_loss_per_token": 5.392817497253418, "incorrect_loss_per_token": 7.135069211324057, "correct_loss_uncond": -8.589282989501953, "incorrect_loss_uncond": -2.828528881072998}, "model_output": [{"sum_logits": -19.59621810913086, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.72678565979004, "logits_per_token": -6.53207270304362, "logits_per_char": -1.507401393010066, "num_chars": 13}, {"sum_logits": -11.802383422851562, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.290284156799316, "logits_per_token": -11.802383422851562, "logits_per_char": -1.3113759358723958, "num_chars": 9}, {"sum_logits": -13.257118225097656, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.835020065307617, "logits_per_token": -4.419039408365886, "logits_per_char": -1.4730131361219618, "num_chars": 9}, {"sum_logits": -5.392817497253418, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.982100486755371, "logits_per_token": -5.392817497253418, "logits_per_char": -0.898802916208903, "num_chars": 6}, {"sum_logits": -5.786781311035156, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.904526710510254, "logits_per_token": -5.786781311035156, "logits_per_char": -0.9644635518391927, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1110, "native_id": "a85441d6a0e3f871d81a9f19b31360b7", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.185725212097168, "incorrect_loss_raw": 10.60383415222168, "correct_loss_per_char": 0.7441568374633789, "incorrect_loss_per_char": 0.9239412256649563, "correct_loss_per_token": 4.092862606048584, "incorrect_loss_per_token": 5.811294158299764, "correct_loss_uncond": -11.201922416687012, "incorrect_loss_uncond": -6.961521625518799}, "model_output": [{"sum_logits": -11.190983772277832, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.72293472290039, "logits_per_token": -5.595491886138916, "logits_per_char": -1.1190983772277832, "num_chars": 10}, {"sum_logits": -14.341728210449219, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.046890258789062, "logits_per_token": -4.780576070149739, "logits_per_char": -0.8963580131530762, "num_chars": 16}, {"sum_logits": -8.855592727661133, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.874704360961914, "logits_per_token": -8.855592727661133, "logits_per_char": -1.1069490909576416, "num_chars": 8}, {"sum_logits": -8.185725212097168, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.38764762878418, "logits_per_token": -4.092862606048584, "logits_per_char": -0.7441568374633789, "num_chars": 11}, {"sum_logits": -8.027031898498535, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.616893768310547, "logits_per_token": -4.013515949249268, "logits_per_char": -0.5733594213213239, "num_chars": 14}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1111, "native_id": "f11a2975898033893d6a38f75d791fdf", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.61107063293457, "incorrect_loss_raw": 11.644697666168213, "correct_loss_per_char": 1.6527676582336426, "incorrect_loss_per_char": 0.9611648303830725, "correct_loss_per_token": 6.61107063293457, "incorrect_loss_per_token": 5.184483528137207, "correct_loss_uncond": -5.530178070068359, "incorrect_loss_uncond": -6.794782638549805}, "model_output": [{"sum_logits": -15.308767318725586, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.07769203186035, "logits_per_token": -5.102922439575195, "logits_per_char": -1.1775974860558143, "num_chars": 13}, {"sum_logits": -6.820788383483887, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.5550479888916, "logits_per_token": -3.4103941917419434, "logits_per_char": -0.5246760294987605, "num_chars": 13}, {"sum_logits": -9.3959321975708, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.488216400146484, "logits_per_token": -4.6979660987854, "logits_per_char": -0.46979660987854005, "num_chars": 20}, {"sum_logits": -15.053302764892578, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.636964797973633, "logits_per_token": -7.526651382446289, "logits_per_char": -1.6725891960991754, "num_chars": 9}, {"sum_logits": -6.61107063293457, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.14124870300293, "logits_per_token": -6.61107063293457, "logits_per_char": -1.6527676582336426, "num_chars": 4}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1112, "native_id": "a2977fd575faba162d04a490dabd1b9b", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.410737991333008, "incorrect_loss_raw": 8.524300336837769, "correct_loss_per_char": 0.34107379913330077, "incorrect_loss_per_char": 1.022637956766855, "correct_loss_per_token": 3.410737991333008, "incorrect_loss_per_token": 6.2456817626953125, "correct_loss_uncond": -10.346187591552734, "incorrect_loss_uncond": -7.191141128540039}, "model_output": [{"sum_logits": -4.976934432983398, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.206927299499512, "logits_per_token": -4.976934432983398, "logits_per_char": -0.7109906332833427, "num_chars": 7}, {"sum_logits": -6.2052812576293945, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.327664375305176, "logits_per_token": -6.2052812576293945, "logits_per_char": -0.7756601572036743, "num_chars": 8}, {"sum_logits": -3.410737991333008, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.756925582885742, "logits_per_token": -3.410737991333008, "logits_per_char": -0.34107379913330077, "num_chars": 10}, {"sum_logits": -4.686037063598633, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.564410209655762, "logits_per_token": -4.686037063598633, "logits_per_char": -0.7810061772664388, "num_chars": 6}, {"sum_logits": -18.22894859313965, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -23.76276397705078, "logits_per_token": -9.114474296569824, "logits_per_char": -1.8228948593139649, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1113, "native_id": "cd39e442204d3edf7acc185fd59c8a44", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.128602027893066, "incorrect_loss_raw": 7.059588670730591, "correct_loss_per_char": 0.6410752534866333, "incorrect_loss_per_char": 0.920944162571069, "correct_loss_per_token": 5.128602027893066, "incorrect_loss_per_token": 5.33602249622345, "correct_loss_uncond": -9.95816707611084, "incorrect_loss_uncond": -7.3662497997283936}, "model_output": [{"sum_logits": -6.1443681716918945, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -12.77853012084961, "logits_per_token": -6.1443681716918945, "logits_per_char": -1.0240613619486492, "num_chars": 6}, {"sum_logits": -4.907073020935059, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -11.10049819946289, "logits_per_token": -4.907073020935059, "logits_per_char": -0.9814146041870118, "num_chars": 5}, {"sum_logits": -13.788529396057129, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -19.389339447021484, "logits_per_token": -6.8942646980285645, "logits_per_char": -1.2535026723688298, "num_chars": 11}, {"sum_logits": -5.128602027893066, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -15.086769104003906, "logits_per_token": -5.128602027893066, "logits_per_char": -0.6410752534866333, "num_chars": 8}, {"sum_logits": -3.3983840942382812, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.434986114501953, "logits_per_token": -3.3983840942382812, "logits_per_char": -0.42479801177978516, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1114, "native_id": "c77e1039d78cdff197a370fcda0f2b9f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.9093246459960938, "incorrect_loss_raw": 12.659615755081177, "correct_loss_per_char": 0.3182207743326823, "incorrect_loss_per_char": 2.036000200680324, "correct_loss_per_token": 1.9093246459960938, "incorrect_loss_per_token": 12.659615755081177, "correct_loss_uncond": -13.288569450378418, "incorrect_loss_uncond": -1.2429826259613037}, "model_output": [{"sum_logits": -1.9093246459960938, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": true, "sum_logits_uncond": -15.197894096374512, "logits_per_token": -1.9093246459960938, "logits_per_char": -0.3182207743326823, "num_chars": 6}, {"sum_logits": -12.421207427978516, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.317763328552246, "logits_per_token": -12.421207427978516, "logits_per_char": -1.7744582039969308, "num_chars": 7}, {"sum_logits": -11.859172821044922, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.893854141235352, "logits_per_token": -11.859172821044922, "logits_per_char": -1.976528803507487, "num_chars": 6}, {"sum_logits": -11.859172821044922, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.893854141235352, "logits_per_token": -11.859172821044922, "logits_per_char": -1.976528803507487, "num_chars": 6}, {"sum_logits": -14.498909950256348, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.504921913146973, "logits_per_token": -14.498909950256348, "logits_per_char": -2.416484991709391, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1115, "native_id": "f537f6bb8527724e0b1e1c1051326cd5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 4, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 12.030311584472656, "incorrect_loss_raw": 11.950100660324097, "correct_loss_per_char": 1.3367012871636286, "incorrect_loss_per_char": 1.2597543166233942, "correct_loss_per_token": 6.015155792236328, "incorrect_loss_per_token": 8.34760594367981, "correct_loss_uncond": -7.587398529052734, "incorrect_loss_uncond": -4.298241138458252}, "model_output": [{"sum_logits": -7.421138763427734, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.5768461227417, "logits_per_token": -7.421138763427734, "logits_per_char": -0.9276423454284668, "num_chars": 8}, {"sum_logits": -11.559306144714355, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.239578247070312, "logits_per_token": -11.559306144714355, "logits_per_char": -2.3118612289428713, "num_chars": 5}, {"sum_logits": -13.316301345825195, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.97499656677246, "logits_per_token": -6.658150672912598, "logits_per_char": -1.0243308727557843, "num_chars": 13}, {"sum_logits": -12.030311584472656, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.61771011352539, "logits_per_token": -6.015155792236328, "logits_per_char": -1.3367012871636286, "num_chars": 9}, {"sum_logits": -15.503656387329102, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.201946258544922, "logits_per_token": -7.751828193664551, "logits_per_char": -0.7751828193664551, "num_chars": 20}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1116, "native_id": "d3b145911a76fd6fbe9a23ab027be024", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.431494235992432, "incorrect_loss_raw": 6.906843543052673, "correct_loss_per_char": 0.7759277479989188, "incorrect_loss_per_char": 1.3056812822818755, "correct_loss_per_token": 5.431494235992432, "incorrect_loss_per_token": 6.906843543052673, "correct_loss_uncond": -7.177042484283447, "incorrect_loss_uncond": -6.777500510215759}, "model_output": [{"sum_logits": -5.431494235992432, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.608536720275879, "logits_per_token": -5.431494235992432, "logits_per_char": -0.7759277479989188, "num_chars": 7}, {"sum_logits": -4.414244174957275, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.136726379394531, "logits_per_token": -4.414244174957275, "logits_per_char": -0.8828488349914551, "num_chars": 5}, {"sum_logits": -6.443827152252197, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.616278648376465, "logits_per_token": -6.443827152252197, "logits_per_char": -1.2887654304504395, "num_chars": 5}, {"sum_logits": -10.651338577270508, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.858223915100098, "logits_per_token": -10.651338577270508, "logits_per_char": -1.5216197967529297, "num_chars": 7}, {"sum_logits": -6.117964267730713, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.126147270202637, "logits_per_token": -6.117964267730713, "logits_per_char": -1.5294910669326782, "num_chars": 4}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1117, "native_id": "dc2fa76467ff342abdb4cf142f92dddd", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.491807460784912, "incorrect_loss_raw": 9.703383684158325, "correct_loss_per_char": 0.17798624719892228, "incorrect_loss_per_char": 0.8200752678371611, "correct_loss_per_token": 1.245903730392456, "incorrect_loss_per_token": 4.126728534698486, "correct_loss_uncond": -13.385176181793213, "incorrect_loss_uncond": -10.557167291641235}, "model_output": [{"sum_logits": -11.59941291809082, "num_tokens": 4, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.99085807800293, "logits_per_token": -2.899853229522705, "logits_per_char": -0.8285294941493443, "num_chars": 14}, {"sum_logits": -2.491807460784912, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": true, "sum_logits_uncond": -15.876983642578125, "logits_per_token": -1.245903730392456, "logits_per_char": -0.17798624719892228, "num_chars": 14}, {"sum_logits": -10.148611068725586, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -22.587451934814453, "logits_per_token": -5.074305534362793, "logits_per_char": -0.8457175890604655, "num_chars": 12}, {"sum_logits": -6.621411323547363, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.746070861816406, "logits_per_token": -3.3107056617736816, "logits_per_char": -0.7357123692830404, "num_chars": 9}, {"sum_logits": -10.444099426269531, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.717823028564453, "logits_per_token": -5.222049713134766, "logits_per_char": -0.8703416188557943, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1118, "native_id": "246249cd7976358051a9811ff9c30736", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.392333030700684, "incorrect_loss_raw": 14.250348925590515, "correct_loss_per_char": 1.2784666061401366, "incorrect_loss_per_char": 1.535839198296998, "correct_loss_per_token": 6.392333030700684, "incorrect_loss_per_token": 8.711534559726715, "correct_loss_uncond": -7.191396713256836, "incorrect_loss_uncond": -2.6282745599746704}, "model_output": [{"sum_logits": -21.15045166015625, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -21.089855194091797, "logits_per_token": -10.575225830078125, "logits_per_char": -1.6269578200120192, "num_chars": 13}, {"sum_logits": -16.795347213745117, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.646121978759766, "logits_per_token": -8.397673606872559, "logits_per_char": -1.399612267812093, "num_chars": 12}, {"sum_logits": -6.364716053009033, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.095733642578125, "logits_per_token": -3.1823580265045166, "logits_per_char": -0.5786105502735485, "num_chars": 11}, {"sum_logits": -6.392333030700684, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.58372974395752, "logits_per_token": -6.392333030700684, "logits_per_char": -1.2784666061401366, "num_chars": 5}, {"sum_logits": -12.69088077545166, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.682783126831055, "logits_per_token": -12.69088077545166, "logits_per_char": -2.538176155090332, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1119, "native_id": "32be8cbc1b5a967310bcab8b80563481", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.4793853759765625, "incorrect_loss_raw": 12.736956596374512, "correct_loss_per_char": 0.34793853759765625, "incorrect_loss_per_char": 1.2183070321488223, "correct_loss_per_token": 1.7396926879882812, "incorrect_loss_per_token": 6.558272043863933, "correct_loss_uncond": -11.784721374511719, "incorrect_loss_uncond": -4.545115947723389}, "model_output": [{"sum_logits": -16.39747428894043, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.36315155029297, "logits_per_token": -5.4658247629801435, "logits_per_char": -0.9645573111141429, "num_chars": 17}, {"sum_logits": -3.4793853759765625, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.264106750488281, "logits_per_token": -1.7396926879882812, "logits_per_char": -0.34793853759765625, "num_chars": 10}, {"sum_logits": -10.608821868896484, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -16.890987396240234, "logits_per_token": -5.304410934448242, "logits_per_char": -1.3261027336120605, "num_chars": 8}, {"sum_logits": -16.957355499267578, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -21.514148712158203, "logits_per_token": -8.478677749633789, "logits_per_char": -1.8841506110297308, "num_chars": 9}, {"sum_logits": -6.984174728393555, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -11.360002517700195, "logits_per_token": -6.984174728393555, "logits_per_char": -0.6984174728393555, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1120, "native_id": "ad769851a59375865607452d3bf2a45d", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.909889221191406, "incorrect_loss_raw": 11.069064617156982, "correct_loss_per_char": 0.7424907684326172, "incorrect_loss_per_char": 1.966670999924342, "correct_loss_per_token": 4.454944610595703, "incorrect_loss_per_token": 11.069064617156982, "correct_loss_uncond": -5.806463241577148, "incorrect_loss_uncond": -0.7695236206054688}, "model_output": [{"sum_logits": -10.959569931030273, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.25329303741455, "logits_per_token": -10.959569931030273, "logits_per_char": -2.7398924827575684, "num_chars": 4}, {"sum_logits": -8.909889221191406, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.716352462768555, "logits_per_token": -4.454944610595703, "logits_per_char": -0.7424907684326172, "num_chars": 12}, {"sum_logits": -10.959569931030273, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.25329303741455, "logits_per_token": -10.959569931030273, "logits_per_char": -2.7398924827575684, "num_chars": 4}, {"sum_logits": -12.57133960723877, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.332226753234863, "logits_per_token": -12.57133960723877, "logits_per_char": -1.5714174509048462, "num_chars": 8}, {"sum_logits": -9.785778999328613, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.51554012298584, "logits_per_token": -9.785778999328613, "logits_per_char": -0.8154815832773844, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1121, "native_id": "5ea6b94d1a911365b06cf776919413e8", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 2.7269701957702637, "incorrect_loss_raw": 8.01345181465149, "correct_loss_per_char": 0.20976693813617414, "incorrect_loss_per_char": 0.8207533977076433, "correct_loss_per_token": 2.7269701957702637, "incorrect_loss_per_token": 5.341252525647482, "correct_loss_uncond": -12.537217617034912, "incorrect_loss_uncond": -8.501246452331543}, "model_output": [{"sum_logits": -11.029196739196777, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.671432495117188, "logits_per_token": -5.514598369598389, "logits_per_char": -1.2254663043551974, "num_chars": 9}, {"sum_logits": -2.7269701957702637, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.264187812805176, "logits_per_token": -2.7269701957702637, "logits_per_char": -0.20976693813617414, "num_chars": 13}, {"sum_logits": -5.984933853149414, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.550498962402344, "logits_per_token": -5.984933853149414, "logits_per_char": -0.6649926503499349, "num_chars": 9}, {"sum_logits": -7.761298179626465, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.694089889526367, "logits_per_token": -2.5870993932088218, "logits_per_char": -0.35278628089211206, "num_chars": 22}, {"sum_logits": -7.278378486633301, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.14277172088623, "logits_per_token": -7.278378486633301, "logits_per_char": -1.0397683552333288, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1122, "native_id": "820df15b615d221e38a71fcc44461085", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.489980697631836, "incorrect_loss_raw": 10.420149087905884, "correct_loss_per_char": 0.2489980697631836, "incorrect_loss_per_char": 0.7905330856641133, "correct_loss_per_token": 2.489980697631836, "incorrect_loss_per_token": 5.210074543952942, "correct_loss_uncond": -13.724943161010742, "incorrect_loss_uncond": -9.190743207931519}, "model_output": [{"sum_logits": -7.496977806091309, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.705928802490234, "logits_per_token": -3.7484889030456543, "logits_per_char": -0.624748150507609, "num_chars": 12}, {"sum_logits": -8.947364807128906, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.95168685913086, "logits_per_token": -4.473682403564453, "logits_per_char": -0.7456137339274088, "num_chars": 12}, {"sum_logits": -6.561212539672852, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.781972885131836, "logits_per_token": -3.280606269836426, "logits_per_char": -0.5467677116394043, "num_chars": 12}, {"sum_logits": -2.489980697631836, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.214923858642578, "logits_per_token": -2.489980697631836, "logits_per_char": -0.2489980697631836, "num_chars": 10}, {"sum_logits": -18.67504119873047, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.00398063659668, "logits_per_token": -9.337520599365234, "logits_per_char": -1.2450027465820312, "num_chars": 15}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1123, "native_id": "0a4a00ba435397c4a0496dd2c2426be7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.105558395385742, "incorrect_loss_raw": 4.048574447631836, "correct_loss_per_char": 0.729365485055106, "incorrect_loss_per_char": 0.7687542353357587, "correct_loss_per_token": 2.552779197692871, "incorrect_loss_per_token": 4.048574447631836, "correct_loss_uncond": -9.303497314453125, "incorrect_loss_uncond": -7.8708367347717285}, "model_output": [{"sum_logits": -3.068681240081787, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.759605407714844, "logits_per_token": -3.068681240081787, "logits_per_char": -0.6137362480163574, "num_chars": 5}, {"sum_logits": -5.9091386795043945, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.9099760055542, "logits_per_token": -5.9091386795043945, "logits_per_char": -0.8441626685006278, "num_chars": 7}, {"sum_logits": -3.7400288581848145, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -9.575843811035156, "logits_per_token": -3.7400288581848145, "logits_per_char": -0.7480057716369629, "num_chars": 5}, {"sum_logits": -3.4764490127563477, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.432219505310059, "logits_per_token": -3.4764490127563477, "logits_per_char": -0.8691122531890869, "num_chars": 4}, {"sum_logits": -5.105558395385742, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.409055709838867, "logits_per_token": -2.552779197692871, "logits_per_char": -0.729365485055106, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1124, "native_id": "a7f29f4aebe0e3bcb77038fea71bf28c", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.9228079319000244, "incorrect_loss_raw": 9.777676820755005, "correct_loss_per_char": 0.49035099148750305, "incorrect_loss_per_char": 1.1676229452475524, "correct_loss_per_token": 3.9228079319000244, "incorrect_loss_per_token": 8.424427111943563, "correct_loss_uncond": -10.029152631759644, "incorrect_loss_uncond": -5.182257652282715}, "model_output": [{"sum_logits": -8.049102783203125, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.315649032592773, "logits_per_token": -8.049102783203125, "logits_per_char": -1.3415171305338542, "num_chars": 6}, {"sum_logits": -8.119498252868652, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.046520233154297, "logits_per_token": -2.7064994176228843, "logits_per_char": -1.0149372816085815, "num_chars": 8}, {"sum_logits": -3.9228079319000244, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.951960563659668, "logits_per_token": -3.9228079319000244, "logits_per_char": -0.49035099148750305, "num_chars": 8}, {"sum_logits": -11.517498970031738, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.187653541564941, "logits_per_token": -11.517498970031738, "logits_per_char": -0.8859614592332107, "num_chars": 13}, {"sum_logits": -11.424607276916504, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.289915084838867, "logits_per_token": -11.424607276916504, "logits_per_char": -1.428075909614563, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1125, "native_id": "ecd32cc0c17d4738a27bba3399f04591", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.289580345153809, "incorrect_loss_raw": 10.248813033103943, "correct_loss_per_char": 0.23831001917521158, "incorrect_loss_per_char": 1.1695770075178555, "correct_loss_per_token": 2.1447901725769043, "incorrect_loss_per_token": 5.907854437828064, "correct_loss_uncond": -14.361285209655762, "incorrect_loss_uncond": -6.123343110084534}, "model_output": [{"sum_logits": -6.26758337020874, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.113859176635742, "logits_per_token": -6.26758337020874, "logits_per_char": -0.6963981522454156, "num_chars": 9}, {"sum_logits": -11.621896743774414, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.121906280517578, "logits_per_token": -5.810948371887207, "logits_per_char": -0.8939920572134165, "num_chars": 13}, {"sum_logits": -4.289580345153809, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.65086555480957, "logits_per_token": -2.1447901725769043, "logits_per_char": -0.23831001917521158, "num_chars": 18}, {"sum_logits": -9.963744163513184, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.204988479614258, "logits_per_token": -4.981872081756592, "logits_per_char": -1.9927488327026368, "num_chars": 5}, {"sum_logits": -13.142027854919434, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.047870635986328, "logits_per_token": -6.571013927459717, "logits_per_char": -1.0951689879099529, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1126, "native_id": "8b2af2d865b7dc500427786c846eacaf", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 4.856448650360107, "incorrect_loss_raw": 8.212979793548584, "correct_loss_per_char": 0.48564486503601073, "incorrect_loss_per_char": 0.6983392473423119, "correct_loss_per_token": 2.4282243251800537, "incorrect_loss_per_token": 5.745875597000122, "correct_loss_uncond": -13.646610736846924, "incorrect_loss_uncond": -7.519085645675659}, "model_output": [{"sum_logits": -9.695720672607422, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -19.11849594116211, "logits_per_token": -4.847860336303711, "logits_per_char": -0.8079767227172852, "num_chars": 12}, {"sum_logits": -3.628305435180664, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -13.357250213623047, "logits_per_token": -3.628305435180664, "logits_per_char": -0.453538179397583, "num_chars": 8}, {"sum_logits": -9.486780166625977, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -13.941094398498535, "logits_per_token": -9.486780166625977, "logits_per_char": -0.8624345606023615, "num_chars": 11}, {"sum_logits": -4.856448650360107, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -18.50305938720703, "logits_per_token": -2.4282243251800537, "logits_per_char": -0.48564486503601073, "num_chars": 10}, {"sum_logits": -10.041112899780273, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -16.51142120361328, "logits_per_token": -5.020556449890137, "logits_per_char": -0.6694075266520182, "num_chars": 15}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1127, "native_id": "383282aace64dd49138bac2392f8b38e", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.525849342346191, "incorrect_loss_raw": 10.257176160812378, "correct_loss_per_char": 0.8157311677932739, "incorrect_loss_per_char": 1.4092415344147455, "correct_loss_per_token": 6.525849342346191, "incorrect_loss_per_token": 7.146687746047974, "correct_loss_uncond": -7.909136772155762, "incorrect_loss_uncond": -4.97450852394104}, "model_output": [{"sum_logits": -8.834588050842285, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -13.904526710510254, "logits_per_token": -8.834588050842285, "logits_per_char": -1.4724313418070476, "num_chars": 6}, {"sum_logits": -6.525849342346191, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.434986114501953, "logits_per_token": -6.525849342346191, "logits_per_char": -0.8157311677932739, "num_chars": 8}, {"sum_logits": -9.713245391845703, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -17.265960693359375, "logits_per_token": -4.856622695922852, "logits_per_char": -1.618874231974284, "num_chars": 6}, {"sum_logits": -15.170661926269531, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -16.041133880615234, "logits_per_token": -7.585330963134766, "logits_per_char": -1.0836187090192522, "num_chars": 14}, {"sum_logits": -7.310209274291992, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -13.715117454528809, "logits_per_token": -7.310209274291992, "logits_per_char": -1.4620418548583984, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1128, "native_id": "eaf6838d29bcd4ebf408da2f75aa65c3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.545831680297852, "incorrect_loss_raw": 8.268121004104614, "correct_loss_per_char": 0.924305280049642, "incorrect_loss_per_char": 0.985367008617946, "correct_loss_per_token": 5.545831680297852, "incorrect_loss_per_token": 8.268121004104614, "correct_loss_uncond": -7.391714096069336, "incorrect_loss_uncond": -5.103172302246094}, "model_output": [{"sum_logits": -9.606134414672852, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.892389297485352, "logits_per_token": -9.606134414672852, "logits_per_char": -1.2007668018341064, "num_chars": 8}, {"sum_logits": -5.545831680297852, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.937545776367188, "logits_per_token": -5.545831680297852, "logits_per_char": -0.924305280049642, "num_chars": 6}, {"sum_logits": -6.912387847900391, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.88126277923584, "logits_per_token": -6.912387847900391, "logits_per_char": -0.9874839782714844, "num_chars": 7}, {"sum_logits": -6.27850341796875, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.009203910827637, "logits_per_token": -6.27850341796875, "logits_per_char": -0.8969290597098214, "num_chars": 7}, {"sum_logits": -10.275458335876465, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.702317237854004, "logits_per_token": -10.275458335876465, "logits_per_char": -0.8562881946563721, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1129, "native_id": "7c8bc9c0e56389eef033bca40c88c151", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.683422088623047, "incorrect_loss_raw": 9.822516441345215, "correct_loss_per_char": 0.2236185073852539, "incorrect_loss_per_char": 1.4917518774668375, "correct_loss_per_token": 1.3417110443115234, "incorrect_loss_per_token": 8.375600099563599, "correct_loss_uncond": -13.974845886230469, "incorrect_loss_uncond": -4.68606424331665}, "model_output": [{"sum_logits": -11.57533073425293, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.885549545288086, "logits_per_token": -5.787665367126465, "logits_per_char": -1.1575330734252929, "num_chars": 10}, {"sum_logits": -13.343685150146484, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.349469184875488, "logits_per_token": -13.343685150146484, "logits_per_char": -2.223947525024414, "num_chars": 6}, {"sum_logits": -8.660491943359375, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.698805809020996, "logits_per_token": -8.660491943359375, "logits_per_char": -1.4434153238932292, "num_chars": 6}, {"sum_logits": -2.683422088623047, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.658267974853516, "logits_per_token": -1.3417110443115234, "logits_per_char": -0.2236185073852539, "num_chars": 12}, {"sum_logits": -5.71055793762207, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -11.10049819946289, "logits_per_token": -5.71055793762207, "logits_per_char": -1.142111587524414, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1130, "native_id": "ca60a46c9007e4b6213f50bfb5342fdd", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 15.01469898223877, "incorrect_loss_raw": 9.06408941745758, "correct_loss_per_char": 1.2512249151865642, "incorrect_loss_per_char": 1.2539152428271279, "correct_loss_per_token": 5.004899660746257, "incorrect_loss_per_token": 9.06408941745758, "correct_loss_uncond": -6.961333274841309, "incorrect_loss_uncond": -4.23151433467865}, "model_output": [{"sum_logits": -11.68895435333252, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.949599266052246, "logits_per_token": -11.68895435333252, "logits_per_char": -1.461119294166565, "num_chars": 8}, {"sum_logits": -15.01469898223877, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -21.976032257080078, "logits_per_token": -5.004899660746257, "logits_per_char": -1.2512249151865642, "num_chars": 12}, {"sum_logits": -8.598408699035645, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.224496841430664, "logits_per_token": -8.598408699035645, "logits_per_char": -0.9553787443372939, "num_chars": 9}, {"sum_logits": -10.406129837036133, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.907820701599121, "logits_per_token": -10.406129837036133, "logits_per_char": -1.4865899767194475, "num_chars": 7}, {"sum_logits": -5.562864780426025, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -11.10049819946289, "logits_per_token": -5.562864780426025, "logits_per_char": -1.1125729560852051, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1131, "native_id": "f50209f04d11690d7c8f30e29b35ff02", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.5559258460998535, "incorrect_loss_raw": 10.722448825836182, "correct_loss_per_char": 0.5050841678272594, "incorrect_loss_per_char": 0.6961344063115052, "correct_loss_per_token": 2.7779629230499268, "incorrect_loss_per_token": 3.379360695679982, "correct_loss_uncond": -13.112071514129639, "incorrect_loss_uncond": -9.658087730407715}, "model_output": [{"sum_logits": -11.534563064575195, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.605184555053711, "logits_per_token": -3.8448543548583984, "logits_per_char": -0.9612135887145996, "num_chars": 12}, {"sum_logits": -5.5559258460998535, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.667997360229492, "logits_per_token": -2.7779629230499268, "logits_per_char": -0.5050841678272594, "num_chars": 11}, {"sum_logits": -9.349867820739746, "num_tokens": 4, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -17.732297897338867, "logits_per_token": -2.3374669551849365, "logits_per_char": -0.6678477014814105, "num_chars": 14}, {"sum_logits": -9.603464126586914, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -22.489356994628906, "logits_per_token": -3.2011547088623047, "logits_per_char": -0.5649096545051125, "num_chars": 17}, {"sum_logits": -12.401900291442871, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -26.6953067779541, "logits_per_token": -4.13396676381429, "logits_per_char": -0.5905666805448986, "num_chars": 21}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1132, "native_id": "d725f1c2e150a3221de31612123f3f46", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.918367385864258, "incorrect_loss_raw": 12.89456844329834, "correct_loss_per_char": 0.6575963762071397, "incorrect_loss_per_char": 0.87592370027031, "correct_loss_per_token": 2.959183692932129, "incorrect_loss_per_token": 5.361802260080974, "correct_loss_uncond": -11.827703475952148, "incorrect_loss_uncond": -8.814489126205444}, "model_output": [{"sum_logits": -5.918367385864258, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.746070861816406, "logits_per_token": -2.959183692932129, "logits_per_char": -0.6575963762071397, "num_chars": 9}, {"sum_logits": -14.765277862548828, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -23.38888931274414, "logits_per_token": -4.921759287516276, "logits_per_char": -0.8685457566205192, "num_chars": 17}, {"sum_logits": -10.760858535766602, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.57349395751953, "logits_per_token": -5.380429267883301, "logits_per_char": -0.5978254742092557, "num_chars": 18}, {"sum_logits": -19.876155853271484, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -27.591163635253906, "logits_per_token": -4.969038963317871, "logits_per_char": -1.4197254180908203, "num_chars": 14}, {"sum_logits": -6.175981521606445, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.282683372497559, "logits_per_token": -6.175981521606445, "logits_per_char": -0.6175981521606445, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1133, "native_id": "f7735d721dfdc94621154951d4eaa4cf", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.630217552185059, "incorrect_loss_raw": 8.949504137039185, "correct_loss_per_char": 0.5630217552185058, "incorrect_loss_per_char": 1.2571017643073936, "correct_loss_per_token": 5.630217552185059, "incorrect_loss_per_token": 8.949504137039185, "correct_loss_uncond": -7.104290008544922, "incorrect_loss_uncond": -4.551271677017212}, "model_output": [{"sum_logits": -11.135486602783203, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -12.695878028869629, "logits_per_token": -11.135486602783203, "logits_per_char": -1.012316963889382, "num_chars": 11}, {"sum_logits": -8.625251770019531, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -13.132861137390137, "logits_per_token": -8.625251770019531, "logits_per_char": -1.7250503540039062, "num_chars": 5}, {"sum_logits": -5.630217552185059, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -12.73450756072998, "logits_per_token": -5.630217552185059, "logits_per_char": -0.5630217552185058, "num_chars": 10}, {"sum_logits": -5.137504577636719, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.247040748596191, "logits_per_token": -5.137504577636719, "logits_per_char": -0.7339292253766742, "num_chars": 7}, {"sum_logits": -10.899773597717285, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -13.927323341369629, "logits_per_token": -10.899773597717285, "logits_per_char": -1.5571105139596122, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1134, "native_id": "eaf980db7e945b1cf6d648fa55ddcb5e", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.157231330871582, "incorrect_loss_raw": 9.397018909454346, "correct_loss_per_char": 0.4619145923190647, "incorrect_loss_per_char": 1.3176472306251525, "correct_loss_per_token": 4.157231330871582, "incorrect_loss_per_token": 9.397018909454346, "correct_loss_uncond": -9.827252388000488, "incorrect_loss_uncond": -4.888932943344116}, "model_output": [{"sum_logits": -5.385683059692383, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.84918212890625, "logits_per_token": -5.385683059692383, "logits_per_char": -0.6732103824615479, "num_chars": 8}, {"sum_logits": -4.157231330871582, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.98448371887207, "logits_per_token": -4.157231330871582, "logits_per_char": -0.4619145923190647, "num_chars": 9}, {"sum_logits": -11.467517852783203, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.944446563720703, "logits_per_token": -11.467517852783203, "logits_per_char": -2.2935035705566404, "num_chars": 5}, {"sum_logits": -16.244064331054688, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.676286697387695, "logits_per_token": -16.244064331054688, "logits_per_char": -1.8048960367838542, "num_chars": 9}, {"sum_logits": -4.490810394287109, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.6738920211792, "logits_per_token": -4.490810394287109, "logits_per_char": -0.4989789326985677, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1135, "native_id": "8bbfe8cd056d612e9d3190f278bef287", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.601971626281738, "incorrect_loss_raw": 14.670220851898193, "correct_loss_per_char": 1.4004929065704346, "incorrect_loss_per_char": 1.567360628862432, "correct_loss_per_token": 5.601971626281738, "incorrect_loss_per_token": 9.097298463185627, "correct_loss_uncond": -8.811445236206055, "incorrect_loss_uncond": -2.2715935707092285}, "model_output": [{"sum_logits": -11.853954315185547, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.359978675842285, "logits_per_token": -11.853954315185547, "logits_per_char": -1.0776322104714133, "num_chars": 11}, {"sum_logits": -19.559621810913086, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -22.004837036132812, "logits_per_token": -6.519873936971028, "logits_per_char": -1.1505659888772404, "num_chars": 17}, {"sum_logits": -5.601971626281738, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.413416862487793, "logits_per_token": -5.601971626281738, "logits_per_char": -1.4004929065704346, "num_chars": 4}, {"sum_logits": -8.763423919677734, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -11.780011177062988, "logits_per_token": -8.763423919677734, "logits_per_char": -2.1908559799194336, "num_chars": 4}, {"sum_logits": -18.503883361816406, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.6224308013916, "logits_per_token": -9.251941680908203, "logits_per_char": -1.8503883361816407, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1136, "native_id": "aa7c4c351cf8d59792aa68e3de339db4", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.6403605937957764, "incorrect_loss_raw": 14.83467960357666, "correct_loss_per_char": 0.3309418721632524, "incorrect_loss_per_char": 1.1013174593448638, "correct_loss_per_token": 1.8201802968978882, "incorrect_loss_per_token": 5.6993452707926435, "correct_loss_uncond": -13.539544343948364, "incorrect_loss_uncond": -2.0449466705322266}, "model_output": [{"sum_logits": -17.802474975585938, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.423994064331055, "logits_per_token": -5.9341583251953125, "logits_per_char": -1.1868316650390625, "num_chars": 15}, {"sum_logits": -3.6403605937957764, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.17990493774414, "logits_per_token": -1.8201802968978882, "logits_per_char": -0.3309418721632524, "num_chars": 11}, {"sum_logits": -23.087724685668945, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.918804168701172, "logits_per_token": -7.695908228556315, "logits_per_char": -1.442982792854309, "num_chars": 16}, {"sum_logits": -4.526712417602539, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.327245712280273, "logits_per_token": -4.526712417602539, "logits_per_char": -0.9053424835205078, "num_chars": 5}, {"sum_logits": -13.921806335449219, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.848461151123047, "logits_per_token": -4.640602111816406, "logits_per_char": -0.8701128959655762, "num_chars": 16}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1137, "native_id": "23df3bac9cfcb156f4cfd8a05f21c5e2", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.6103363037109375, "incorrect_loss_raw": 10.583885550498962, "correct_loss_per_char": 0.7344818115234375, "incorrect_loss_per_char": 0.9678438524405162, "correct_loss_per_token": 3.3051681518554688, "incorrect_loss_per_token": 5.291942775249481, "correct_loss_uncond": -9.676460266113281, "incorrect_loss_uncond": -7.1243637800216675}, "model_output": [{"sum_logits": -6.6103363037109375, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.28679656982422, "logits_per_token": -3.3051681518554688, "logits_per_char": -0.7344818115234375, "num_chars": 9}, {"sum_logits": -14.904438972473145, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.657578468322754, "logits_per_token": -7.452219486236572, "logits_per_char": -0.993629264831543, "num_chars": 15}, {"sum_logits": -14.302787780761719, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.498720169067383, "logits_per_token": -7.151393890380859, "logits_per_char": -0.9535191853841146, "num_chars": 15}, {"sum_logits": -6.331814765930176, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.86726951599121, "logits_per_token": -3.165907382965088, "logits_per_char": -0.791476845741272, "num_chars": 8}, {"sum_logits": -6.7965006828308105, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.809429168701172, "logits_per_token": -3.3982503414154053, "logits_per_char": -1.132750113805135, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1138, "native_id": "d21777d771dc6fd08e769d378651817e", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.07356071472168, "incorrect_loss_raw": 13.73872447013855, "correct_loss_per_char": 0.91577824679288, "incorrect_loss_per_char": 1.1390573631633414, "correct_loss_per_token": 5.03678035736084, "incorrect_loss_per_token": 6.781487345695496, "correct_loss_uncond": -7.699018478393555, "incorrect_loss_uncond": -3.2713935375213623}, "model_output": [{"sum_logits": -11.983376502990723, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -15.352895736694336, "logits_per_token": -5.991688251495361, "logits_per_char": -1.0893978639082476, "num_chars": 11}, {"sum_logits": -17.666662216186523, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -22.09073829650879, "logits_per_token": -8.833331108093262, "logits_per_char": -1.7666662216186524, "num_chars": 10}, {"sum_logits": -19.50589370727539, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -18.35116958618164, "logits_per_token": -6.501964569091797, "logits_per_char": -0.9752946853637695, "num_chars": 20}, {"sum_logits": -5.7989654541015625, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -12.245668411254883, "logits_per_token": -5.7989654541015625, "logits_per_char": -0.7248706817626953, "num_chars": 8}, {"sum_logits": -10.07356071472168, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -17.772579193115234, "logits_per_token": -5.03678035736084, "logits_per_char": -0.91577824679288, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1139, "native_id": "611a4cc0e288b8a11afa923f48cb2ab4", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.507726669311523, "incorrect_loss_raw": 11.314836144447327, "correct_loss_per_char": 0.5362661906651088, "incorrect_loss_per_char": 1.293987096560122, "correct_loss_per_token": 3.7538633346557617, "incorrect_loss_per_token": 5.635477860768637, "correct_loss_uncond": -9.897855758666992, "incorrect_loss_uncond": -6.8656007051467896}, "model_output": [{"sum_logits": -22.930110931396484, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -21.749122619628906, "logits_per_token": -7.643370310465495, "logits_per_char": -2.8662638664245605, "num_chars": 8}, {"sum_logits": -9.46712589263916, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -12.608536720275879, "logits_per_token": -9.46712589263916, "logits_per_char": -1.3524465560913086, "num_chars": 7}, {"sum_logits": -6.864275932312012, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -17.053464889526367, "logits_per_token": -3.432137966156006, "logits_per_char": -0.6240250847556374, "num_chars": 11}, {"sum_logits": -5.99783182144165, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -21.310623168945312, "logits_per_token": -1.9992772738138835, "logits_per_char": -0.33321287896898055, "num_chars": 18}, {"sum_logits": -7.507726669311523, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -17.405582427978516, "logits_per_token": -3.7538633346557617, "logits_per_char": -0.5362661906651088, "num_chars": 14}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1140, "native_id": "8e7941ce31996ca83cc0a68f7313c96d", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.154313802719116, "incorrect_loss_raw": 8.608582973480225, "correct_loss_per_char": 0.2692892253398895, "incorrect_loss_per_char": 0.9330038550354185, "correct_loss_per_token": 2.154313802719116, "incorrect_loss_per_token": 8.608582973480225, "correct_loss_uncond": -14.074297189712524, "incorrect_loss_uncond": -5.906564474105835}, "model_output": [{"sum_logits": -7.39952278137207, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.70338249206543, "logits_per_token": -7.39952278137207, "logits_per_char": -1.057074683053153, "num_chars": 7}, {"sum_logits": -2.154313802719116, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.22861099243164, "logits_per_token": -2.154313802719116, "logits_per_char": -0.2692892253398895, "num_chars": 8}, {"sum_logits": -4.804155349731445, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.723129272460938, "logits_per_token": -4.804155349731445, "logits_per_char": -0.48041553497314454, "num_chars": 10}, {"sum_logits": -14.023356437683105, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.552626609802246, "logits_per_token": -14.023356437683105, "logits_per_char": -1.168613036473592, "num_chars": 12}, {"sum_logits": -8.207297325134277, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.081451416015625, "logits_per_token": -8.207297325134277, "logits_per_char": -1.0259121656417847, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1141, "native_id": "ea02772e27f5bd40eced3b65e8c6427f", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 9.495162963867188, "incorrect_loss_raw": 8.893835663795471, "correct_loss_per_char": 0.7303971510667068, "incorrect_loss_per_char": 1.6313396380061196, "correct_loss_per_token": 9.495162963867188, "incorrect_loss_per_token": 8.893835663795471, "correct_loss_uncond": -6.213669776916504, "incorrect_loss_uncond": -4.949686169624329}, "model_output": [{"sum_logits": -9.242897033691406, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.19228744506836, "logits_per_token": -9.242897033691406, "logits_per_char": -2.3107242584228516, "num_chars": 4}, {"sum_logits": -9.495162963867188, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.708832740783691, "logits_per_token": -9.495162963867188, "logits_per_char": -0.7303971510667068, "num_chars": 13}, {"sum_logits": -12.204948425292969, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.223045349121094, "logits_per_token": -12.204948425292969, "logits_per_char": -2.0341580708821616, "num_chars": 6}, {"sum_logits": -7.312479019165039, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.218299865722656, "logits_per_token": -7.312479019165039, "logits_per_char": -1.0446398598807198, "num_chars": 7}, {"sum_logits": -6.815018177032471, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.74045467376709, "logits_per_token": -6.815018177032471, "logits_per_char": -1.1358363628387451, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1142, "native_id": "de54d03e69d9765872f95ff06ed21499", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.63016414642334, "incorrect_loss_raw": 13.089394569396973, "correct_loss_per_char": 0.6878688676016671, "incorrect_loss_per_char": 2.2115831658953713, "correct_loss_per_token": 4.81508207321167, "incorrect_loss_per_token": 13.089394569396973, "correct_loss_uncond": -7.851788520812988, "incorrect_loss_uncond": -1.3966410160064697}, "model_output": [{"sum_logits": -13.26414680480957, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.76987361907959, "logits_per_token": -13.26414680480957, "logits_per_char": -3.3160367012023926, "num_chars": 4}, {"sum_logits": -9.63016414642334, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.481952667236328, "logits_per_token": -4.81508207321167, "logits_per_char": -0.6878688676016671, "num_chars": 14}, {"sum_logits": -15.085147857666016, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.126542091369629, "logits_per_token": -15.085147857666016, "logits_per_char": -2.5141913096110025, "num_chars": 6}, {"sum_logits": -13.029979705810547, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.98448371887207, "logits_per_token": -13.029979705810547, "logits_per_char": -1.4477755228678386, "num_chars": 9}, {"sum_logits": -10.978303909301758, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.06324291229248, "logits_per_token": -10.978303909301758, "logits_per_char": -1.568329129900251, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1143, "native_id": "b231a732a3fdf0621391e7e385f8d651", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.106030464172363, "incorrect_loss_raw": 8.76522946357727, "correct_loss_per_char": 0.5106030464172363, "incorrect_loss_per_char": 0.8292830893487643, "correct_loss_per_token": 2.5530152320861816, "incorrect_loss_per_token": 6.548430681228638, "correct_loss_uncond": -12.959639549255371, "incorrect_loss_uncond": -7.1493141651153564}, "model_output": [{"sum_logits": -8.356535911560059, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -21.066099166870117, "logits_per_token": -4.178267955780029, "logits_per_char": -0.4642519950866699, "num_chars": 18}, {"sum_logits": -9.377854347229004, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.892102241516113, "logits_per_token": -4.688927173614502, "logits_per_char": -0.852532213384455, "num_chars": 11}, {"sum_logits": -11.713765144348145, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.758963584899902, "logits_per_token": -11.713765144348145, "logits_per_char": -1.0648877403952859, "num_chars": 11}, {"sum_logits": -5.106030464172363, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -18.065670013427734, "logits_per_token": -2.5530152320861816, "logits_per_char": -0.5106030464172363, "num_chars": 10}, {"sum_logits": -5.612762451171875, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -11.941009521484375, "logits_per_token": -5.612762451171875, "logits_per_char": -0.9354604085286459, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1144, "native_id": "b9121c3228f961c5ad68958c702cd94b", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.103409767150879, "incorrect_loss_raw": 11.977980375289917, "correct_loss_per_char": 0.9184917970137163, "incorrect_loss_per_char": 1.168234089442662, "correct_loss_per_token": 5.0517048835754395, "incorrect_loss_per_token": 7.052144289016724, "correct_loss_uncond": -9.699782371520996, "incorrect_loss_uncond": -6.104006767272949}, "model_output": [{"sum_logits": -10.440742492675781, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.74686050415039, "logits_per_token": -5.220371246337891, "logits_per_char": -1.044074249267578, "num_chars": 10}, {"sum_logits": -8.505232810974121, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.858729362487793, "logits_per_token": -8.505232810974121, "logits_per_char": -1.2150332587105888, "num_chars": 7}, {"sum_logits": -10.103409767150879, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.803192138671875, "logits_per_token": -5.0517048835754395, "logits_per_char": -0.9184917970137163, "num_chars": 11}, {"sum_logits": -13.750283241271973, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.116256713867188, "logits_per_token": -6.875141620635986, "logits_per_char": -1.1458569367726643, "num_chars": 12}, {"sum_logits": -15.215662956237793, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.606101989746094, "logits_per_token": -7.6078314781188965, "logits_per_char": -1.2679719130198162, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1145, "native_id": "4015ab002ff8c233d1c7ef26f5156b88", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.191345691680908, "incorrect_loss_raw": 12.129733085632324, "correct_loss_per_char": 0.6537586992437189, "incorrect_loss_per_char": 1.077543095354632, "correct_loss_per_token": 3.595672845840454, "incorrect_loss_per_token": 6.365748405456543, "correct_loss_uncond": -11.9159255027771, "incorrect_loss_uncond": -7.028166055679321}, "model_output": [{"sum_logits": -8.776336669921875, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.211520195007324, "logits_per_token": -8.776336669921875, "logits_per_char": -1.2537623814174108, "num_chars": 7}, {"sum_logits": -12.738563537597656, "num_tokens": 4, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.787853240966797, "logits_per_token": -3.184640884399414, "logits_per_char": -1.0615469614664714, "num_chars": 12}, {"sum_logits": -14.991376876831055, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -22.80244255065918, "logits_per_token": -7.495688438415527, "logits_per_char": -1.0708126340593611, "num_chars": 14}, {"sum_logits": -12.012655258178711, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.82978057861328, "logits_per_token": -6.0063276290893555, "logits_per_char": -0.9240504044752854, "num_chars": 13}, {"sum_logits": -7.191345691680908, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.107271194458008, "logits_per_token": -3.595672845840454, "logits_per_char": -0.6537586992437189, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1146, "native_id": "0197ade3bb26d163ab2e284c960c626f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.243806838989258, "incorrect_loss_raw": 8.491132020950317, "correct_loss_per_char": 0.5406344731648763, "incorrect_loss_per_char": 1.120191631920926, "correct_loss_per_token": 3.243806838989258, "incorrect_loss_per_token": 4.753126800060272, "correct_loss_uncond": -8.892265319824219, "incorrect_loss_uncond": -8.587512016296387}, "model_output": [{"sum_logits": -3.243806838989258, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.136072158813477, "logits_per_token": -3.243806838989258, "logits_per_char": -0.5406344731648763, "num_chars": 6}, {"sum_logits": -9.085731506347656, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.409378051757812, "logits_per_token": -4.542865753173828, "logits_per_char": -0.8259755914861505, "num_chars": 11}, {"sum_logits": -13.016227722167969, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.185766220092773, "logits_per_token": -6.508113861083984, "logits_per_char": -1.8594611031668526, "num_chars": 7}, {"sum_logits": -4.060486316680908, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.17259693145752, "logits_per_token": -4.060486316680908, "logits_per_char": -1.015121579170227, "num_chars": 4}, {"sum_logits": -7.802082538604736, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.54683494567871, "logits_per_token": -3.901041269302368, "logits_per_char": -0.7802082538604737, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1147, "native_id": "a90f9197a13c64089c9ba95bcba275ad", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.615322589874268, "incorrect_loss_raw": 7.362247824668884, "correct_loss_per_char": 0.5512768824895223, "incorrect_loss_per_char": 0.7303665990781303, "correct_loss_per_token": 3.307661294937134, "incorrect_loss_per_token": 4.436782538890839, "correct_loss_uncond": -11.495143413543701, "incorrect_loss_uncond": -11.862284064292908}, "model_output": [{"sum_logits": -6.045269012451172, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.495186805725098, "logits_per_token": -6.045269012451172, "logits_per_char": -0.6716965569390191, "num_chars": 9}, {"sum_logits": -6.398448467254639, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -21.709285736083984, "logits_per_token": -3.1992242336273193, "logits_per_char": -0.6398448467254638, "num_chars": 10}, {"sum_logits": -7.039011001586914, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -20.62916374206543, "logits_per_token": -3.519505500793457, "logits_per_char": -0.7039011001586915, "num_chars": 10}, {"sum_logits": -6.615322589874268, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.11046600341797, "logits_per_token": -3.307661294937134, "logits_per_char": -0.5512768824895223, "num_chars": 12}, {"sum_logits": -9.966262817382812, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -20.064491271972656, "logits_per_token": -4.983131408691406, "logits_per_char": -0.9060238924893466, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1148, "native_id": "684204df916cc58d47293960f9c6ed9f", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.999050617218018, "incorrect_loss_raw": 8.872476816177368, "correct_loss_per_char": 0.8570072310311454, "incorrect_loss_per_char": 0.8934411606509169, "correct_loss_per_token": 5.999050617218018, "incorrect_loss_per_token": 7.344042539596558, "correct_loss_uncond": -7.610733509063721, "incorrect_loss_uncond": -5.4792046546936035}, "model_output": [{"sum_logits": -12.227474212646484, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.325485229492188, "logits_per_token": -6.113737106323242, "logits_per_char": -0.940574939434345, "num_chars": 13}, {"sum_logits": -7.160300254821777, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.184654235839844, "logits_per_token": -7.160300254821777, "logits_per_char": -0.5966916879018148, "num_chars": 12}, {"sum_logits": -6.155488967895508, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.897298812866211, "logits_per_token": -6.155488967895508, "logits_per_char": -0.6155488967895508, "num_chars": 10}, {"sum_logits": -9.946643829345703, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.999287605285645, "logits_per_token": -9.946643829345703, "logits_per_char": -1.4209491184779577, "num_chars": 7}, {"sum_logits": -5.999050617218018, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.609784126281738, "logits_per_token": -5.999050617218018, "logits_per_char": -0.8570072310311454, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1149, "native_id": "a2aa95861ef74bf1ecfc55db505e3982", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 15.10763931274414, "incorrect_loss_raw": 11.762188911437988, "correct_loss_per_char": 1.0071759541829428, "incorrect_loss_per_char": 1.2337625148968818, "correct_loss_per_token": 7.55381965637207, "incorrect_loss_per_token": 6.094048500061035, "correct_loss_uncond": -6.410484313964844, "incorrect_loss_uncond": -5.683632850646973}, "model_output": [{"sum_logits": -12.642812728881836, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.390138626098633, "logits_per_token": -4.214270909627278, "logits_per_char": -0.9725240560678335, "num_chars": 13}, {"sum_logits": -10.434444427490234, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.289554595947266, "logits_per_token": -10.434444427490234, "logits_per_char": -1.7390740712483723, "num_chars": 6}, {"sum_logits": -13.549623489379883, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.33009147644043, "logits_per_token": -4.516541163126628, "logits_per_char": -1.3549623489379883, "num_chars": 10}, {"sum_logits": -15.10763931274414, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -21.518123626708984, "logits_per_token": -7.55381965637207, "logits_per_char": -1.0071759541829428, "num_chars": 15}, {"sum_logits": -10.421875, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.773502349853516, "logits_per_token": -5.2109375, "logits_per_char": -0.8684895833333334, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1150, "native_id": "8555dd9667d010018961a2f7d1c22704", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.410581111907959, "incorrect_loss_raw": 7.019425392150879, "correct_loss_per_char": 0.6821162223815918, "incorrect_loss_per_char": 0.9286355963345042, "correct_loss_per_token": 3.410581111907959, "incorrect_loss_per_token": 6.131734132766724, "correct_loss_uncond": -9.81321382522583, "incorrect_loss_uncond": -6.881047487258911}, "model_output": [{"sum_logits": -13.005961418151855, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.363737106323242, "logits_per_token": -13.005961418151855, "logits_per_char": -1.857994488307408, "num_chars": 7}, {"sum_logits": -3.1434741020202637, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.21864128112793, "logits_per_token": -3.1434741020202637, "logits_per_char": -0.6286948204040528, "num_chars": 5}, {"sum_logits": -7.101530075073242, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.236899375915527, "logits_per_token": -3.550765037536621, "logits_per_char": -0.7890588972303603, "num_chars": 9}, {"sum_logits": -3.410581111907959, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.223794937133789, "logits_per_token": -3.410581111907959, "logits_per_char": -0.6821162223815918, "num_chars": 5}, {"sum_logits": -4.826735973358154, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.782613754272461, "logits_per_token": -4.826735973358154, "logits_per_char": -0.43879417939619586, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1151, "native_id": "84a761f516efce04ab27d7ca8dd25255", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.376445770263672, "incorrect_loss_raw": 8.116453409194946, "correct_loss_per_char": 0.6443419823279748, "incorrect_loss_per_char": 0.735474104983638, "correct_loss_per_token": 2.7921485900878906, "incorrect_loss_per_token": 6.346230387687683, "correct_loss_uncond": -9.791975021362305, "incorrect_loss_uncond": -7.9825544357299805}, "model_output": [{"sum_logits": -9.385905265808105, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.208698272705078, "logits_per_token": -4.692952632904053, "logits_per_char": -0.5866190791130066, "num_chars": 16}, {"sum_logits": -8.376445770263672, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.168420791625977, "logits_per_token": -2.7921485900878906, "logits_per_char": -0.6443419823279748, "num_chars": 13}, {"sum_logits": -12.726640701293945, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.357338905334473, "logits_per_token": -12.726640701293945, "logits_per_char": -1.4140711890326605, "num_chars": 9}, {"sum_logits": -5.577388763427734, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.23681640625, "logits_per_token": -5.577388763427734, "logits_per_char": -0.5070353421297941, "num_chars": 11}, {"sum_logits": -4.77587890625, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.593177795410156, "logits_per_token": -2.387939453125, "logits_per_char": -0.4341708096590909, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1152, "native_id": "45a6becd307342669d9d17474e50b97a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.40104866027832, "incorrect_loss_raw": 13.621239185333252, "correct_loss_per_char": 0.6118263917810777, "incorrect_loss_per_char": 1.4623031859353426, "correct_loss_per_token": 2.60026216506958, "incorrect_loss_per_token": 7.454137523969014, "correct_loss_uncond": -16.779062271118164, "incorrect_loss_uncond": -4.753964424133301}, "model_output": [{"sum_logits": -6.177891731262207, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.15621566772461, "logits_per_token": -3.0889458656311035, "logits_per_char": -0.5148243109385172, "num_chars": 12}, {"sum_logits": -11.115622520446777, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.800455093383789, "logits_per_token": -11.115622520446777, "logits_per_char": -2.2231245040893555, "num_chars": 5}, {"sum_logits": -10.40104866027832, "num_tokens": 4, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -27.180110931396484, "logits_per_token": -2.60026216506958, "logits_per_char": -0.6118263917810777, "num_chars": 17}, {"sum_logits": -19.289005279541016, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.545101165771484, "logits_per_token": -9.644502639770508, "logits_per_char": -1.4837696368877704, "num_chars": 13}, {"sum_logits": -17.902437210083008, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -22.999042510986328, "logits_per_token": -5.967479070027669, "logits_per_char": -1.627494291825728, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1153, "native_id": "c509c499bace6de324b39c0d4d0c30fa", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.952758312225342, "incorrect_loss_raw": 8.692553460597992, "correct_loss_per_char": 0.7075369017464774, "incorrect_loss_per_char": 1.3586884101231893, "correct_loss_per_token": 4.952758312225342, "incorrect_loss_per_token": 8.692553460597992, "correct_loss_uncond": -9.118746280670166, "incorrect_loss_uncond": -4.627144873142242}, "model_output": [{"sum_logits": -3.0126495361328125, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.973722457885742, "logits_per_token": -3.0126495361328125, "logits_per_char": -0.2510541280110677, "num_chars": 12}, {"sum_logits": -3.856553792953491, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.10049819946289, "logits_per_token": -3.856553792953491, "logits_per_char": -0.7713107585906982, "num_chars": 5}, {"sum_logits": -12.33016586303711, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.0025053024292, "logits_per_token": -12.33016586303711, "logits_per_char": -2.466033172607422, "num_chars": 5}, {"sum_logits": -15.570844650268555, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.202067375183105, "logits_per_token": -15.570844650268555, "logits_per_char": -1.9463555812835693, "num_chars": 8}, {"sum_logits": -4.952758312225342, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.071504592895508, "logits_per_token": -4.952758312225342, "logits_per_char": -0.7075369017464774, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1154, "native_id": "77ddc9134bb27f9962aa2ed5ec5a5ef9", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.69566535949707, "incorrect_loss_raw": 14.589364290237427, "correct_loss_per_char": 0.4347832679748535, "incorrect_loss_per_char": 1.4654207229614258, "correct_loss_per_token": 4.347832679748535, "incorrect_loss_per_token": 9.646298805872599, "correct_loss_uncond": -8.31403923034668, "incorrect_loss_uncond": -3.0808403491973877}, "model_output": [{"sum_logits": -19.059814453125, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.116256713867188, "logits_per_token": -9.5299072265625, "logits_per_char": -1.58831787109375, "num_chars": 12}, {"sum_logits": -8.69566535949707, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.00970458984375, "logits_per_token": -4.347832679748535, "logits_per_char": -0.4347832679748535, "num_chars": 20}, {"sum_logits": -12.835777282714844, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.901809692382812, "logits_per_token": -12.835777282714844, "logits_per_char": -2.139296213785807, "num_chars": 6}, {"sum_logits": -11.098333358764648, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.58857536315918, "logits_per_token": -11.098333358764648, "logits_per_char": -1.109833335876465, "num_chars": 10}, {"sum_logits": -15.363532066345215, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.074176788330078, "logits_per_token": -5.121177355448405, "logits_per_char": -1.024235471089681, "num_chars": 15}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1155, "native_id": "715583129369c0c5c9f499c93a1c095e", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 12.818323135375977, "incorrect_loss_raw": 13.312939405441284, "correct_loss_per_char": 1.4242581261528864, "incorrect_loss_per_char": 1.3146704906175115, "correct_loss_per_token": 4.272774378458659, "incorrect_loss_per_token": 8.36933223406474, "correct_loss_uncond": -4.196496963500977, "incorrect_loss_uncond": -2.0357441902160645}, "model_output": [{"sum_logits": -12.739245414733887, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.79974365234375, "logits_per_token": -4.246415138244629, "logits_per_char": -1.819892202104841, "num_chars": 7}, {"sum_logits": -16.92239761352539, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.148038864135742, "logits_per_token": -5.640799204508464, "logits_per_char": -0.9954351537367877, "num_chars": 17}, {"sum_logits": -12.129833221435547, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.558809280395508, "logits_per_token": -12.129833221435547, "logits_per_char": -1.010819435119629, "num_chars": 12}, {"sum_logits": -11.460281372070312, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.888142585754395, "logits_per_token": -11.460281372070312, "logits_per_char": -1.432535171508789, "num_chars": 8}, {"sum_logits": -12.818323135375977, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.014820098876953, "logits_per_token": -4.272774378458659, "logits_per_char": -1.4242581261528864, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1156, "native_id": "a478e8b7c049781574f7fbb11ba1eec0", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.971956253051758, "incorrect_loss_raw": 8.525449752807617, "correct_loss_per_char": 0.7746618058946397, "incorrect_loss_per_char": 1.05928052555431, "correct_loss_per_token": 6.971956253051758, "incorrect_loss_per_token": 7.288973331451416, "correct_loss_uncond": -8.024286270141602, "incorrect_loss_uncond": -5.416352987289429}, "model_output": [{"sum_logits": -9.89181137084961, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.580192565917969, "logits_per_token": -4.945905685424805, "logits_per_char": -0.8243176142374674, "num_chars": 12}, {"sum_logits": -6.971956253051758, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.99624252319336, "logits_per_token": -6.971956253051758, "logits_per_char": -0.7746618058946397, "num_chars": 9}, {"sum_logits": -8.212953567504883, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.164139747619629, "logits_per_token": -8.212953567504883, "logits_per_char": -0.7466321425004439, "num_chars": 11}, {"sum_logits": -5.319126129150391, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.81769847869873, "logits_per_token": -5.319126129150391, "logits_per_char": -0.8865210215250651, "num_chars": 6}, {"sum_logits": -10.677907943725586, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -12.205180168151855, "logits_per_token": -10.677907943725586, "logits_per_char": -1.7796513239542644, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1157, "native_id": "f427f9de6bf580314531baf86de8acbc", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.417630195617676, "incorrect_loss_raw": 10.760899305343628, "correct_loss_per_char": 0.6310900279453823, "incorrect_loss_per_char": 1.5520539204279582, "correct_loss_per_token": 4.417630195617676, "incorrect_loss_per_token": 10.760899305343628, "correct_loss_uncond": -9.373640060424805, "incorrect_loss_uncond": -2.6142489910125732}, "model_output": [{"sum_logits": -11.208124160766602, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.163476943969727, "logits_per_token": -11.208124160766602, "logits_per_char": -1.8680206934611003, "num_chars": 6}, {"sum_logits": -4.417630195617676, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.79127025604248, "logits_per_token": -4.417630195617676, "logits_per_char": -0.6310900279453823, "num_chars": 7}, {"sum_logits": -10.020859718322754, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.0183687210083, "logits_per_token": -10.020859718322754, "logits_per_char": -2.004171943664551, "num_chars": 5}, {"sum_logits": -15.632144927978516, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.214923858642578, "logits_per_token": -15.632144927978516, "logits_per_char": -1.5632144927978515, "num_chars": 10}, {"sum_logits": -6.182468414306641, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.1038236618042, "logits_per_token": -6.182468414306641, "logits_per_char": -0.7728085517883301, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1158, "native_id": "0f7425ecbe369bf41a230aab92d84132", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.054463386535645, "incorrect_loss_raw": 7.792156100273132, "correct_loss_per_char": 0.7545386155446371, "incorrect_loss_per_char": 0.9276398179507015, "correct_loss_per_token": 4.527231693267822, "incorrect_loss_per_token": 5.517573809623718, "correct_loss_uncond": -7.640406608581543, "incorrect_loss_uncond": -7.207139611244202}, "model_output": [{"sum_logits": -11.37291145324707, "num_tokens": 5, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -18.798311233520508, "logits_per_token": -2.274582290649414, "logits_per_char": -0.6318284140692817, "num_chars": 18}, {"sum_logits": -9.054463386535645, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -16.694869995117188, "logits_per_token": -4.527231693267822, "logits_per_char": -0.7545386155446371, "num_chars": 12}, {"sum_logits": -5.554196357727051, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -11.685454368591309, "logits_per_token": -5.554196357727051, "logits_per_char": -0.9256993929545084, "num_chars": 6}, {"sum_logits": -11.330195426940918, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -15.182879447937012, "logits_per_token": -11.330195426940918, "logits_per_char": -1.888365904490153, "num_chars": 6}, {"sum_logits": -2.9113211631774902, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -14.330537796020508, "logits_per_token": -2.9113211631774902, "logits_per_char": -0.26466556028886273, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1159, "native_id": "c872c08a95dd28a16479b76f240a4ad5", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 4.593733787536621, "incorrect_loss_raw": 10.745498299598694, "correct_loss_per_char": 0.7656222979227701, "incorrect_loss_per_char": 0.9717505932727577, "correct_loss_per_token": 4.593733787536621, "incorrect_loss_per_token": 8.620450615882874, "correct_loss_uncond": -7.0917205810546875, "incorrect_loss_uncond": -3.963464617729187}, "model_output": [{"sum_logits": -9.145593643188477, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -13.986492156982422, "logits_per_token": -9.145593643188477, "logits_per_char": -0.6097062428792318, "num_chars": 15}, {"sum_logits": -10.163322448730469, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -14.192317008972168, "logits_per_token": -10.163322448730469, "logits_per_char": -1.016332244873047, "num_chars": 10}, {"sum_logits": -6.672695636749268, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -13.609784126281738, "logits_per_token": -6.672695636749268, "logits_per_char": -0.9532422338213239, "num_chars": 7}, {"sum_logits": -4.593733787536621, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -11.685454368591309, "logits_per_token": -4.593733787536621, "logits_per_char": -0.7656222979227701, "num_chars": 6}, {"sum_logits": -17.000381469726562, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -17.047258377075195, "logits_per_token": -8.500190734863281, "logits_per_char": -1.307721651517428, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1160, "native_id": "08d908ed723f813574992195d61386a2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.262746810913086, "incorrect_loss_raw": 12.415280938148499, "correct_loss_per_char": 1.1147951646284624, "incorrect_loss_per_char": 1.6140941220702545, "correct_loss_per_token": 6.131373405456543, "incorrect_loss_per_token": 8.916231195131939, "correct_loss_uncond": -10.02110481262207, "incorrect_loss_uncond": -5.218558192253113}, "model_output": [{"sum_logits": -7.0270304679870605, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.1416015625, "logits_per_token": -2.34234348932902, "logits_per_char": -0.585585872332255, "num_chars": 12}, {"sum_logits": -12.262746810913086, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -22.283851623535156, "logits_per_token": -6.131373405456543, "logits_per_char": -1.1147951646284624, "num_chars": 11}, {"sum_logits": -15.097681045532227, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.04529857635498, "logits_per_token": -15.097681045532227, "logits_per_char": -1.8872101306915283, "num_chars": 8}, {"sum_logits": -13.967267990112305, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -24.009336471557617, "logits_per_token": -4.655755996704102, "logits_per_char": -1.269751635464755, "num_chars": 11}, {"sum_logits": -13.569144248962402, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.339119911193848, "logits_per_token": -13.569144248962402, "logits_per_char": -2.7138288497924803, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1161, "native_id": "5365fd00ef8cec62ee5685e246a939db", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 12.115768432617188, "incorrect_loss_raw": 14.30668044090271, "correct_loss_per_char": 0.7572355270385742, "incorrect_loss_per_char": 1.0604480881316989, "correct_loss_per_token": 6.057884216308594, "incorrect_loss_per_token": 7.418830156326294, "correct_loss_uncond": -5.930667877197266, "incorrect_loss_uncond": -3.827746629714966}, "model_output": [{"sum_logits": -16.257354736328125, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -20.903133392333984, "logits_per_token": -8.128677368164062, "logits_per_char": -0.9563149844898897, "num_chars": 17}, {"sum_logits": -9.580611228942871, "num_tokens": 3, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -16.006271362304688, "logits_per_token": -3.1935370763142905, "logits_per_char": -0.7983842690785726, "num_chars": 12}, {"sum_logits": -19.55347442626953, "num_tokens": 3, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -20.835399627685547, "logits_per_token": -6.517824808756511, "logits_per_char": -1.303564961751302, "num_chars": 15}, {"sum_logits": -11.835281372070312, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -14.792903900146484, "logits_per_token": -11.835281372070312, "logits_per_char": -1.1835281372070312, "num_chars": 10}, {"sum_logits": -12.115768432617188, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -18.046436309814453, "logits_per_token": -6.057884216308594, "logits_per_char": -0.7572355270385742, "num_chars": 16}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1162, "native_id": "5649bd90dbb57e223fd843b7a4563a0f", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.335150241851807, "incorrect_loss_raw": 10.089512705802917, "correct_loss_per_char": 1.2670300483703614, "incorrect_loss_per_char": 1.1667599507740567, "correct_loss_per_token": 6.335150241851807, "incorrect_loss_per_token": 8.086931109428406, "correct_loss_uncond": -4.682952404022217, "incorrect_loss_uncond": -5.476981282234192}, "model_output": [{"sum_logits": -9.267852783203125, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.635601997375488, "logits_per_token": -9.267852783203125, "logits_per_char": -1.3239789690290178, "num_chars": 7}, {"sum_logits": -8.108001708984375, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.755424499511719, "logits_per_token": -8.108001708984375, "logits_per_char": -1.0135002136230469, "num_chars": 8}, {"sum_logits": -6.961543560028076, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.465144157409668, "logits_per_token": -6.961543560028076, "logits_per_char": -0.9945062228611538, "num_chars": 7}, {"sum_logits": -6.335150241851807, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -11.018102645874023, "logits_per_token": -6.335150241851807, "logits_per_char": -1.2670300483703614, "num_chars": 5}, {"sum_logits": -16.020652770996094, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -19.409805297851562, "logits_per_token": -8.010326385498047, "logits_per_char": -1.3350543975830078, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1163, "native_id": "0a2195ae8d4706abc5721578c9991466", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.7540178298950195, "incorrect_loss_raw": 8.991424798965454, "correct_loss_per_char": 0.39616815249125165, "incorrect_loss_per_char": 0.89740189470426, "correct_loss_per_token": 2.3770089149475098, "incorrect_loss_per_token": 5.226674318313599, "correct_loss_uncond": -14.027955055236816, "incorrect_loss_uncond": -8.937087774276733}, "model_output": [{"sum_logits": -5.847695350646973, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.214923858642578, "logits_per_token": -5.847695350646973, "logits_per_char": -0.5847695350646973, "num_chars": 10}, {"sum_logits": -4.7540178298950195, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.781972885131836, "logits_per_token": -2.3770089149475098, "logits_per_char": -0.39616815249125165, "num_chars": 12}, {"sum_logits": -9.797834396362305, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.567115783691406, "logits_per_token": -4.898917198181152, "logits_per_char": -0.9797834396362305, "num_chars": 10}, {"sum_logits": -11.520729064941406, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.821346282958984, "logits_per_token": -5.760364532470703, "logits_per_char": -1.0473390059037642, "num_chars": 11}, {"sum_logits": -8.799440383911133, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.11066436767578, "logits_per_token": -4.399720191955566, "logits_per_char": -0.9777155982123481, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1164, "native_id": "5d15989039d46156b417c149728591de", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.632022380828857, "incorrect_loss_raw": 13.698144435882568, "correct_loss_per_char": 0.5146691534254286, "incorrect_loss_per_char": 1.6175787321158819, "correct_loss_per_token": 2.3160111904144287, "incorrect_loss_per_token": 6.059871435165405, "correct_loss_uncond": -11.992672443389893, "incorrect_loss_uncond": -3.6480212211608887}, "model_output": [{"sum_logits": -15.07474136352539, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.175716400146484, "logits_per_token": -7.537370681762695, "logits_per_char": -2.153534480503627, "num_chars": 7}, {"sum_logits": -4.632022380828857, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.62469482421875, "logits_per_token": -2.3160111904144287, "logits_per_char": -0.5146691534254286, "num_chars": 9}, {"sum_logits": -13.799872398376465, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.980632781982422, "logits_per_token": -6.899936199188232, "logits_per_char": -1.724984049797058, "num_chars": 8}, {"sum_logits": -13.290751457214355, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.441293716430664, "logits_per_token": -6.645375728607178, "logits_per_char": -1.3290751457214356, "num_chars": 10}, {"sum_logits": -12.627212524414062, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.787019729614258, "logits_per_token": -3.1568031311035156, "logits_per_char": -1.2627212524414062, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1165, "native_id": "6eb57102b44ab74163d8f9821cbdabd0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.020294189453125, "incorrect_loss_raw": 8.630706548690796, "correct_loss_per_char": 0.456390380859375, "incorrect_loss_per_char": 0.7611512032422152, "correct_loss_per_token": 5.020294189453125, "incorrect_loss_per_token": 3.8637967308362327, "correct_loss_uncond": -7.908784866333008, "incorrect_loss_uncond": -9.565784454345703}, "model_output": [{"sum_logits": -5.020294189453125, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.929079055786133, "logits_per_token": -5.020294189453125, "logits_per_char": -0.456390380859375, "num_chars": 11}, {"sum_logits": -6.194587707519531, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.258064270019531, "logits_per_token": -3.0972938537597656, "logits_per_char": -0.5631443370472301, "num_chars": 11}, {"sum_logits": -12.451136589050293, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -23.22765350341797, "logits_per_token": -3.1127841472625732, "logits_per_char": -0.8300757726033529, "num_chars": 15}, {"sum_logits": -9.947989463806152, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.86443519592285, "logits_per_token": -3.3159964879353843, "logits_per_char": -0.6631992975870769, "num_chars": 15}, {"sum_logits": -5.929112434387207, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.435811042785645, "logits_per_token": -5.929112434387207, "logits_per_char": -0.9881854057312012, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1166, "native_id": "63861ac5e633db9090704ae315ef6f93", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.303500175476074, "incorrect_loss_raw": 11.519696950912476, "correct_loss_per_char": 0.3290714536394392, "incorrect_loss_per_char": 1.3726396600405375, "correct_loss_per_token": 2.303500175476074, "incorrect_loss_per_token": 8.603802760442097, "correct_loss_uncond": -12.231581687927246, "incorrect_loss_uncond": -4.4756019115448}, "model_output": [{"sum_logits": -5.633700370788574, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.82812213897705, "logits_per_token": -5.633700370788574, "logits_per_char": -1.126740074157715, "num_chars": 5}, {"sum_logits": -17.495365142822266, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -25.08257484436035, "logits_per_token": -5.831788380940755, "logits_per_char": -1.0934603214263916, "num_chars": 16}, {"sum_logits": -13.310291290283203, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.429417610168457, "logits_per_token": -13.310291290283203, "logits_per_char": -1.6637864112854004, "num_chars": 8}, {"sum_logits": -9.63943099975586, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.641080856323242, "logits_per_token": -9.63943099975586, "logits_per_char": -1.6065718332926433, "num_chars": 6}, {"sum_logits": -2.303500175476074, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": true, "sum_logits_uncond": -14.53508186340332, "logits_per_token": -2.303500175476074, "logits_per_char": -0.3290714536394392, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1167, "native_id": "8058c566a4f488033d00e6520b17caea", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.001065254211426, "incorrect_loss_raw": 8.207290530204773, "correct_loss_per_char": 0.5000887711842855, "incorrect_loss_per_char": 0.7375852086823502, "correct_loss_per_token": 6.001065254211426, "incorrect_loss_per_token": 5.983409782250722, "correct_loss_uncond": -7.836835861206055, "incorrect_loss_uncond": -7.527157187461853}, "model_output": [{"sum_logits": -8.728829383850098, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -13.089706420898438, "logits_per_token": -8.728829383850098, "logits_per_char": -0.6234878131321498, "num_chars": 14}, {"sum_logits": -8.787681579589844, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -13.703721046447754, "logits_per_token": -8.787681579589844, "logits_per_char": -1.0984601974487305, "num_chars": 8}, {"sum_logits": -7.877466678619385, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -16.95277214050293, "logits_per_token": -3.9387333393096924, "logits_per_char": -0.6564555565516154, "num_chars": 12}, {"sum_logits": -6.001065254211426, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -13.83790111541748, "logits_per_token": -6.001065254211426, "logits_per_char": -0.5000887711842855, "num_chars": 12}, {"sum_logits": -7.435184478759766, "num_tokens": 3, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -19.191591262817383, "logits_per_token": -2.4783948262532554, "logits_per_char": -0.571937267596905, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1168, "native_id": "57b83653d82b27d32bc39228130f3516", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.368607521057129, "incorrect_loss_raw": 11.707643747329712, "correct_loss_per_char": 1.2960759401321411, "incorrect_loss_per_char": 1.2803267956984163, "correct_loss_per_token": 10.368607521057129, "incorrect_loss_per_token": 9.965015649795532, "correct_loss_uncond": -4.357715606689453, "incorrect_loss_uncond": -3.851104974746704}, "model_output": [{"sum_logits": -6.625310897827148, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.255719184875488, "logits_per_token": -6.625310897827148, "logits_per_char": -0.7361456553141276, "num_chars": 9}, {"sum_logits": -13.108220100402832, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.76192283630371, "logits_per_token": -13.108220100402832, "logits_per_char": -1.1916563727638938, "num_chars": 11}, {"sum_logits": -13.15601921081543, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.202067375183105, "logits_per_token": -13.15601921081543, "logits_per_char": -1.6445024013519287, "num_chars": 8}, {"sum_logits": -13.941024780273438, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -17.01528549194336, "logits_per_token": -6.970512390136719, "logits_per_char": -1.5490027533637152, "num_chars": 9}, {"sum_logits": -10.368607521057129, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.726323127746582, "logits_per_token": -10.368607521057129, "logits_per_char": -1.2960759401321411, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1169, "native_id": "410f907f817dd7aa8e73291a918d3d86", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 4.997918128967285, "incorrect_loss_raw": 9.176578879356384, "correct_loss_per_char": 0.8329863548278809, "incorrect_loss_per_char": 1.1304300826091271, "correct_loss_per_token": 4.997918128967285, "incorrect_loss_per_token": 7.380693256855011, "correct_loss_uncond": -10.214426040649414, "incorrect_loss_uncond": -6.634406208992004}, "model_output": [{"sum_logits": -13.527568817138672, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.733631134033203, "logits_per_token": -13.527568817138672, "logits_per_char": -2.2545948028564453, "num_chars": 6}, {"sum_logits": -8.811661720275879, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.663012504577637, "logits_per_token": -8.811661720275879, "logits_per_char": -1.1014577150344849, "num_chars": 8}, {"sum_logits": -7.208782196044922, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.160451889038086, "logits_per_token": -3.604391098022461, "logits_per_char": -0.5149130140032087, "num_chars": 14}, {"sum_logits": -4.997918128967285, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.2123441696167, "logits_per_token": -4.997918128967285, "logits_per_char": -0.8329863548278809, "num_chars": 6}, {"sum_logits": -7.1583027839660645, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.686844825744629, "logits_per_token": -3.5791513919830322, "logits_per_char": -0.6507547985423695, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1170, "native_id": "506c2dbfe7b00a82bfdf0507a8de88fb", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.482246398925781, "incorrect_loss_raw": 11.927778244018555, "correct_loss_per_char": 1.1852807998657227, "incorrect_loss_per_char": 1.3686434047562734, "correct_loss_per_token": 3.1607487996419272, "incorrect_loss_per_token": 7.667614380518596, "correct_loss_uncond": -6.33201789855957, "incorrect_loss_uncond": -3.9423632621765137}, "model_output": [{"sum_logits": -10.874994277954102, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.241706848144531, "logits_per_token": -10.874994277954102, "logits_per_char": -1.5535706111363001, "num_chars": 7}, {"sum_logits": -13.083551406860352, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.770219802856445, "logits_per_token": -6.541775703430176, "logits_per_char": -1.635443925857544, "num_chars": 8}, {"sum_logits": -15.748319625854492, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -21.213911056518555, "logits_per_token": -5.249439875284831, "logits_per_char": -0.6847095489501953, "num_chars": 23}, {"sum_logits": -8.004247665405273, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.254728317260742, "logits_per_token": -8.004247665405273, "logits_per_char": -1.6008495330810546, "num_chars": 5}, {"sum_logits": -9.482246398925781, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.814264297485352, "logits_per_token": -3.1607487996419272, "logits_per_char": -1.1852807998657227, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1171, "native_id": "42520bf3f93f8de23670044e019001a3", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.4341721534729, "incorrect_loss_raw": 8.99017482995987, "correct_loss_per_char": 0.54341721534729, "incorrect_loss_per_char": 0.9917928834756214, "correct_loss_per_token": 2.71708607673645, "incorrect_loss_per_token": 5.952979852755864, "correct_loss_uncond": -13.436896800994873, "incorrect_loss_uncond": -6.424541652202606}, "model_output": [{"sum_logits": -7.982852458953857, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.071391105651855, "logits_per_token": -7.982852458953857, "logits_per_char": -1.3304754098256428, "num_chars": 6}, {"sum_logits": -15.581363677978516, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.162473678588867, "logits_per_token": -5.193787892659505, "logits_per_char": -1.2984469731648762, "num_chars": 12}, {"sum_logits": -8.874074935913086, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.753023147583008, "logits_per_token": -8.874074935913086, "logits_per_char": -0.9860083262125651, "num_chars": 9}, {"sum_logits": -3.5224082469940186, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.671977996826172, "logits_per_token": -1.7612041234970093, "logits_per_char": -0.3522408246994019, "num_chars": 10}, {"sum_logits": -5.4341721534729, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.871068954467773, "logits_per_token": -2.71708607673645, "logits_per_char": -0.54341721534729, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1172, "native_id": "5e260e1d96187716888cbd968010bb65", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.420563697814941, "incorrect_loss_raw": 8.247604370117188, "correct_loss_per_char": 0.6717978645773495, "incorrect_loss_per_char": 1.1646628958838328, "correct_loss_per_token": 3.806854565938314, "incorrect_loss_per_token": 5.872942686080933, "correct_loss_uncond": -10.584273338317871, "incorrect_loss_uncond": -6.084975242614746}, "model_output": [{"sum_logits": -10.047080993652344, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.660407066345215, "logits_per_token": -5.023540496826172, "logits_per_char": -0.8372567494710287, "num_chars": 12}, {"sum_logits": -7.674952507019043, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.889653205871582, "logits_per_token": -7.674952507019043, "logits_per_char": -1.2791587511698406, "num_chars": 6}, {"sum_logits": -8.950212478637695, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.561616897583008, "logits_per_token": -4.475106239318848, "logits_per_char": -1.278601782662528, "num_chars": 7}, {"sum_logits": -11.420563697814941, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -22.004837036132812, "logits_per_token": -3.806854565938314, "logits_per_char": -0.6717978645773495, "num_chars": 17}, {"sum_logits": -6.318171501159668, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.21864128112793, "logits_per_token": -6.318171501159668, "logits_per_char": -1.2636343002319337, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1173, "native_id": "ed50555f8db2b8f66caf9868dcd7e13b", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.085748672485352, "incorrect_loss_raw": 9.428110599517822, "correct_loss_per_char": 0.7873054080539279, "incorrect_loss_per_char": 0.943061399128702, "correct_loss_per_token": 3.542874336242676, "incorrect_loss_per_token": 5.932716727256775, "correct_loss_uncond": -8.86050033569336, "incorrect_loss_uncond": -8.029993772506714}, "model_output": [{"sum_logits": -7.085748672485352, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.946249008178711, "logits_per_token": -3.542874336242676, "logits_per_char": -0.7873054080539279, "num_chars": 9}, {"sum_logits": -15.297788619995117, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -25.726177215576172, "logits_per_token": -7.648894309997559, "logits_per_char": -1.2748157183329265, "num_chars": 12}, {"sum_logits": -12.665362358093262, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.803524017333984, "logits_per_token": -6.332681179046631, "logits_per_char": -0.7915851473808289, "num_chars": 16}, {"sum_logits": -2.745152473449707, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.114952087402344, "logits_per_token": -2.745152473449707, "logits_per_char": -0.3050169414944119, "num_chars": 9}, {"sum_logits": -7.004138946533203, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -11.187764167785645, "logits_per_token": -7.004138946533203, "logits_per_char": -1.4008277893066405, "num_chars": 5}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1174, "native_id": "a8c284637dabc87745a7eb05d4f7fcbc", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.417131781578064, "incorrect_loss_raw": 11.098344206809998, "correct_loss_per_char": 0.1574590868420071, "incorrect_loss_per_char": 1.381569323369435, "correct_loss_per_token": 1.417131781578064, "incorrect_loss_per_token": 7.270522856712342, "correct_loss_uncond": -11.369150757789612, "incorrect_loss_uncond": -4.646365761756897}, "model_output": [{"sum_logits": -19.13910675048828, "num_tokens": 5, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -23.054372787475586, "logits_per_token": -3.8278213500976563, "logits_per_char": -0.9569553375244141, "num_chars": 20}, {"sum_logits": -7.321671009063721, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.126147270202637, "logits_per_token": -7.321671009063721, "logits_per_char": -1.8304177522659302, "num_chars": 4}, {"sum_logits": -7.438382148742676, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.235330581665039, "logits_per_token": -7.438382148742676, "logits_per_char": -1.2397303581237793, "num_chars": 6}, {"sum_logits": -1.417131781578064, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": true, "sum_logits_uncond": -12.786282539367676, "logits_per_token": -1.417131781578064, "logits_per_char": -0.1574590868420071, "num_chars": 9}, {"sum_logits": -10.494216918945312, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.562989234924316, "logits_per_token": -10.494216918945312, "logits_per_char": -1.499173845563616, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1175, "native_id": "5758a0fb686071e95d95b1cfad5299a0", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.718564987182617, "incorrect_loss_raw": 12.337917566299438, "correct_loss_per_char": 0.9765470822652181, "incorrect_loss_per_char": 0.9744730959034928, "correct_loss_per_token": 3.9061883290608725, "incorrect_loss_per_token": 7.409959435462952, "correct_loss_uncond": -7.217733383178711, "incorrect_loss_uncond": -4.58682107925415}, "model_output": [{"sum_logits": -9.92800521850586, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.736008644104004, "logits_per_token": -9.92800521850586, "logits_per_char": -0.9025459289550781, "num_chars": 11}, {"sum_logits": -12.999236106872559, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.742107391357422, "logits_per_token": -6.499618053436279, "logits_per_char": -0.7221797837151421, "num_chars": 18}, {"sum_logits": -11.718564987182617, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.936298370361328, "logits_per_token": -3.9061883290608725, "logits_per_char": -0.9765470822652181, "num_chars": 12}, {"sum_logits": -17.035146713256836, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.34868812561035, "logits_per_token": -8.517573356628418, "logits_per_char": -1.4195955594380696, "num_chars": 12}, {"sum_logits": -9.3892822265625, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.872150421142578, "logits_per_token": -4.69464111328125, "logits_per_char": -0.8535711115056818, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1176, "native_id": "d986f17acb3ed19c77e3ca3f98c026b9", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.003772735595703, "incorrect_loss_raw": 22.528573989868164, "correct_loss_per_char": 0.5002095964219835, "incorrect_loss_per_char": 1.3191725643452723, "correct_loss_per_token": 4.501886367797852, "incorrect_loss_per_token": 8.247836271921793, "correct_loss_uncond": -12.428277969360352, "incorrect_loss_uncond": -3.396505355834961}, "model_output": [{"sum_logits": -30.107925415039062, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -29.575462341308594, "logits_per_token": -10.035975138346354, "logits_per_char": -1.309040235436481, "num_chars": 23}, {"sum_logits": -19.095766067504883, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -21.772315979003906, "logits_per_token": -9.547883033752441, "logits_per_char": -1.4689050821157603, "num_chars": 13}, {"sum_logits": -28.191261291503906, "num_tokens": 4, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -34.101112365722656, "logits_per_token": -7.047815322875977, "logits_per_char": -1.3424410138811385, "num_chars": 21}, {"sum_logits": -9.003772735595703, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -21.432050704956055, "logits_per_token": -4.501886367797852, "logits_per_char": -0.5002095964219835, "num_chars": 18}, {"sum_logits": -12.719343185424805, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.251426696777344, "logits_per_token": -6.359671592712402, "logits_per_char": -1.1563039259477095, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1177, "native_id": "4a4f6408fae400ce0beb5bea0f9913e9", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.1904959678649902, "incorrect_loss_raw": 8.736571907997131, "correct_loss_per_char": 0.07002917458029355, "incorrect_loss_per_char": 1.1099429158937364, "correct_loss_per_token": 0.5952479839324951, "incorrect_loss_per_token": 6.426235914230347, "correct_loss_uncond": -15.812893390655518, "incorrect_loss_uncond": -6.726888298988342}, "model_output": [{"sum_logits": -10.723501205444336, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.580500602722168, "logits_per_token": -10.723501205444336, "logits_per_char": -1.340437650680542, "num_chars": 8}, {"sum_logits": -12.849268913269043, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.186037063598633, "logits_per_token": -6.4246344566345215, "logits_per_char": -1.8356098447527205, "num_chars": 7}, {"sum_logits": -5.633419036865234, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.549616813659668, "logits_per_token": -2.816709518432617, "logits_per_char": -0.6259354485405816, "num_chars": 9}, {"sum_logits": -5.740098476409912, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.537686347961426, "logits_per_token": -5.740098476409912, "logits_per_char": -0.6377887196011014, "num_chars": 9}, {"sum_logits": -1.1904959678649902, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": true, "sum_logits_uncond": -17.003389358520508, "logits_per_token": -0.5952479839324951, "logits_per_char": -0.07002917458029355, "num_chars": 17}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1178, "native_id": "8c655f3a55bde41aad880f138d7a445d", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.448457717895508, "incorrect_loss_raw": 9.815754652023315, "correct_loss_per_char": 1.4896915435791016, "incorrect_loss_per_char": 2.228141689300537, "correct_loss_per_token": 7.448457717895508, "incorrect_loss_per_token": 9.815754652023315, "correct_loss_uncond": -4.720500946044922, "incorrect_loss_uncond": -3.832711696624756}, "model_output": [{"sum_logits": -10.599630355834961, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.42919921875, "logits_per_token": -10.599630355834961, "logits_per_char": -2.6499075889587402, "num_chars": 4}, {"sum_logits": -8.634748458862305, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.687225341796875, "logits_per_token": -8.634748458862305, "logits_per_char": -1.726949691772461, "num_chars": 5}, {"sum_logits": -7.448457717895508, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.16895866394043, "logits_per_token": -7.448457717895508, "logits_per_char": -1.4896915435791016, "num_chars": 5}, {"sum_logits": -10.599630355834961, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.42919921875, "logits_per_token": -10.599630355834961, "logits_per_char": -2.6499075889587402, "num_chars": 4}, {"sum_logits": -9.429009437561035, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.04824161529541, "logits_per_token": -9.429009437561035, "logits_per_char": -1.8858018875122071, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1179, "native_id": "56417ee33b44f0d916bedfb6fd99b0ec", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.342280864715576, "incorrect_loss_raw": 12.785136938095093, "correct_loss_per_char": 0.3947528058832342, "incorrect_loss_per_char": 1.2496670072986966, "correct_loss_per_token": 4.342280864715576, "incorrect_loss_per_token": 6.820834517478943, "correct_loss_uncond": -10.219858646392822, "incorrect_loss_uncond": -4.063785791397095}, "model_output": [{"sum_logits": -17.552724838256836, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.480022430419922, "logits_per_token": -5.850908279418945, "logits_per_char": -1.9503027598063152, "num_chars": 9}, {"sum_logits": -9.277036666870117, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.238256454467773, "logits_per_token": -9.277036666870117, "logits_per_char": -1.3252909524100167, "num_chars": 7}, {"sum_logits": -14.535577774047852, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.01914405822754, "logits_per_token": -7.267788887023926, "logits_per_char": -0.9084736108779907, "num_chars": 16}, {"sum_logits": -4.342280864715576, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.562139511108398, "logits_per_token": -4.342280864715576, "logits_per_char": -0.3947528058832342, "num_chars": 11}, {"sum_logits": -9.775208473205566, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.658267974853516, "logits_per_token": -4.887604236602783, "logits_per_char": -0.8146007061004639, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1180, "native_id": "43fb083962f825ae651d88648bbd2f74", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.41463851928711, "incorrect_loss_raw": 16.09160852432251, "correct_loss_per_char": 0.7439027513776507, "incorrect_loss_per_char": 1.5499366185603998, "correct_loss_per_token": 5.207319259643555, "incorrect_loss_per_token": 7.980644583702087, "correct_loss_uncond": -9.87786865234375, "incorrect_loss_uncond": -0.347383975982666}, "model_output": [{"sum_logits": -9.530941009521484, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -15.157535552978516, "logits_per_token": -9.530941009521484, "logits_per_char": -0.7942450841267904, "num_chars": 12}, {"sum_logits": -17.11398696899414, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.27862548828125, "logits_per_token": -8.55699348449707, "logits_per_char": -1.9015541076660156, "num_chars": 9}, {"sum_logits": -20.10443687438965, "num_tokens": 4, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.527301788330078, "logits_per_token": -5.026109218597412, "logits_per_char": -1.546495144183819, "num_chars": 13}, {"sum_logits": -10.41463851928711, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -20.29250717163086, "logits_per_token": -5.207319259643555, "logits_per_char": -0.7439027513776507, "num_chars": 14}, {"sum_logits": -17.617069244384766, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.79250717163086, "logits_per_token": -8.808534622192383, "logits_per_char": -1.9574521382649739, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1181, "native_id": "aed771629c8dbd0c2587891e98030607", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.742021083831787, "incorrect_loss_raw": 9.56163465976715, "correct_loss_per_char": 1.1484042167663575, "incorrect_loss_per_char": 1.3070799864473797, "correct_loss_per_token": 5.742021083831787, "incorrect_loss_per_token": 9.56163465976715, "correct_loss_uncond": -7.022476673126221, "incorrect_loss_uncond": -3.6639007329940796}, "model_output": [{"sum_logits": -10.350627899169922, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.178909301757812, "logits_per_token": -10.350627899169922, "logits_per_char": -1.7251046498616536, "num_chars": 6}, {"sum_logits": -11.400259971618652, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.703615188598633, "logits_per_token": -11.400259971618652, "logits_per_char": -1.628608567374093, "num_chars": 7}, {"sum_logits": -7.493984699249268, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.897298812866211, "logits_per_token": -7.493984699249268, "logits_per_char": -0.7493984699249268, "num_chars": 10}, {"sum_logits": -9.001666069030762, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.122318267822266, "logits_per_token": -9.001666069030762, "logits_per_char": -1.1252082586288452, "num_chars": 8}, {"sum_logits": -5.742021083831787, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.764497756958008, "logits_per_token": -5.742021083831787, "logits_per_char": -1.1484042167663575, "num_chars": 5}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1182, "native_id": "d0a42c8180b4e080aa071dd70fce7e03", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.2957763671875, "incorrect_loss_raw": 15.815919876098633, "correct_loss_per_char": 0.6275431315104166, "incorrect_loss_per_char": 1.289935100646246, "correct_loss_per_token": 5.64788818359375, "incorrect_loss_per_token": 9.169690132141113, "correct_loss_uncond": -7.930034637451172, "incorrect_loss_uncond": -1.4760308265686035}, "model_output": [{"sum_logits": -15.955337524414062, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -19.062339782714844, "logits_per_token": -7.977668762207031, "logits_per_char": -1.139666966029576, "num_chars": 14}, {"sum_logits": -11.2957763671875, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -19.225811004638672, "logits_per_token": -5.64788818359375, "logits_per_char": -0.6275431315104166, "num_chars": 18}, {"sum_logits": -22.26193618774414, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -20.663890838623047, "logits_per_token": -11.13096809387207, "logits_per_char": -1.5901382991245814, "num_chars": 14}, {"sum_logits": -10.093841552734375, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -12.624382019042969, "logits_per_token": -10.093841552734375, "logits_per_char": -1.6823069254557292, "num_chars": 6}, {"sum_logits": -14.952564239501953, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -16.817190170288086, "logits_per_token": -7.476282119750977, "logits_per_char": -0.7476282119750977, "num_chars": 20}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1183, "native_id": "533599262a5dae7c7137cfe69e0e24fb", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.5914359092712402, "incorrect_loss_raw": 8.876126766204834, "correct_loss_per_char": 0.29928632577260333, "incorrect_loss_per_char": 1.1638545682032904, "correct_loss_per_token": 3.5914359092712402, "incorrect_loss_per_token": 8.876126766204834, "correct_loss_uncond": -11.566099643707275, "incorrect_loss_uncond": -4.331298828125}, "model_output": [{"sum_logits": -4.557422161102295, "num_tokens": 1, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -13.80385971069336, "logits_per_token": -4.557422161102295, "logits_per_char": -0.45574221611022947, "num_chars": 10}, {"sum_logits": -3.5914359092712402, "num_tokens": 1, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -15.157535552978516, "logits_per_token": -3.5914359092712402, "logits_per_char": -0.29928632577260333, "num_chars": 12}, {"sum_logits": -11.981706619262695, "num_tokens": 1, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -12.349469184875488, "logits_per_token": -11.981706619262695, "logits_per_char": -1.9969511032104492, "num_chars": 6}, {"sum_logits": -12.092207908630371, "num_tokens": 1, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -13.975624084472656, "logits_per_token": -12.092207908630371, "logits_per_char": -1.3435786565144856, "num_chars": 9}, {"sum_logits": -6.873170375823975, "num_tokens": 1, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -12.700749397277832, "logits_per_token": -6.873170375823975, "logits_per_char": -0.8591462969779968, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1184, "native_id": "edd1634d911614590c6b8ca730df95fe", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.381653785705566, "incorrect_loss_raw": 9.70257318019867, "correct_loss_per_char": 0.6710594350641425, "incorrect_loss_per_char": 0.9266376735328081, "correct_loss_per_token": 3.690826892852783, "incorrect_loss_per_token": 5.578982830047607, "correct_loss_uncond": -7.925286293029785, "incorrect_loss_uncond": -8.362717032432556}, "model_output": [{"sum_logits": -8.323745727539062, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.596628189086914, "logits_per_token": -4.161872863769531, "logits_per_char": -0.6936454772949219, "num_chars": 12}, {"sum_logits": -8.054411888122559, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.154966354370117, "logits_per_token": -4.027205944061279, "logits_per_char": -0.6712009906768799, "num_chars": 12}, {"sum_logits": -16.610565185546875, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -22.839126586914062, "logits_per_token": -8.305282592773438, "logits_per_char": -1.5100513805042615, "num_chars": 11}, {"sum_logits": -7.381653785705566, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.306940078735352, "logits_per_token": -3.690826892852783, "logits_per_char": -0.6710594350641425, "num_chars": 11}, {"sum_logits": -5.821569919586182, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.670439720153809, "logits_per_token": -5.821569919586182, "logits_per_char": -0.8316528456551688, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1185, "native_id": "9a544e9f4847c41a15fdf47ae7b98d8a", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.083774089813232, "incorrect_loss_raw": 11.42299199104309, "correct_loss_per_char": 0.635471761226654, "incorrect_loss_per_char": 0.9767946473189763, "correct_loss_per_token": 5.083774089813232, "incorrect_loss_per_token": 5.904507557551066, "correct_loss_uncond": -9.794445514678955, "incorrect_loss_uncond": -6.095369338989258}, "model_output": [{"sum_logits": -7.453896522521973, "num_tokens": 1, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -13.023554801940918, "logits_per_token": -7.453896522521973, "logits_per_char": -0.9317370653152466, "num_chars": 8}, {"sum_logits": -5.083774089813232, "num_tokens": 1, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -14.878219604492188, "logits_per_token": -5.083774089813232, "logits_per_char": -0.635471761226654, "num_chars": 8}, {"sum_logits": -9.067665100097656, "num_tokens": 2, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -18.17926597595215, "logits_per_token": -4.533832550048828, "logits_per_char": -0.7556387583414713, "num_chars": 12}, {"sum_logits": -17.729412078857422, "num_tokens": 3, "num_tokens_all": 161, "is_greedy": false, "sum_logits_uncond": -21.02561378479004, "logits_per_token": -5.909804026285808, "logits_per_char": -1.2663865770612444, "num_chars": 14}, {"sum_logits": -11.440994262695312, "num_tokens": 2, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -17.84501075744629, "logits_per_token": -5.720497131347656, "logits_per_char": -0.9534161885579427, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1186, "native_id": "26bd85f05d29863ed777a4f1a4b8fa63", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.2804975509643555, "incorrect_loss_raw": 16.375916004180908, "correct_loss_per_char": 0.3280497550964355, "incorrect_loss_per_char": 1.2141895785476222, "correct_loss_per_token": 3.2804975509643555, "incorrect_loss_per_token": 6.613535960515341, "correct_loss_uncond": -10.236645698547363, "incorrect_loss_uncond": -2.547455072402954}, "model_output": [{"sum_logits": -21.866239547729492, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.323509216308594, "logits_per_token": -7.288746515909831, "logits_per_char": -1.5618742534092493, "num_chars": 14}, {"sum_logits": -14.692514419555664, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.352984428405762, "logits_per_token": -7.346257209777832, "logits_per_char": -1.0494653156825475, "num_chars": 14}, {"sum_logits": -13.025020599365234, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.39490509033203, "logits_per_token": -6.512510299682617, "logits_per_char": -1.1840927817604758, "num_chars": 11}, {"sum_logits": -3.2804975509643555, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.517143249511719, "logits_per_token": -3.2804975509643555, "logits_per_char": -0.3280497550964355, "num_chars": 10}, {"sum_logits": -15.919889450073242, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.622085571289062, "logits_per_token": -5.306629816691081, "logits_per_char": -1.0613259633382162, "num_chars": 15}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1187, "native_id": "3884d82524f2337ce53ce64776293cf7", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.156569480895996, "incorrect_loss_raw": 8.611862182617188, "correct_loss_per_char": 0.7156569480895996, "incorrect_loss_per_char": 0.837663757801056, "correct_loss_per_token": 3.578284740447998, "incorrect_loss_per_token": 5.608972549438477, "correct_loss_uncond": -13.110142707824707, "incorrect_loss_uncond": -6.527202606201172}, "model_output": [{"sum_logits": -7.156569480895996, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -20.266712188720703, "logits_per_token": -3.578284740447998, "logits_per_char": -0.7156569480895996, "num_chars": 10}, {"sum_logits": -9.488056182861328, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -18.2464542388916, "logits_per_token": -4.744028091430664, "logits_per_char": -0.6325370788574218, "num_chars": 15}, {"sum_logits": -11.276727676391602, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -13.451674461364746, "logits_per_token": -11.276727676391602, "logits_per_char": -1.4095909595489502, "num_chars": 8}, {"sum_logits": -9.690040588378906, "num_tokens": 4, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -17.289722442626953, "logits_per_token": -2.4225101470947266, "logits_per_char": -0.5100021362304688, "num_chars": 19}, {"sum_logits": -3.992624282836914, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -11.568408012390137, "logits_per_token": -3.992624282836914, "logits_per_char": -0.7985248565673828, "num_chars": 5}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1188, "native_id": "acb3147d946db3b06a596d48e0be56cf", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.525455474853516, "incorrect_loss_raw": 11.928574800491333, "correct_loss_per_char": 1.1050910949707031, "incorrect_loss_per_char": 1.7240527898837357, "correct_loss_per_token": 5.525455474853516, "incorrect_loss_per_token": 9.577781915664673, "correct_loss_uncond": -8.83413314819336, "incorrect_loss_uncond": -3.3810362815856934}, "model_output": [{"sum_logits": -5.525455474853516, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.359588623046875, "logits_per_token": -5.525455474853516, "logits_per_char": -1.1050910949707031, "num_chars": 5}, {"sum_logits": -18.80634307861328, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.126605987548828, "logits_per_token": -9.40317153930664, "logits_per_char": -1.4466417752779448, "num_chars": 13}, {"sum_logits": -7.578920364379883, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.587641716003418, "logits_per_token": -7.578920364379883, "logits_per_char": -1.8947300910949707, "num_chars": 4}, {"sum_logits": -11.64684009552002, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.924269676208496, "logits_per_token": -11.64684009552002, "logits_per_char": -1.9411400159200032, "num_chars": 6}, {"sum_logits": -9.682195663452148, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.599926948547363, "logits_per_token": -9.682195663452148, "logits_per_char": -1.6136992772420247, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1189, "native_id": "52ab95f9216f1994e37cc08f7f258f13", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.612771034240723, "incorrect_loss_raw": 13.718321561813354, "correct_loss_per_char": 0.7075180689493815, "incorrect_loss_per_char": 0.9788001239569479, "correct_loss_per_token": 5.306385517120361, "incorrect_loss_per_token": 6.259686549504598, "correct_loss_uncond": -12.127057075500488, "incorrect_loss_uncond": -6.620413064956665}, "model_output": [{"sum_logits": -14.387381553649902, "num_tokens": 3, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -19.57058334350586, "logits_per_token": -4.795793851216634, "logits_per_char": -1.1067216579730694, "num_chars": 13}, {"sum_logits": -20.681577682495117, "num_tokens": 2, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -23.27791976928711, "logits_per_token": -10.340788841247559, "logits_per_char": -1.216563393087948, "num_chars": 17}, {"sum_logits": -10.612771034240723, "num_tokens": 2, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -22.73982810974121, "logits_per_token": -5.306385517120361, "logits_per_char": -0.7075180689493815, "num_chars": 15}, {"sum_logits": -10.368907928466797, "num_tokens": 2, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -19.614906311035156, "logits_per_token": -5.184453964233398, "logits_per_char": -1.0368907928466797, "num_chars": 10}, {"sum_logits": -9.435419082641602, "num_tokens": 2, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -18.891529083251953, "logits_per_token": -4.717709541320801, "logits_per_char": -0.5550246519200942, "num_chars": 17}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1190, "native_id": "f60641f550d5ee44ac1bedcaf6ad6357", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.639915943145752, "incorrect_loss_raw": 11.995312452316284, "correct_loss_per_char": 0.1639915943145752, "incorrect_loss_per_char": 1.3587678437466386, "correct_loss_per_token": 0.819957971572876, "incorrect_loss_per_token": 7.057165026664734, "correct_loss_uncond": -14.057752132415771, "incorrect_loss_uncond": -3.89935564994812}, "model_output": [{"sum_logits": -13.259206771850586, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.956235885620117, "logits_per_token": -6.629603385925293, "logits_per_char": -1.019938982450045, "num_chars": 13}, {"sum_logits": -1.639915943145752, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": true, "sum_logits_uncond": -15.697668075561523, "logits_per_token": -0.819957971572876, "logits_per_char": -0.1639915943145752, "num_chars": 10}, {"sum_logits": -8.476070404052734, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.094871520996094, "logits_per_token": -8.476070404052734, "logits_per_char": -2.1190176010131836, "num_chars": 4}, {"sum_logits": -11.864519119262695, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.009998321533203, "logits_per_token": -5.932259559631348, "logits_per_char": -0.9887099266052246, "num_chars": 12}, {"sum_logits": -14.381453514099121, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.517566680908203, "logits_per_token": -7.1907267570495605, "logits_per_char": -1.3074048649181018, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1191, "native_id": "d9835ede7a0ed79325de13ca95b85b78", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.690677642822266, "incorrect_loss_raw": 10.03855013847351, "correct_loss_per_char": 1.3363347053527832, "incorrect_loss_per_char": 1.027274602035635, "correct_loss_per_token": 3.5635592142740884, "incorrect_loss_per_token": 6.474309802055359, "correct_loss_uncond": -6.008081436157227, "incorrect_loss_uncond": -6.069244623184204}, "model_output": [{"sum_logits": -8.966992378234863, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.312360763549805, "logits_per_token": -4.483496189117432, "logits_per_char": -0.6897686444796048, "num_chars": 13}, {"sum_logits": -10.2755126953125, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.23485565185547, "logits_per_token": -5.13775634765625, "logits_per_char": -0.9341375177556818, "num_chars": 11}, {"sum_logits": -11.640277862548828, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.835836410522461, "logits_per_token": -11.640277862548828, "logits_per_char": -1.4550347328186035, "num_chars": 8}, {"sum_logits": -10.690677642822266, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.698759078979492, "logits_per_token": -3.5635592142740884, "logits_per_char": -1.3363347053527832, "num_chars": 8}, {"sum_logits": -9.271417617797852, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.048126220703125, "logits_per_token": -4.635708808898926, "logits_per_char": -1.0301575130886502, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1192, "native_id": "2987db72e66f5fa0015ac64f9b3614ec", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.043149948120117, "incorrect_loss_raw": 12.335381984710693, "correct_loss_per_char": 0.5869291623433431, "incorrect_loss_per_char": 1.145243089095406, "correct_loss_per_token": 3.5215749740600586, "incorrect_loss_per_token": 6.633138537406921, "correct_loss_uncond": -10.301542282104492, "incorrect_loss_uncond": -6.557774543762207}, "model_output": [{"sum_logits": -7.043149948120117, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.34469223022461, "logits_per_token": -3.5215749740600586, "logits_per_char": -0.5869291623433431, "num_chars": 12}, {"sum_logits": -9.318559646606445, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.067514419555664, "logits_per_token": -9.318559646606445, "logits_per_char": -0.9318559646606446, "num_chars": 10}, {"sum_logits": -5.01490592956543, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -9.980762481689453, "logits_per_token": -5.01490592956543, "logits_per_char": -1.002981185913086, "num_chars": 5}, {"sum_logits": -13.788291931152344, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -22.022357940673828, "logits_per_token": -6.894145965576172, "logits_per_char": -1.723536491394043, "num_chars": 8}, {"sum_logits": -21.219770431518555, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -29.501991271972656, "logits_per_token": -5.304942607879639, "logits_per_char": -0.9225987144138502, "num_chars": 23}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1193, "native_id": "8b548832703a8c68a788e2f9c0e222ae", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.818913459777832, "incorrect_loss_raw": 10.097650647163391, "correct_loss_per_char": 1.9637826919555663, "incorrect_loss_per_char": 1.7670191039168646, "correct_loss_per_token": 9.818913459777832, "incorrect_loss_per_token": 9.18765676021576, "correct_loss_uncond": -3.340329170227051, "incorrect_loss_uncond": -3.961450934410095}, "model_output": [{"sum_logits": -9.818913459777832, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.159242630004883, "logits_per_token": -9.818913459777832, "logits_per_char": -1.9637826919555663, "num_chars": 5}, {"sum_logits": -13.081121444702148, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.294007301330566, "logits_per_token": -13.081121444702148, "logits_per_char": -1.8687316349574499, "num_chars": 7}, {"sum_logits": -7.279951095581055, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.634145736694336, "logits_per_token": -3.6399755477905273, "logits_per_char": -0.8088834550645616, "num_chars": 9}, {"sum_logits": -12.338423728942871, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.337713241577148, "logits_per_token": -12.338423728942871, "logits_per_char": -2.467684745788574, "num_chars": 5}, {"sum_logits": -7.69110631942749, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -11.970540046691895, "logits_per_token": -7.69110631942749, "logits_per_char": -1.9227765798568726, "num_chars": 4}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1194, "native_id": "1ddd239a2a6438a891cb411b82e7f450", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.752176523208618, "incorrect_loss_raw": 11.80801248550415, "correct_loss_per_char": 0.25019786574623804, "incorrect_loss_per_char": 2.0511091096060614, "correct_loss_per_token": 2.752176523208618, "incorrect_loss_per_token": 9.69325876235962, "correct_loss_uncond": -9.597603559494019, "incorrect_loss_uncond": -3.8098108768463135}, "model_output": [{"sum_logits": -10.899229049682617, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.001758575439453, "logits_per_token": -10.899229049682617, "logits_per_char": -1.5570327213832311, "num_chars": 7}, {"sum_logits": -9.575679779052734, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.63223648071289, "logits_per_token": -9.575679779052734, "logits_per_char": -1.3679542541503906, "num_chars": 7}, {"sum_logits": -16.91802978515625, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.901077270507812, "logits_per_token": -8.459014892578125, "logits_per_char": -2.819671630859375, "num_chars": 6}, {"sum_logits": -9.839111328125, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.9362211227417, "logits_per_token": -9.839111328125, "logits_per_char": -2.45977783203125, "num_chars": 4}, {"sum_logits": -2.752176523208618, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.349780082702637, "logits_per_token": -2.752176523208618, "logits_per_char": -0.25019786574623804, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1195, "native_id": "6544a50bf9563d52dbd2034e81df0bf3", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.4954094886779785, "incorrect_loss_raw": 8.991151809692383, "correct_loss_per_char": 0.31776449897072534, "incorrect_loss_per_char": 0.9243529081344605, "correct_loss_per_token": 3.4954094886779785, "incorrect_loss_per_token": 5.463118235270183, "correct_loss_uncond": -10.409783840179443, "incorrect_loss_uncond": -6.992975473403931}, "model_output": [{"sum_logits": -9.08558177947998, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.604952812194824, "logits_per_token": -9.08558177947998, "logits_per_char": -1.0095090866088867, "num_chars": 9}, {"sum_logits": -10.556995391845703, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.41488265991211, "logits_per_token": -3.518998463948568, "logits_per_char": -1.0556995391845703, "num_chars": 10}, {"sum_logits": -5.710824012756348, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.257238388061523, "logits_per_token": -5.710824012756348, "logits_per_char": -0.5710824012756348, "num_chars": 10}, {"sum_logits": -3.4954094886779785, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.905193328857422, "logits_per_token": -3.4954094886779785, "logits_per_char": -0.31776449897072534, "num_chars": 11}, {"sum_logits": -10.6112060546875, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.659435272216797, "logits_per_token": -3.5370686848958335, "logits_per_char": -1.06112060546875, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1196, "native_id": "5ff6ce8ad88459272ffe23d33db4970a", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.768548965454102, "incorrect_loss_raw": 13.027279138565063, "correct_loss_per_char": 0.7210686206817627, "incorrect_loss_per_char": 1.507582606209649, "correct_loss_per_token": 5.768548965454102, "incorrect_loss_per_token": 9.212480068206787, "correct_loss_uncond": -8.666437149047852, "incorrect_loss_uncond": -3.6522819995880127}, "model_output": [{"sum_logits": -14.027778625488281, "num_tokens": 2, "num_tokens_all": 158, "is_greedy": false, "sum_logits_uncond": -21.15987777709961, "logits_per_token": -7.013889312744141, "logits_per_char": -1.402777862548828, "num_chars": 10}, {"sum_logits": -16.49061393737793, "num_tokens": 2, "num_tokens_all": 158, "is_greedy": false, "sum_logits_uncond": -17.08570098876953, "logits_per_token": -8.245306968688965, "logits_per_char": -1.8322904374864366, "num_chars": 9}, {"sum_logits": -9.107501029968262, "num_tokens": 1, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -14.495186805725098, "logits_per_token": -9.107501029968262, "logits_per_char": -1.0119445588853624, "num_chars": 9}, {"sum_logits": -5.768548965454102, "num_tokens": 1, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -14.434986114501953, "logits_per_token": -5.768548965454102, "logits_per_char": -0.7210686206817627, "num_chars": 8}, {"sum_logits": -12.483222961425781, "num_tokens": 1, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -13.977478981018066, "logits_per_token": -12.483222961425781, "logits_per_char": -1.7833175659179688, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1197, "native_id": "2ca05683157a3cd89d82016f13e560ec", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.832569122314453, "incorrect_loss_raw": 7.7946940660476685, "correct_loss_per_char": 0.5369521247016059, "incorrect_loss_per_char": 0.9710636034607887, "correct_loss_per_token": 4.832569122314453, "incorrect_loss_per_token": 5.1742063164711, "correct_loss_uncond": -10.063329696655273, "incorrect_loss_uncond": -7.787565350532532}, "model_output": [{"sum_logits": -4.696183204650879, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.254728317260742, "logits_per_token": -4.696183204650879, "logits_per_char": -0.9392366409301758, "num_chars": 5}, {"sum_logits": -4.832569122314453, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.895898818969727, "logits_per_token": -4.832569122314453, "logits_per_char": -0.5369521247016059, "num_chars": 9}, {"sum_logits": -5.518691062927246, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.388150215148926, "logits_per_token": -5.518691062927246, "logits_per_char": -1.1037382125854491, "num_chars": 5}, {"sum_logits": -6.802950382232666, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.997615814208984, "logits_per_token": -3.401475191116333, "logits_per_char": -0.4251843988895416, "num_chars": 16}, {"sum_logits": -14.160951614379883, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.68854331970215, "logits_per_token": -7.080475807189941, "logits_per_char": -1.4160951614379882, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1198, "native_id": "1a8fbab20bbdf0bbf3961894662d5f7c", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.37418794631958, "incorrect_loss_raw": 11.719413757324219, "correct_loss_per_char": 0.137418794631958, "incorrect_loss_per_char": 1.361797016334396, "correct_loss_per_token": 1.37418794631958, "incorrect_loss_per_token": 10.113754272460938, "correct_loss_uncond": -11.054577350616455, "incorrect_loss_uncond": -1.695518970489502}, "model_output": [{"sum_logits": -11.489053726196289, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.485124588012695, "logits_per_token": -11.489053726196289, "logits_per_char": -1.4361317157745361, "num_chars": 8}, {"sum_logits": -12.255928039550781, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.155227661132812, "logits_per_token": -12.255928039550781, "logits_per_char": -1.1141752763227983, "num_chars": 11}, {"sum_logits": -12.84527587890625, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.0106782913208, "logits_per_token": -6.422637939453125, "logits_per_char": -1.4272528754340277, "num_chars": 9}, {"sum_logits": -10.287397384643555, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.008700370788574, "logits_per_token": -10.287397384643555, "logits_per_char": -1.469628197806222, "num_chars": 7}, {"sum_logits": -1.37418794631958, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": true, "sum_logits_uncond": -12.428765296936035, "logits_per_token": -1.37418794631958, "logits_per_char": -0.137418794631958, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1199, "native_id": "5b5d2a8b83282f61c68a870116042f64", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.36179256439209, "incorrect_loss_raw": 9.17958402633667, "correct_loss_per_char": 0.4874356876720082, "incorrect_loss_per_char": 0.8774764215244966, "correct_loss_per_token": 2.680896282196045, "incorrect_loss_per_token": 4.819624900817871, "correct_loss_uncond": -10.395661354064941, "incorrect_loss_uncond": -7.894968271255493}, "model_output": [{"sum_logits": -7.938979148864746, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -11.102972984313965, "logits_per_token": -7.938979148864746, "logits_per_char": -1.3231631914774578, "num_chars": 6}, {"sum_logits": -10.348103523254395, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -21.41376495361328, "logits_per_token": -3.4493678410847983, "logits_per_char": -0.6087119719561409, "num_chars": 17}, {"sum_logits": -7.952844619750977, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.7159423828125, "logits_per_token": -2.6509482065836587, "logits_per_char": -0.5301896413167317, "num_chars": 15}, {"sum_logits": -10.478408813476562, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.065528869628906, "logits_per_token": -5.239204406738281, "logits_per_char": -1.0478408813476563, "num_chars": 10}, {"sum_logits": -5.36179256439209, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.757453918457031, "logits_per_token": -2.680896282196045, "logits_per_char": -0.4874356876720082, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1200, "native_id": "cfa081b5ba90dae4d7ddb5b7ad9d369a", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.0012688636779785, "incorrect_loss_raw": 12.209783554077148, "correct_loss_per_char": 1.7503172159194946, "incorrect_loss_per_char": 1.6912624835968018, "correct_loss_per_token": 7.0012688636779785, "incorrect_loss_per_token": 10.350604057312012, "correct_loss_uncond": -4.984931468963623, "incorrect_loss_uncond": -1.7489266395568848}, "model_output": [{"sum_logits": -12.541961669921875, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.305567741394043, "logits_per_token": -12.541961669921875, "logits_per_char": -2.090326944986979, "num_chars": 6}, {"sum_logits": -11.550668716430664, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.668022155761719, "logits_per_token": -11.550668716430664, "logits_per_char": -1.925111452738444, "num_chars": 6}, {"sum_logits": -9.873067855834961, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.616837501525879, "logits_per_token": -9.873067855834961, "logits_per_char": -1.0970075395372179, "num_chars": 9}, {"sum_logits": -14.873435974121094, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.244413375854492, "logits_per_token": -7.436717987060547, "logits_per_char": -1.652603997124566, "num_chars": 9}, {"sum_logits": -7.0012688636779785, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.986200332641602, "logits_per_token": -7.0012688636779785, "logits_per_char": -1.7503172159194946, "num_chars": 4}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1201, "native_id": "009a7aabffe0583fc2df46656b29c326", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.895501136779785, "incorrect_loss_raw": 12.477970242500305, "correct_loss_per_char": 0.299653933598445, "incorrect_loss_per_char": 1.291643599574528, "correct_loss_per_token": 1.9477505683898926, "incorrect_loss_per_token": 7.732060253620148, "correct_loss_uncond": -11.259321212768555, "incorrect_loss_uncond": -3.098943829536438}, "model_output": [{"sum_logits": -18.628299713134766, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -18.789621353149414, "logits_per_token": -9.314149856567383, "logits_per_char": -2.069811079237196, "num_chars": 9}, {"sum_logits": -11.944601058959961, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.193441390991211, "logits_per_token": -11.944601058959961, "logits_per_char": -1.706371579851423, "num_chars": 7}, {"sum_logits": -3.895501136779785, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.15482234954834, "logits_per_token": -1.9477505683898926, "logits_per_char": -0.299653933598445, "num_chars": 13}, {"sum_logits": -6.0675835609436035, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -13.050267219543457, "logits_per_token": -3.0337917804718018, "logits_per_char": -0.5056319634119669, "num_chars": 12}, {"sum_logits": -13.27139663696289, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -16.27432632446289, "logits_per_token": -6.635698318481445, "logits_per_char": -0.8847597757975261, "num_chars": 15}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1202, "native_id": "2521b3fe6bfd6aeb91f9107dc7c4fbee", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.5687904357910156, "incorrect_loss_raw": 8.976434826850891, "correct_loss_per_char": 0.2568790435791016, "incorrect_loss_per_char": 0.8736852194108661, "correct_loss_per_token": 2.5687904357910156, "incorrect_loss_per_token": 7.00929868221283, "correct_loss_uncond": -11.102619171142578, "incorrect_loss_uncond": -6.368749260902405}, "model_output": [{"sum_logits": -15.737089157104492, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.692981719970703, "logits_per_token": -7.868544578552246, "logits_per_char": -1.0491392771402994, "num_chars": 15}, {"sum_logits": -10.877243995666504, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.442434310913086, "logits_per_token": -10.877243995666504, "logits_per_char": -1.208582666185167, "num_chars": 9}, {"sum_logits": -2.5687904357910156, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.671409606933594, "logits_per_token": -2.5687904357910156, "logits_per_char": -0.2568790435791016, "num_chars": 10}, {"sum_logits": -4.233217239379883, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.907820701599121, "logits_per_token": -4.233217239379883, "logits_per_char": -0.6047453199114118, "num_chars": 7}, {"sum_logits": -5.0581889152526855, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.337499618530273, "logits_per_token": -5.0581889152526855, "logits_per_char": -0.6322736144065857, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1203, "native_id": "3fe45ab3bd4a844ea290050fc0ece8c1_1", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.360246181488037, "incorrect_loss_raw": 14.385569095611572, "correct_loss_per_char": 0.7360246181488037, "incorrect_loss_per_char": 1.2519385190237136, "correct_loss_per_token": 7.360246181488037, "incorrect_loss_per_token": 8.627686619758606, "correct_loss_uncond": -6.415989398956299, "incorrect_loss_uncond": -1.0624134540557861}, "model_output": [{"sum_logits": -11.479216575622559, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.495186805725098, "logits_per_token": -11.479216575622559, "logits_per_char": -1.2754685084025066, "num_chars": 9}, {"sum_logits": -8.929430961608887, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.202157974243164, "logits_per_token": -4.464715480804443, "logits_per_char": -0.6378164972577777, "num_chars": 14}, {"sum_logits": -20.8668212890625, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.24637222290039, "logits_per_token": -10.43341064453125, "logits_per_char": -1.7389017740885417, "num_chars": 12}, {"sum_logits": -16.266807556152344, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.848213195800781, "logits_per_token": -8.133403778076172, "logits_per_char": -1.3555672963460286, "num_chars": 12}, {"sum_logits": -7.360246181488037, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.776235580444336, "logits_per_token": -7.360246181488037, "logits_per_char": -0.7360246181488037, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1204, "native_id": "a2e0f6b5651e5271fcff8d6f5c9adfee", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.779207229614258, "incorrect_loss_raw": 11.370889902114868, "correct_loss_per_char": 0.6779207229614258, "incorrect_loss_per_char": 0.9333550560655015, "correct_loss_per_token": 3.389603614807129, "incorrect_loss_per_token": 6.556081811587015, "correct_loss_uncond": -8.058076858520508, "incorrect_loss_uncond": -4.9511399269104}, "model_output": [{"sum_logits": -6.779207229614258, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.837284088134766, "logits_per_token": -3.389603614807129, "logits_per_char": -0.6779207229614258, "num_chars": 10}, {"sum_logits": -9.26657485961914, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.284173965454102, "logits_per_token": -3.0888582865397134, "logits_per_char": -0.8424158963290128, "num_chars": 11}, {"sum_logits": -10.053953170776367, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.73450756072998, "logits_per_token": -10.053953170776367, "logits_per_char": -1.0053953170776366, "num_chars": 10}, {"sum_logits": -14.14289379119873, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.815725326538086, "logits_per_token": -7.071446895599365, "logits_per_char": -0.8839308619499207, "num_chars": 16}, {"sum_logits": -12.020137786865234, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.453712463378906, "logits_per_token": -6.010068893432617, "logits_per_char": -1.0016781489054363, "num_chars": 12}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1205, "native_id": "d6900a01a9dd6627b4bb22b0f6d191a5", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.485034942626953, "incorrect_loss_raw": 16.224674463272095, "correct_loss_per_char": 0.6856293678283691, "incorrect_loss_per_char": 1.0592828922143827, "correct_loss_per_token": 2.7425174713134766, "incorrect_loss_per_token": 4.615114132563273, "correct_loss_uncond": -14.69441032409668, "incorrect_loss_uncond": -8.144031286239624}, "model_output": [{"sum_logits": -20.84442138671875, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -33.29936218261719, "logits_per_token": -6.948140462239583, "logits_per_char": -1.8949473987926135, "num_chars": 11}, {"sum_logits": -14.012679100036621, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -22.53983497619629, "logits_per_token": -4.67089303334554, "logits_per_char": -0.8242752411786247, "num_chars": 17}, {"sum_logits": -19.034656524658203, "num_tokens": 6, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -22.968122482299805, "logits_per_token": -3.1724427541097007, "logits_per_char": -0.9064122154599145, "num_chars": 21}, {"sum_logits": -11.006940841674805, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.667503356933594, "logits_per_token": -3.668980280558268, "logits_per_char": -0.611496713426378, "num_chars": 18}, {"sum_logits": -5.485034942626953, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.179445266723633, "logits_per_token": -2.7425174713134766, "logits_per_char": -0.6856293678283691, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1206, "native_id": "8f2976690c83be6b8fa3a1196dfd9722", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.678144454956055, "incorrect_loss_raw": 10.860276222229004, "correct_loss_per_char": 0.578542963663737, "incorrect_loss_per_char": 1.1924850253613442, "correct_loss_per_token": 4.339072227478027, "incorrect_loss_per_token": 6.50838840007782, "correct_loss_uncond": -8.950462341308594, "incorrect_loss_uncond": -4.556702613830566}, "model_output": [{"sum_logits": -12.959763526916504, "num_tokens": 2, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -16.359838485717773, "logits_per_token": -6.479881763458252, "logits_per_char": -1.619970440864563, "num_chars": 8}, {"sum_logits": -10.248493194580078, "num_tokens": 2, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -16.277469635009766, "logits_per_token": -5.124246597290039, "logits_per_char": -1.0248493194580077, "num_chars": 10}, {"sum_logits": -11.60684585571289, "num_tokens": 2, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -16.418981552124023, "logits_per_token": -5.803422927856445, "logits_per_char": -0.8928342965932993, "num_chars": 13}, {"sum_logits": -8.626002311706543, "num_tokens": 1, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -12.611625671386719, "logits_per_token": -8.626002311706543, "logits_per_char": -1.232286044529506, "num_chars": 7}, {"sum_logits": -8.678144454956055, "num_tokens": 2, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -17.62860679626465, "logits_per_token": -4.339072227478027, "logits_per_char": -0.578542963663737, "num_chars": 15}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1207, "native_id": "570be8c1edb8c638603dc5c8cae421cc", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.33323860168457, "incorrect_loss_raw": 10.172927737236023, "correct_loss_per_char": 0.7618912288120815, "incorrect_loss_per_char": 1.4312390585740409, "correct_loss_per_token": 5.33323860168457, "incorrect_loss_per_token": 7.467148184776306, "correct_loss_uncond": -7.275298118591309, "incorrect_loss_uncond": -5.900459885597229}, "model_output": [{"sum_logits": -6.318756103515625, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.126147270202637, "logits_per_token": -6.318756103515625, "logits_per_char": -1.5796890258789062, "num_chars": 4}, {"sum_logits": -21.646236419677734, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -22.39358901977539, "logits_per_token": -10.823118209838867, "logits_per_char": -2.1646236419677733, "num_chars": 10}, {"sum_logits": -5.33323860168457, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.608536720275879, "logits_per_token": -5.33323860168457, "logits_per_char": -0.7618912288120815, "num_chars": 7}, {"sum_logits": -4.840286731719971, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.157535552978516, "logits_per_token": -4.840286731719971, "logits_per_char": -0.4033572276433309, "num_chars": 12}, {"sum_logits": -7.886431694030762, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.616278648376465, "logits_per_token": -7.886431694030762, "logits_per_char": -1.5772863388061524, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1208, "native_id": "08d3175de59a639be02f2ebc032d56bd", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.642260551452637, "incorrect_loss_raw": 12.039456129074097, "correct_loss_per_char": 0.5671917971442727, "incorrect_loss_per_char": 1.5317380337488085, "correct_loss_per_token": 4.821130275726318, "incorrect_loss_per_token": 7.203665375709534, "correct_loss_uncond": -10.554156303405762, "incorrect_loss_uncond": -7.001672744750977}, "model_output": [{"sum_logits": -18.846702575683594, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -27.566125869750977, "logits_per_token": -6.282234191894531, "logits_per_char": -1.5705585479736328, "num_chars": 12}, {"sum_logits": -13.557389259338379, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.658775329589844, "logits_per_token": -6.7786946296691895, "logits_per_char": -1.936769894191197, "num_chars": 7}, {"sum_logits": -5.975131988525391, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.224496841430664, "logits_per_token": -5.975131988525391, "logits_per_char": -0.663903554280599, "num_chars": 9}, {"sum_logits": -9.778600692749023, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.715117454528809, "logits_per_token": -9.778600692749023, "logits_per_char": -1.9557201385498046, "num_chars": 5}, {"sum_logits": -9.642260551452637, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.1964168548584, "logits_per_token": -4.821130275726318, "logits_per_char": -0.5671917971442727, "num_chars": 17}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1209, "native_id": "549cf641318edfc0510fa7c7dbb359e1", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.535336971282959, "incorrect_loss_raw": 9.436107873916626, "correct_loss_per_char": 0.3239526408059256, "incorrect_loss_per_char": 1.2835878417605444, "correct_loss_per_token": 2.2676684856414795, "incorrect_loss_per_token": 8.119927167892456, "correct_loss_uncond": -14.096474170684814, "incorrect_loss_uncond": -6.821335792541504}, "model_output": [{"sum_logits": -7.005489349365234, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.424250602722168, "logits_per_token": -7.005489349365234, "logits_per_char": -1.7513723373413086, "num_chars": 4}, {"sum_logits": -12.139348983764648, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.998748779296875, "logits_per_token": -12.139348983764648, "logits_per_char": -1.7341927119663783, "num_chars": 7}, {"sum_logits": -4.535336971282959, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.631811141967773, "logits_per_token": -2.2676684856414795, "logits_per_char": -0.3239526408059256, "num_chars": 14}, {"sum_logits": -10.52944564819336, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -22.382278442382812, "logits_per_token": -5.26472282409668, "logits_per_char": -0.7521032605852399, "num_chars": 14}, {"sum_logits": -8.070147514343262, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.224496841430664, "logits_per_token": -8.070147514343262, "logits_per_char": -0.8966830571492513, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1210, "native_id": "dfa23d3422b7294843447b6950d2b476", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 15.58462905883789, "incorrect_loss_raw": 14.162974119186401, "correct_loss_per_char": 1.0389752705891928, "incorrect_loss_per_char": 1.0071604823707638, "correct_loss_per_token": 5.194876352945964, "incorrect_loss_per_token": 7.081487059593201, "correct_loss_uncond": -5.457292556762695, "incorrect_loss_uncond": -5.5469276905059814}, "model_output": [{"sum_logits": -13.764364242553711, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.992870330810547, "logits_per_token": -6.8821821212768555, "logits_per_char": -0.9176242828369141, "num_chars": 15}, {"sum_logits": -17.35820198059082, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -22.758499145507812, "logits_per_token": -8.67910099029541, "logits_per_char": -1.2398715700422014, "num_chars": 14}, {"sum_logits": -11.1360502243042, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.68854331970215, "logits_per_token": -5.5680251121521, "logits_per_char": -1.11360502243042, "num_chars": 10}, {"sum_logits": -14.393280029296875, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.399694442749023, "logits_per_token": -7.1966400146484375, "logits_per_char": -0.7575410541735197, "num_chars": 19}, {"sum_logits": -15.58462905883789, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.041921615600586, "logits_per_token": -5.194876352945964, "logits_per_char": -1.0389752705891928, "num_chars": 15}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1211, "native_id": "1fe90a4aee405e1aa2279442d28803ae", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.587526321411133, "incorrect_loss_raw": 8.043516159057617, "correct_loss_per_char": 0.8822938601175944, "incorrect_loss_per_char": 0.922531533241272, "correct_loss_per_token": 5.293763160705566, "incorrect_loss_per_token": 4.021758079528809, "correct_loss_uncond": -11.185417175292969, "incorrect_loss_uncond": -9.528716564178467}, "model_output": [{"sum_logits": -8.568160057067871, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -19.194522857666016, "logits_per_token": -4.2840800285339355, "logits_per_char": -0.9520177841186523, "num_chars": 9}, {"sum_logits": -10.35387897491455, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -18.197635650634766, "logits_per_token": -5.176939487457275, "logits_per_char": -0.8628232479095459, "num_chars": 12}, {"sum_logits": -5.50082540512085, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.5350341796875, "logits_per_token": -2.750412702560425, "logits_per_char": -1.1001650810241699, "num_chars": 5}, {"sum_logits": -7.751200199127197, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -17.361738204956055, "logits_per_token": -3.8756000995635986, "logits_per_char": -0.7751200199127197, "num_chars": 10}, {"sum_logits": -10.587526321411133, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -21.7729434967041, "logits_per_token": -5.293763160705566, "logits_per_char": -0.8822938601175944, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1212, "native_id": "01794dde3ca2991615f1aa2f63fb22e3", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.625762939453125, "incorrect_loss_raw": 10.881549835205078, "correct_loss_per_char": 0.5782203674316406, "incorrect_loss_per_char": 1.0665077542003831, "correct_loss_per_token": 4.625762939453125, "incorrect_loss_per_token": 5.100619157155355, "correct_loss_uncond": -9.083585739135742, "incorrect_loss_uncond": -6.992461204528809}, "model_output": [{"sum_logits": -8.846414566040039, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -17.474275588989258, "logits_per_token": -4.4232072830200195, "logits_per_char": -0.46560076663368627, "num_chars": 19}, {"sum_logits": -8.163738250732422, "num_tokens": 3, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -17.814655303955078, "logits_per_token": -2.721246083577474, "logits_per_char": -0.8163738250732422, "num_chars": 10}, {"sum_logits": -13.217975616455078, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -17.45522689819336, "logits_per_token": -6.608987808227539, "logits_per_char": -1.3217975616455078, "num_chars": 10}, {"sum_logits": -13.298070907592773, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -18.75188636779785, "logits_per_token": -6.649035453796387, "logits_per_char": -1.6622588634490967, "num_chars": 8}, {"sum_logits": -4.625762939453125, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -13.709348678588867, "logits_per_token": -4.625762939453125, "logits_per_char": -0.5782203674316406, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1213, "native_id": "f794e376672c98ac25d8f70506a26e68", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 14.694199562072754, "incorrect_loss_raw": 12.14742922782898, "correct_loss_per_char": 1.0495856830051966, "incorrect_loss_per_char": 1.4529786335097419, "correct_loss_per_token": 7.347099781036377, "incorrect_loss_per_token": 10.835508346557617, "correct_loss_uncond": -3.859042167663574, "incorrect_loss_uncond": -2.687595844268799}, "model_output": [{"sum_logits": -14.694199562072754, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.553241729736328, "logits_per_token": -7.347099781036377, "logits_per_char": -1.0495856830051966, "num_chars": 14}, {"sum_logits": -12.52370548248291, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.969682693481445, "logits_per_token": -12.52370548248291, "logits_per_char": -1.39152283138699, "num_chars": 9}, {"sum_logits": -14.942347526550293, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.378087997436523, "logits_per_token": -14.942347526550293, "logits_per_char": -1.2451956272125244, "num_chars": 12}, {"sum_logits": -10.628296852111816, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.63059139251709, "logits_per_token": -10.628296852111816, "logits_per_char": -2.125659370422363, "num_chars": 5}, {"sum_logits": -10.495367050170898, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.361738204956055, "logits_per_token": -5.247683525085449, "logits_per_char": -1.0495367050170898, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1214, "native_id": "ace8fa2943ba8414aebdb74b48906fae", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 12.238899230957031, "incorrect_loss_raw": 12.103881359100342, "correct_loss_per_char": 1.0199082692464192, "incorrect_loss_per_char": 1.180431547582659, "correct_loss_per_token": 3.059724807739258, "incorrect_loss_per_token": 6.573023994763692, "correct_loss_uncond": -9.430688858032227, "incorrect_loss_uncond": -4.995327711105347}, "model_output": [{"sum_logits": -13.038675308227539, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -16.68340492248535, "logits_per_token": -4.346225102742513, "logits_per_char": -1.448741700914171, "num_chars": 9}, {"sum_logits": -8.514891624450684, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.402714729309082, "logits_per_token": -8.514891624450684, "logits_per_char": -1.419148604075114, "num_chars": 6}, {"sum_logits": -12.1308012008667, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -19.620946884155273, "logits_per_token": -6.06540060043335, "logits_per_char": -0.933138553912823, "num_chars": 13}, {"sum_logits": -14.731157302856445, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -19.689769744873047, "logits_per_token": -7.365578651428223, "logits_per_char": -0.9206973314285278, "num_chars": 16}, {"sum_logits": -12.238899230957031, "num_tokens": 4, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -21.669588088989258, "logits_per_token": -3.059724807739258, "logits_per_char": -1.0199082692464192, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1215, "native_id": "21ce6f7c5c3d1ad8cf234988c1ad471f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.024090766906738, "incorrect_loss_raw": 7.174564838409424, "correct_loss_per_char": 0.5853408972422282, "incorrect_loss_per_char": 1.1420040298075902, "correct_loss_per_token": 3.512045383453369, "incorrect_loss_per_token": 6.466198801994324, "correct_loss_uncond": -13.141097068786621, "incorrect_loss_uncond": -6.433308839797974}, "model_output": [{"sum_logits": -5.666928291320801, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.537425994873047, "logits_per_token": -2.8334641456604004, "logits_per_char": -0.8095611844744001, "num_chars": 7}, {"sum_logits": -7.024090766906738, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.16518783569336, "logits_per_token": -3.512045383453369, "logits_per_char": -0.5853408972422282, "num_chars": 12}, {"sum_logits": -6.394596099853516, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.641080856323242, "logits_per_token": -6.394596099853516, "logits_per_char": -1.0657660166422527, "num_chars": 6}, {"sum_logits": -8.46210765838623, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.5768461227417, "logits_per_token": -8.46210765838623, "logits_per_char": -1.0577634572982788, "num_chars": 8}, {"sum_logits": -8.174627304077148, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -11.676141738891602, "logits_per_token": -8.174627304077148, "logits_per_char": -1.6349254608154298, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1216, "native_id": "6c84e79d0595efd99596faa07c4961d0", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.9348297119140625, "incorrect_loss_raw": 8.399416446685791, "correct_loss_per_char": 1.1869659423828125, "incorrect_loss_per_char": 0.9851274854607053, "correct_loss_per_token": 5.9348297119140625, "incorrect_loss_per_token": 5.726477265357971, "correct_loss_uncond": -7.712244987487793, "incorrect_loss_uncond": -7.206374883651733}, "model_output": [{"sum_logits": -5.9348297119140625, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.647074699401855, "logits_per_token": -5.9348297119140625, "logits_per_char": -1.1869659423828125, "num_chars": 5}, {"sum_logits": -8.22244644165039, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.815696716308594, "logits_per_token": -4.111223220825195, "logits_per_char": -0.8222446441650391, "num_chars": 10}, {"sum_logits": -3.0997314453125, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.78950309753418, "logits_per_token": -3.0997314453125, "logits_per_char": -0.5166219075520834, "num_chars": 6}, {"sum_logits": -9.114420890808105, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.3323392868042, "logits_per_token": -9.114420890808105, "logits_per_char": -1.1393026113510132, "num_chars": 8}, {"sum_logits": -13.161067008972168, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.485626220703125, "logits_per_token": -6.580533504486084, "logits_per_char": -1.4623407787746854, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1217, "native_id": "88f1fe6cfbcb1a25f25454341c789463", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.8201303482055664, "incorrect_loss_raw": 9.445798516273499, "correct_loss_per_char": 0.25467535654703777, "incorrect_loss_per_char": 0.9966004729528963, "correct_loss_per_token": 1.9100651741027832, "incorrect_loss_per_token": 6.420618891716003, "correct_loss_uncond": -16.555880546569824, "incorrect_loss_uncond": -7.028768420219421}, "model_output": [{"sum_logits": -11.386744499206543, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.799386978149414, "logits_per_token": -5.6933722496032715, "logits_per_char": -0.9488953749338785, "num_chars": 12}, {"sum_logits": -3.8201303482055664, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.37601089477539, "logits_per_token": -1.9100651741027832, "logits_per_char": -0.25467535654703777, "num_chars": 15}, {"sum_logits": -12.814692497253418, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.823257446289062, "logits_per_token": -6.407346248626709, "logits_per_char": -1.6018365621566772, "num_chars": 8}, {"sum_logits": -3.8685717582702637, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.63223648071289, "logits_per_token": -3.8685717582702637, "logits_per_char": -0.5526531083243233, "num_chars": 7}, {"sum_logits": -9.71318531036377, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.643386840820312, "logits_per_token": -9.71318531036377, "logits_per_char": -0.8830168463967063, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1218, "native_id": "5074bcaf0f700c9f3c8c563067af156a", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.787357330322266, "incorrect_loss_raw": 9.810316681861877, "correct_loss_per_char": 0.6430397033691406, "incorrect_loss_per_char": 1.1414134701093037, "correct_loss_per_token": 5.787357330322266, "incorrect_loss_per_token": 7.872919996579488, "correct_loss_uncond": -10.07036304473877, "incorrect_loss_uncond": -4.990447163581848}, "model_output": [{"sum_logits": -8.352668762207031, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.360184669494629, "logits_per_token": -8.352668762207031, "logits_per_char": -1.6705337524414063, "num_chars": 5}, {"sum_logits": -7.995142459869385, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.58857536315918, "logits_per_token": -7.995142459869385, "logits_per_char": -0.7995142459869384, "num_chars": 10}, {"sum_logits": -5.787357330322266, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.857720375061035, "logits_per_token": -5.787357330322266, "logits_per_char": -0.6430397033691406, "num_chars": 9}, {"sum_logits": -11.269075393676758, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.251171112060547, "logits_per_token": -11.269075393676758, "logits_per_char": -1.1269075393676757, "num_chars": 10}, {"sum_logits": -11.624380111694336, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.003124237060547, "logits_per_token": -3.874793370564779, "logits_per_char": -0.9686983426411947, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1219, "native_id": "6a253e076cd2af00e17d9950d70daf47", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.191840171813965, "incorrect_loss_raw": 10.255956888198853, "correct_loss_per_char": 0.4818729512831744, "incorrect_loss_per_char": 1.2485193516526902, "correct_loss_per_token": 4.095920085906982, "incorrect_loss_per_token": 10.255956888198853, "correct_loss_uncond": -10.200840950012207, "incorrect_loss_uncond": -3.8022825717926025}, "model_output": [{"sum_logits": -8.191840171813965, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.392681121826172, "logits_per_token": -4.095920085906982, "logits_per_char": -0.4818729512831744, "num_chars": 17}, {"sum_logits": -11.848433494567871, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.4810791015625, "logits_per_token": -11.848433494567871, "logits_per_char": -1.1848433494567872, "num_chars": 10}, {"sum_logits": -12.15594482421875, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.474529266357422, "logits_per_token": -12.15594482421875, "logits_per_char": -1.5194931030273438, "num_chars": 8}, {"sum_logits": -7.930100440979004, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.742267608642578, "logits_per_token": -7.930100440979004, "logits_per_char": -0.9912625551223755, "num_chars": 8}, {"sum_logits": -9.089348793029785, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.53508186340332, "logits_per_token": -9.089348793029785, "logits_per_char": -1.298478399004255, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1220, "native_id": "5af7c7860e3be61d4cfd814cc109f9d9", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.47913122177124, "incorrect_loss_raw": 14.881514072418213, "correct_loss_per_char": 0.373956561088562, "incorrect_loss_per_char": 1.1535922043565392, "correct_loss_per_token": 2.4930437405904136, "incorrect_loss_per_token": 6.399878025054932, "correct_loss_uncond": -10.526228427886963, "incorrect_loss_uncond": -6.162747859954834}, "model_output": [{"sum_logits": -11.907604217529297, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.233928680419922, "logits_per_token": -5.953802108764648, "logits_per_char": -0.7938402811686198, "num_chars": 15}, {"sum_logits": -16.654064178466797, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -24.630496978759766, "logits_per_token": -4.163516044616699, "logits_per_char": -0.7240897468898607, "num_chars": 23}, {"sum_logits": -7.47913122177124, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.005359649658203, "logits_per_token": -2.4930437405904136, "logits_per_char": -0.373956561088562, "num_chars": 20}, {"sum_logits": -14.302427291870117, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.939943313598633, "logits_per_token": -7.151213645935059, "logits_per_char": -1.4302427291870117, "num_chars": 10}, {"sum_logits": -16.66196060180664, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.372678756713867, "logits_per_token": -8.33098030090332, "logits_per_char": -1.666196060180664, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "03418cf8091a9882619950ffb07429a5"}