{"doc_id": 0, "native_id": 0, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4626020193099976, "incorrect_loss_raw": 1.377543846766154, "correct_loss_per_char": 0.7313010096549988, "incorrect_loss_per_char": 0.688771923383077, "correct_loss_per_token": 1.4626020193099976, "incorrect_loss_per_token": 1.377543846766154, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.545853614807129, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": false, "logits_per_token": -1.545853614807129, "logits_per_char": -0.7729268074035645, "num_chars": 2}, {"sum_logits": -1.373976469039917, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": false, "logits_per_token": -1.373976469039917, "logits_per_char": -0.6869882345199585, "num_chars": 2}, {"sum_logits": -1.4626020193099976, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": false, "logits_per_token": -1.4626020193099976, "logits_per_char": -0.7313010096549988, "num_chars": 2}, {"sum_logits": -1.212801456451416, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": true, "logits_per_token": -1.212801456451416, "logits_per_char": -0.606400728225708, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 1, "native_id": 1, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4446001052856445, "incorrect_loss_raw": 1.3783042430877686, "correct_loss_per_char": 0.7223000526428223, "incorrect_loss_per_char": 0.6891521215438843, "correct_loss_per_token": 1.4446001052856445, "incorrect_loss_per_token": 1.3783042430877686, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3420472145080566, "num_tokens": 1, "num_tokens_all": 479, "is_greedy": false, "logits_per_token": -1.3420472145080566, "logits_per_char": -0.6710236072540283, "num_chars": 2}, {"sum_logits": -1.2866718769073486, "num_tokens": 1, "num_tokens_all": 479, "is_greedy": true, "logits_per_token": -1.2866718769073486, "logits_per_char": -0.6433359384536743, "num_chars": 2}, {"sum_logits": -1.4446001052856445, "num_tokens": 1, "num_tokens_all": 479, "is_greedy": false, "logits_per_token": -1.4446001052856445, "logits_per_char": -0.7223000526428223, "num_chars": 2}, {"sum_logits": -1.5061936378479004, "num_tokens": 1, "num_tokens_all": 479, "is_greedy": false, "logits_per_token": -1.5061936378479004, "logits_per_char": -0.7530968189239502, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 2, "native_id": 2, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5311728715896606, "incorrect_loss_raw": 1.3528168598810832, "correct_loss_per_char": 0.7655864357948303, "incorrect_loss_per_char": 0.6764084299405416, "correct_loss_per_token": 1.5311728715896606, "incorrect_loss_per_token": 1.3528168598810832, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5311728715896606, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.5311728715896606, "logits_per_char": -0.7655864357948303, "num_chars": 2}, {"sum_logits": -1.277451992034912, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": true, "logits_per_token": -1.277451992034912, "logits_per_char": -0.638725996017456, "num_chars": 2}, {"sum_logits": -1.4558924436569214, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.4558924436569214, "logits_per_char": -0.7279462218284607, "num_chars": 2}, {"sum_logits": -1.325106143951416, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.325106143951416, "logits_per_char": -0.662553071975708, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 3, "native_id": 3, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3876533508300781, "incorrect_loss_raw": 1.3945255279541016, "correct_loss_per_char": 0.6938266754150391, "incorrect_loss_per_char": 0.6972627639770508, "correct_loss_per_token": 1.3876533508300781, "incorrect_loss_per_token": 1.3945255279541016, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.469810128211975, "num_tokens": 1, "num_tokens_all": 444, "is_greedy": false, "logits_per_token": -1.469810128211975, "logits_per_char": -0.7349050641059875, "num_chars": 2}, {"sum_logits": -1.3876533508300781, "num_tokens": 1, "num_tokens_all": 444, "is_greedy": false, "logits_per_token": -1.3876533508300781, "logits_per_char": -0.6938266754150391, "num_chars": 2}, {"sum_logits": -1.3960531949996948, "num_tokens": 1, "num_tokens_all": 444, "is_greedy": false, "logits_per_token": -1.3960531949996948, "logits_per_char": -0.6980265974998474, "num_chars": 2}, {"sum_logits": -1.3177132606506348, "num_tokens": 1, "num_tokens_all": 444, "is_greedy": true, "logits_per_token": -1.3177132606506348, "logits_per_char": -0.6588566303253174, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 4, "native_id": 4, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4230189323425293, "incorrect_loss_raw": 1.3818968137105305, "correct_loss_per_char": 0.7115094661712646, "incorrect_loss_per_char": 0.6909484068552653, "correct_loss_per_token": 1.4230189323425293, "incorrect_loss_per_token": 1.3818968137105305, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4230189323425293, "num_tokens": 1, "num_tokens_all": 449, "is_greedy": false, "logits_per_token": -1.4230189323425293, "logits_per_char": -0.7115094661712646, "num_chars": 2}, {"sum_logits": -1.3735930919647217, "num_tokens": 1, "num_tokens_all": 449, "is_greedy": false, "logits_per_token": -1.3735930919647217, "logits_per_char": -0.6867965459823608, "num_chars": 2}, {"sum_logits": -1.2911982536315918, "num_tokens": 1, "num_tokens_all": 449, "is_greedy": true, "logits_per_token": -1.2911982536315918, "logits_per_char": -0.6455991268157959, "num_chars": 2}, {"sum_logits": -1.4808990955352783, "num_tokens": 1, "num_tokens_all": 449, "is_greedy": false, "logits_per_token": -1.4808990955352783, "logits_per_char": -0.7404495477676392, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 5, "native_id": 5, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0128052234649658, "incorrect_loss_raw": 1.5708976586659749, "correct_loss_per_char": 0.5064026117324829, "incorrect_loss_per_char": 0.7854488293329874, "correct_loss_per_token": 1.0128052234649658, "incorrect_loss_per_token": 1.5708976586659749, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0128052234649658, "num_tokens": 1, "num_tokens_all": 465, "is_greedy": true, "logits_per_token": -1.0128052234649658, "logits_per_char": -0.5064026117324829, "num_chars": 2}, {"sum_logits": -1.4064719676971436, "num_tokens": 1, "num_tokens_all": 465, "is_greedy": false, "logits_per_token": -1.4064719676971436, "logits_per_char": -0.7032359838485718, "num_chars": 2}, {"sum_logits": -1.699242353439331, "num_tokens": 1, "num_tokens_all": 465, "is_greedy": false, "logits_per_token": -1.699242353439331, "logits_per_char": -0.8496211767196655, "num_chars": 2}, {"sum_logits": -1.6069786548614502, "num_tokens": 1, "num_tokens_all": 465, "is_greedy": false, "logits_per_token": -1.6069786548614502, "logits_per_char": -0.8034893274307251, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 6, "native_id": 6, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3560919761657715, "incorrect_loss_raw": 1.4070260127385457, "correct_loss_per_char": 0.6780459880828857, "incorrect_loss_per_char": 0.7035130063692728, "correct_loss_per_token": 1.3560919761657715, "incorrect_loss_per_token": 1.4070260127385457, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.498753547668457, "num_tokens": 1, "num_tokens_all": 438, "is_greedy": false, "logits_per_token": -1.498753547668457, "logits_per_char": -0.7493767738342285, "num_chars": 2}, {"sum_logits": -1.3560919761657715, "num_tokens": 1, "num_tokens_all": 438, "is_greedy": false, "logits_per_token": -1.3560919761657715, "logits_per_char": -0.6780459880828857, "num_chars": 2}, {"sum_logits": -1.3756349086761475, "num_tokens": 1, "num_tokens_all": 438, "is_greedy": false, "logits_per_token": -1.3756349086761475, "logits_per_char": -0.6878174543380737, "num_chars": 2}, {"sum_logits": -1.3466895818710327, "num_tokens": 1, "num_tokens_all": 438, "is_greedy": true, "logits_per_token": -1.3466895818710327, "logits_per_char": -0.6733447909355164, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 7, "native_id": 7, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3468395471572876, "incorrect_loss_raw": 1.4108672142028809, "correct_loss_per_char": 0.6734197735786438, "incorrect_loss_per_char": 0.7054336071014404, "correct_loss_per_token": 1.3468395471572876, "incorrect_loss_per_token": 1.4108672142028809, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4970121383666992, "num_tokens": 1, "num_tokens_all": 436, "is_greedy": false, "logits_per_token": -1.4970121383666992, "logits_per_char": -0.7485060691833496, "num_chars": 2}, {"sum_logits": -1.3468395471572876, "num_tokens": 1, "num_tokens_all": 436, "is_greedy": true, "logits_per_token": -1.3468395471572876, "logits_per_char": -0.6734197735786438, "num_chars": 2}, {"sum_logits": -1.349867582321167, "num_tokens": 1, "num_tokens_all": 436, "is_greedy": false, "logits_per_token": -1.349867582321167, "logits_per_char": -0.6749337911605835, "num_chars": 2}, {"sum_logits": -1.3857219219207764, "num_tokens": 1, "num_tokens_all": 436, "is_greedy": false, "logits_per_token": -1.3857219219207764, "logits_per_char": -0.6928609609603882, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 8, "native_id": 8, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2813470363616943, "incorrect_loss_raw": 1.4299967686335247, "correct_loss_per_char": 0.6406735181808472, "incorrect_loss_per_char": 0.7149983843167623, "correct_loss_per_token": 1.2813470363616943, "incorrect_loss_per_token": 1.4299967686335247, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4278990030288696, "num_tokens": 1, "num_tokens_all": 457, "is_greedy": false, "logits_per_token": -1.4278990030288696, "logits_per_char": -0.7139495015144348, "num_chars": 2}, {"sum_logits": -1.4312413930892944, "num_tokens": 1, "num_tokens_all": 457, "is_greedy": false, "logits_per_token": -1.4312413930892944, "logits_per_char": -0.7156206965446472, "num_chars": 2}, {"sum_logits": -1.4308499097824097, "num_tokens": 1, "num_tokens_all": 457, "is_greedy": false, "logits_per_token": -1.4308499097824097, "logits_per_char": -0.7154249548912048, "num_chars": 2}, {"sum_logits": -1.2813470363616943, "num_tokens": 1, "num_tokens_all": 457, "is_greedy": true, "logits_per_token": -1.2813470363616943, "logits_per_char": -0.6406735181808472, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 9, "native_id": 9, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.439144492149353, "incorrect_loss_raw": 1.3787542184193928, "correct_loss_per_char": 0.7195722460746765, "incorrect_loss_per_char": 0.6893771092096964, "correct_loss_per_token": 1.439144492149353, "incorrect_loss_per_token": 1.3787542184193928, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4652501344680786, "num_tokens": 1, "num_tokens_all": 471, "is_greedy": false, "logits_per_token": -1.4652501344680786, "logits_per_char": -0.7326250672340393, "num_chars": 2}, {"sum_logits": -1.368533968925476, "num_tokens": 1, "num_tokens_all": 471, "is_greedy": false, "logits_per_token": -1.368533968925476, "logits_per_char": -0.684266984462738, "num_chars": 2}, {"sum_logits": -1.439144492149353, "num_tokens": 1, "num_tokens_all": 471, "is_greedy": false, "logits_per_token": -1.439144492149353, "logits_per_char": -0.7195722460746765, "num_chars": 2}, {"sum_logits": -1.302478551864624, "num_tokens": 1, "num_tokens_all": 471, "is_greedy": true, "logits_per_token": -1.302478551864624, "logits_per_char": -0.651239275932312, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 10, "native_id": 10, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.459897756576538, "incorrect_loss_raw": 1.3751092751820881, "correct_loss_per_char": 0.729948878288269, "incorrect_loss_per_char": 0.6875546375910441, "correct_loss_per_token": 1.459897756576538, "incorrect_loss_per_token": 1.3751092751820881, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3274199962615967, "num_tokens": 1, "num_tokens_all": 441, "is_greedy": false, "logits_per_token": -1.3274199962615967, "logits_per_char": -0.6637099981307983, "num_chars": 2}, {"sum_logits": -1.544297218322754, "num_tokens": 1, "num_tokens_all": 441, "is_greedy": false, "logits_per_token": -1.544297218322754, "logits_per_char": -0.772148609161377, "num_chars": 2}, {"sum_logits": -1.459897756576538, "num_tokens": 1, "num_tokens_all": 441, "is_greedy": false, "logits_per_token": -1.459897756576538, "logits_per_char": -0.729948878288269, "num_chars": 2}, {"sum_logits": -1.253610610961914, "num_tokens": 1, "num_tokens_all": 441, "is_greedy": true, "logits_per_token": -1.253610610961914, "logits_per_char": -0.626805305480957, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 11, "native_id": 11, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5165832042694092, "incorrect_loss_raw": 1.3579479853312175, "correct_loss_per_char": 0.7582916021347046, "incorrect_loss_per_char": 0.6789739926656088, "correct_loss_per_token": 1.5165832042694092, "incorrect_loss_per_token": 1.3579479853312175, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3909183740615845, "num_tokens": 1, "num_tokens_all": 444, "is_greedy": false, "logits_per_token": -1.3909183740615845, "logits_per_char": -0.6954591870307922, "num_chars": 2}, {"sum_logits": -1.2595559358596802, "num_tokens": 1, "num_tokens_all": 444, "is_greedy": true, "logits_per_token": -1.2595559358596802, "logits_per_char": -0.6297779679298401, "num_chars": 2}, {"sum_logits": -1.4233696460723877, "num_tokens": 1, "num_tokens_all": 444, "is_greedy": false, "logits_per_token": -1.4233696460723877, "logits_per_char": -0.7116848230361938, "num_chars": 2}, {"sum_logits": -1.5165832042694092, "num_tokens": 1, "num_tokens_all": 444, "is_greedy": false, "logits_per_token": -1.5165832042694092, "logits_per_char": -0.7582916021347046, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 12, "native_id": 12, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.388978123664856, "incorrect_loss_raw": 1.4101134538650513, "correct_loss_per_char": 0.694489061832428, "incorrect_loss_per_char": 0.7050567269325256, "correct_loss_per_token": 1.388978123664856, "incorrect_loss_per_token": 1.4101134538650513, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6460464000701904, "num_tokens": 1, "num_tokens_all": 538, "is_greedy": false, "logits_per_token": -1.6460464000701904, "logits_per_char": -0.8230232000350952, "num_chars": 2}, {"sum_logits": -1.4200611114501953, "num_tokens": 1, "num_tokens_all": 538, "is_greedy": false, "logits_per_token": -1.4200611114501953, "logits_per_char": -0.7100305557250977, "num_chars": 2}, {"sum_logits": -1.388978123664856, "num_tokens": 1, "num_tokens_all": 538, "is_greedy": false, "logits_per_token": -1.388978123664856, "logits_per_char": -0.694489061832428, "num_chars": 2}, {"sum_logits": -1.164232850074768, "num_tokens": 1, "num_tokens_all": 538, "is_greedy": true, "logits_per_token": -1.164232850074768, "logits_per_char": -0.582116425037384, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 13, "native_id": 13, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4255385398864746, "incorrect_loss_raw": 1.4352378050486247, "correct_loss_per_char": 0.7127692699432373, "incorrect_loss_per_char": 0.7176189025243124, "correct_loss_per_token": 1.4255385398864746, "incorrect_loss_per_token": 1.4352378050486247, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.380840539932251, "num_tokens": 1, "num_tokens_all": 444, "is_greedy": true, "logits_per_token": -1.380840539932251, "logits_per_char": -0.6904202699661255, "num_chars": 2}, {"sum_logits": -1.4745817184448242, "num_tokens": 1, "num_tokens_all": 444, "is_greedy": false, "logits_per_token": -1.4745817184448242, "logits_per_char": -0.7372908592224121, "num_chars": 2}, {"sum_logits": -1.4502911567687988, "num_tokens": 1, "num_tokens_all": 444, "is_greedy": false, "logits_per_token": -1.4502911567687988, "logits_per_char": -0.7251455783843994, "num_chars": 2}, {"sum_logits": -1.4255385398864746, "num_tokens": 1, "num_tokens_all": 444, "is_greedy": false, "logits_per_token": -1.4255385398864746, "logits_per_char": -0.7127692699432373, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 14, "native_id": 14, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5217705965042114, "incorrect_loss_raw": 1.3522565762201946, "correct_loss_per_char": 0.7608852982521057, "incorrect_loss_per_char": 0.6761282881100973, "correct_loss_per_token": 1.5217705965042114, "incorrect_loss_per_token": 1.3522565762201946, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5217705965042114, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": false, "logits_per_token": -1.5217705965042114, "logits_per_char": -0.7608852982521057, "num_chars": 2}, {"sum_logits": -1.38364839553833, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": false, "logits_per_token": -1.38364839553833, "logits_per_char": -0.691824197769165, "num_chars": 2}, {"sum_logits": -1.39031183719635, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": false, "logits_per_token": -1.39031183719635, "logits_per_char": -0.695155918598175, "num_chars": 2}, {"sum_logits": -1.2828094959259033, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": true, "logits_per_token": -1.2828094959259033, "logits_per_char": -0.6414047479629517, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 15, "native_id": 15, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.378201961517334, "incorrect_loss_raw": 1.4031339883804321, "correct_loss_per_char": 0.689100980758667, "incorrect_loss_per_char": 0.7015669941902161, "correct_loss_per_token": 1.378201961517334, "incorrect_loss_per_token": 1.4031339883804321, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5684709548950195, "num_tokens": 1, "num_tokens_all": 456, "is_greedy": false, "logits_per_token": -1.5684709548950195, "logits_per_char": -0.7842354774475098, "num_chars": 2}, {"sum_logits": -1.2848790884017944, "num_tokens": 1, "num_tokens_all": 456, "is_greedy": true, "logits_per_token": -1.2848790884017944, "logits_per_char": -0.6424395442008972, "num_chars": 2}, {"sum_logits": -1.378201961517334, "num_tokens": 1, "num_tokens_all": 456, "is_greedy": false, "logits_per_token": -1.378201961517334, "logits_per_char": -0.689100980758667, "num_chars": 2}, {"sum_logits": -1.3560519218444824, "num_tokens": 1, "num_tokens_all": 456, "is_greedy": false, "logits_per_token": -1.3560519218444824, "logits_per_char": -0.6780259609222412, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 16, "native_id": 16, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.340018630027771, "incorrect_loss_raw": 1.424018661181132, "correct_loss_per_char": 0.6700093150138855, "incorrect_loss_per_char": 0.712009330590566, "correct_loss_per_token": 1.340018630027771, "incorrect_loss_per_token": 1.424018661181132, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6510766744613647, "num_tokens": 1, "num_tokens_all": 488, "is_greedy": false, "logits_per_token": -1.6510766744613647, "logits_per_char": -0.8255383372306824, "num_chars": 2}, {"sum_logits": -1.4149768352508545, "num_tokens": 1, "num_tokens_all": 488, "is_greedy": false, "logits_per_token": -1.4149768352508545, "logits_per_char": -0.7074884176254272, "num_chars": 2}, {"sum_logits": -1.340018630027771, "num_tokens": 1, "num_tokens_all": 488, "is_greedy": false, "logits_per_token": -1.340018630027771, "logits_per_char": -0.6700093150138855, "num_chars": 2}, {"sum_logits": -1.2060024738311768, "num_tokens": 1, "num_tokens_all": 488, "is_greedy": true, "logits_per_token": -1.2060024738311768, "logits_per_char": -0.6030012369155884, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 17, "native_id": 17, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5025138854980469, "incorrect_loss_raw": 1.381672700246175, "correct_loss_per_char": 0.7512569427490234, "incorrect_loss_per_char": 0.6908363501230875, "correct_loss_per_token": 1.5025138854980469, "incorrect_loss_per_token": 1.381672700246175, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6403348445892334, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": false, "logits_per_token": -1.6403348445892334, "logits_per_char": -0.8201674222946167, "num_chars": 2}, {"sum_logits": -1.4261932373046875, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": false, "logits_per_token": -1.4261932373046875, "logits_per_char": -0.7130966186523438, "num_chars": 2}, {"sum_logits": -1.5025138854980469, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": false, "logits_per_token": -1.5025138854980469, "logits_per_char": -0.7512569427490234, "num_chars": 2}, {"sum_logits": -1.0784900188446045, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": true, "logits_per_token": -1.0784900188446045, "logits_per_char": -0.5392450094223022, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 18, "native_id": 18, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3774446249008179, "incorrect_loss_raw": 1.3966862360636394, "correct_loss_per_char": 0.6887223124504089, "incorrect_loss_per_char": 0.6983431180318197, "correct_loss_per_token": 1.3774446249008179, "incorrect_loss_per_token": 1.3966862360636394, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4479761123657227, "num_tokens": 1, "num_tokens_all": 471, "is_greedy": false, "logits_per_token": -1.4479761123657227, "logits_per_char": -0.7239880561828613, "num_chars": 2}, {"sum_logits": -1.3065640926361084, "num_tokens": 1, "num_tokens_all": 471, "is_greedy": true, "logits_per_token": -1.3065640926361084, "logits_per_char": -0.6532820463180542, "num_chars": 2}, {"sum_logits": -1.3774446249008179, "num_tokens": 1, "num_tokens_all": 471, "is_greedy": false, "logits_per_token": -1.3774446249008179, "logits_per_char": -0.6887223124504089, "num_chars": 2}, {"sum_logits": -1.435518503189087, "num_tokens": 1, "num_tokens_all": 471, "is_greedy": false, "logits_per_token": -1.435518503189087, "logits_per_char": -0.7177592515945435, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 19, "native_id": 19, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4883350133895874, "incorrect_loss_raw": 1.3626591364542644, "correct_loss_per_char": 0.7441675066947937, "incorrect_loss_per_char": 0.6813295682271322, "correct_loss_per_token": 1.4883350133895874, "incorrect_loss_per_token": 1.3626591364542644, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4883350133895874, "num_tokens": 1, "num_tokens_all": 442, "is_greedy": false, "logits_per_token": -1.4883350133895874, "logits_per_char": -0.7441675066947937, "num_chars": 2}, {"sum_logits": -1.293162226676941, "num_tokens": 1, "num_tokens_all": 442, "is_greedy": true, "logits_per_token": -1.293162226676941, "logits_per_char": -0.6465811133384705, "num_chars": 2}, {"sum_logits": -1.3758958578109741, "num_tokens": 1, "num_tokens_all": 442, "is_greedy": false, "logits_per_token": -1.3758958578109741, "logits_per_char": -0.6879479289054871, "num_chars": 2}, {"sum_logits": -1.418919324874878, "num_tokens": 1, "num_tokens_all": 442, "is_greedy": false, "logits_per_token": -1.418919324874878, "logits_per_char": -0.709459662437439, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 20, "native_id": 20, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4179670810699463, "incorrect_loss_raw": 1.3928889433542888, "correct_loss_per_char": 0.7089835405349731, "incorrect_loss_per_char": 0.6964444716771444, "correct_loss_per_token": 1.4179670810699463, "incorrect_loss_per_token": 1.3928889433542888, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.576961636543274, "num_tokens": 1, "num_tokens_all": 501, "is_greedy": false, "logits_per_token": -1.576961636543274, "logits_per_char": -0.788480818271637, "num_chars": 2}, {"sum_logits": -1.2094652652740479, "num_tokens": 1, "num_tokens_all": 501, "is_greedy": true, "logits_per_token": -1.2094652652740479, "logits_per_char": -0.6047326326370239, "num_chars": 2}, {"sum_logits": -1.3922399282455444, "num_tokens": 1, "num_tokens_all": 501, "is_greedy": false, "logits_per_token": -1.3922399282455444, "logits_per_char": -0.6961199641227722, "num_chars": 2}, {"sum_logits": -1.4179670810699463, "num_tokens": 1, "num_tokens_all": 501, "is_greedy": false, "logits_per_token": -1.4179670810699463, "logits_per_char": -0.7089835405349731, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 21, "native_id": 21, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4130017757415771, "incorrect_loss_raw": 1.3836006323496501, "correct_loss_per_char": 0.7065008878707886, "incorrect_loss_per_char": 0.6918003161748251, "correct_loss_per_token": 1.4130017757415771, "incorrect_loss_per_token": 1.3836006323496501, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3974696397781372, "num_tokens": 1, "num_tokens_all": 447, "is_greedy": false, "logits_per_token": -1.3974696397781372, "logits_per_char": -0.6987348198890686, "num_chars": 2}, {"sum_logits": -1.4374659061431885, "num_tokens": 1, "num_tokens_all": 447, "is_greedy": false, "logits_per_token": -1.4374659061431885, "logits_per_char": -0.7187329530715942, "num_chars": 2}, {"sum_logits": -1.4130017757415771, "num_tokens": 1, "num_tokens_all": 447, "is_greedy": false, "logits_per_token": -1.4130017757415771, "logits_per_char": -0.7065008878707886, "num_chars": 2}, {"sum_logits": -1.3158663511276245, "num_tokens": 1, "num_tokens_all": 447, "is_greedy": true, "logits_per_token": -1.3158663511276245, "logits_per_char": -0.6579331755638123, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 22, "native_id": 22, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4824837446212769, "incorrect_loss_raw": 1.3661762873331706, "correct_loss_per_char": 0.7412418723106384, "incorrect_loss_per_char": 0.6830881436665853, "correct_loss_per_token": 1.4824837446212769, "incorrect_loss_per_token": 1.3661762873331706, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4851511716842651, "num_tokens": 1, "num_tokens_all": 436, "is_greedy": false, "logits_per_token": -1.4851511716842651, "logits_per_char": -0.7425755858421326, "num_chars": 2}, {"sum_logits": -1.4824837446212769, "num_tokens": 1, "num_tokens_all": 436, "is_greedy": false, "logits_per_token": -1.4824837446212769, "logits_per_char": -0.7412418723106384, "num_chars": 2}, {"sum_logits": -1.3600236177444458, "num_tokens": 1, "num_tokens_all": 436, "is_greedy": false, "logits_per_token": -1.3600236177444458, "logits_per_char": -0.6800118088722229, "num_chars": 2}, {"sum_logits": -1.2533540725708008, "num_tokens": 1, "num_tokens_all": 436, "is_greedy": true, "logits_per_token": -1.2533540725708008, "logits_per_char": -0.6266770362854004, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 23, "native_id": 23, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5779058933258057, "incorrect_loss_raw": 1.3413942257563274, "correct_loss_per_char": 0.7889529466629028, "incorrect_loss_per_char": 0.6706971128781637, "correct_loss_per_token": 1.5779058933258057, "incorrect_loss_per_token": 1.3413942257563274, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5779058933258057, "num_tokens": 1, "num_tokens_all": 438, "is_greedy": false, "logits_per_token": -1.5779058933258057, "logits_per_char": -0.7889529466629028, "num_chars": 2}, {"sum_logits": -1.4108346700668335, "num_tokens": 1, "num_tokens_all": 438, "is_greedy": false, "logits_per_token": -1.4108346700668335, "logits_per_char": -0.7054173350334167, "num_chars": 2}, {"sum_logits": -1.422274112701416, "num_tokens": 1, "num_tokens_all": 438, "is_greedy": false, "logits_per_token": -1.422274112701416, "logits_per_char": -0.711137056350708, "num_chars": 2}, {"sum_logits": -1.1910738945007324, "num_tokens": 1, "num_tokens_all": 438, "is_greedy": true, "logits_per_token": -1.1910738945007324, "logits_per_char": -0.5955369472503662, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 24, "native_id": 24, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3650879859924316, "incorrect_loss_raw": 1.4018768469492595, "correct_loss_per_char": 0.6825439929962158, "incorrect_loss_per_char": 0.7009384234746298, "correct_loss_per_token": 1.3650879859924316, "incorrect_loss_per_token": 1.4018768469492595, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4398750066757202, "num_tokens": 1, "num_tokens_all": 434, "is_greedy": false, "logits_per_token": -1.4398750066757202, "logits_per_char": -0.7199375033378601, "num_chars": 2}, {"sum_logits": -1.3157280683517456, "num_tokens": 1, "num_tokens_all": 434, "is_greedy": true, "logits_per_token": -1.3157280683517456, "logits_per_char": -0.6578640341758728, "num_chars": 2}, {"sum_logits": -1.4500274658203125, "num_tokens": 1, "num_tokens_all": 434, "is_greedy": false, "logits_per_token": -1.4500274658203125, "logits_per_char": -0.7250137329101562, "num_chars": 2}, {"sum_logits": -1.3650879859924316, "num_tokens": 1, "num_tokens_all": 434, "is_greedy": false, "logits_per_token": -1.3650879859924316, "logits_per_char": -0.6825439929962158, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 25, "native_id": 25, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3811506032943726, "incorrect_loss_raw": 1.3956165711085002, "correct_loss_per_char": 0.6905753016471863, "incorrect_loss_per_char": 0.6978082855542501, "correct_loss_per_token": 1.3811506032943726, "incorrect_loss_per_token": 1.3956165711085002, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.480710506439209, "num_tokens": 1, "num_tokens_all": 443, "is_greedy": false, "logits_per_token": -1.480710506439209, "logits_per_char": -0.7403552532196045, "num_chars": 2}, {"sum_logits": -1.3184266090393066, "num_tokens": 1, "num_tokens_all": 443, "is_greedy": true, "logits_per_token": -1.3184266090393066, "logits_per_char": -0.6592133045196533, "num_chars": 2}, {"sum_logits": -1.3877125978469849, "num_tokens": 1, "num_tokens_all": 443, "is_greedy": false, "logits_per_token": -1.3877125978469849, "logits_per_char": -0.6938562989234924, "num_chars": 2}, {"sum_logits": -1.3811506032943726, "num_tokens": 1, "num_tokens_all": 443, "is_greedy": false, "logits_per_token": -1.3811506032943726, "logits_per_char": -0.6905753016471863, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 26, "native_id": 26, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5122178792953491, "incorrect_loss_raw": 1.3558917840321858, "correct_loss_per_char": 0.7561089396476746, "incorrect_loss_per_char": 0.6779458920160929, "correct_loss_per_token": 1.5122178792953491, "incorrect_loss_per_token": 1.3558917840321858, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5122178792953491, "num_tokens": 1, "num_tokens_all": 581, "is_greedy": false, "logits_per_token": -1.5122178792953491, "logits_per_char": -0.7561089396476746, "num_chars": 2}, {"sum_logits": -1.3435158729553223, "num_tokens": 1, "num_tokens_all": 581, "is_greedy": false, "logits_per_token": -1.3435158729553223, "logits_per_char": -0.6717579364776611, "num_chars": 2}, {"sum_logits": -1.4623253345489502, "num_tokens": 1, "num_tokens_all": 581, "is_greedy": false, "logits_per_token": -1.4623253345489502, "logits_per_char": -0.7311626672744751, "num_chars": 2}, {"sum_logits": -1.2618341445922852, "num_tokens": 1, "num_tokens_all": 581, "is_greedy": true, "logits_per_token": -1.2618341445922852, "logits_per_char": -0.6309170722961426, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 27, "native_id": 27, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4622215032577515, "incorrect_loss_raw": 1.371818979581197, "correct_loss_per_char": 0.7311107516288757, "incorrect_loss_per_char": 0.6859094897905985, "correct_loss_per_token": 1.4622215032577515, "incorrect_loss_per_token": 1.371818979581197, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3537983894348145, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": false, "logits_per_token": -1.3537983894348145, "logits_per_char": -0.6768991947174072, "num_chars": 2}, {"sum_logits": -1.4144656658172607, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": false, "logits_per_token": -1.4144656658172607, "logits_per_char": -0.7072328329086304, "num_chars": 2}, {"sum_logits": -1.4622215032577515, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": false, "logits_per_token": -1.4622215032577515, "logits_per_char": -0.7311107516288757, "num_chars": 2}, {"sum_logits": -1.3471928834915161, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": true, "logits_per_token": -1.3471928834915161, "logits_per_char": -0.6735964417457581, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 28, "native_id": 28, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2688506841659546, "incorrect_loss_raw": 1.4412580728530884, "correct_loss_per_char": 0.6344253420829773, "incorrect_loss_per_char": 0.7206290364265442, "correct_loss_per_token": 1.2688506841659546, "incorrect_loss_per_token": 1.4412580728530884, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2688506841659546, "num_tokens": 1, "num_tokens_all": 478, "is_greedy": true, "logits_per_token": -1.2688506841659546, "logits_per_char": -0.6344253420829773, "num_chars": 2}, {"sum_logits": -1.3575676679611206, "num_tokens": 1, "num_tokens_all": 478, "is_greedy": false, "logits_per_token": -1.3575676679611206, "logits_per_char": -0.6787838339805603, "num_chars": 2}, {"sum_logits": -1.5443259477615356, "num_tokens": 1, "num_tokens_all": 478, "is_greedy": false, "logits_per_token": -1.5443259477615356, "logits_per_char": -0.7721629738807678, "num_chars": 2}, {"sum_logits": -1.4218806028366089, "num_tokens": 1, "num_tokens_all": 478, "is_greedy": false, "logits_per_token": -1.4218806028366089, "logits_per_char": -0.7109403014183044, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 29, "native_id": 29, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4320406913757324, "incorrect_loss_raw": 1.3795276880264282, "correct_loss_per_char": 0.7160203456878662, "incorrect_loss_per_char": 0.6897638440132141, "correct_loss_per_token": 1.4320406913757324, "incorrect_loss_per_token": 1.3795276880264282, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4525747299194336, "num_tokens": 1, "num_tokens_all": 449, "is_greedy": false, "logits_per_token": -1.4525747299194336, "logits_per_char": -0.7262873649597168, "num_chars": 2}, {"sum_logits": -1.3358830213546753, "num_tokens": 1, "num_tokens_all": 449, "is_greedy": true, "logits_per_token": -1.3358830213546753, "logits_per_char": -0.6679415106773376, "num_chars": 2}, {"sum_logits": -1.4320406913757324, "num_tokens": 1, "num_tokens_all": 449, "is_greedy": false, "logits_per_token": -1.4320406913757324, "logits_per_char": -0.7160203456878662, "num_chars": 2}, {"sum_logits": -1.3501253128051758, "num_tokens": 1, "num_tokens_all": 449, "is_greedy": false, "logits_per_token": -1.3501253128051758, "logits_per_char": -0.6750626564025879, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 30, "native_id": 30, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0720086097717285, "incorrect_loss_raw": 1.5509158770243328, "correct_loss_per_char": 0.5360043048858643, "incorrect_loss_per_char": 0.7754579385121664, "correct_loss_per_token": 1.0720086097717285, "incorrect_loss_per_token": 1.5509158770243328, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0720086097717285, "num_tokens": 1, "num_tokens_all": 433, "is_greedy": true, "logits_per_token": -1.0720086097717285, "logits_per_char": -0.5360043048858643, "num_chars": 2}, {"sum_logits": -1.31266450881958, "num_tokens": 1, "num_tokens_all": 433, "is_greedy": false, "logits_per_token": -1.31266450881958, "logits_per_char": -0.65633225440979, "num_chars": 2}, {"sum_logits": -1.6186373233795166, "num_tokens": 1, "num_tokens_all": 433, "is_greedy": false, "logits_per_token": -1.6186373233795166, "logits_per_char": -0.8093186616897583, "num_chars": 2}, {"sum_logits": -1.7214457988739014, "num_tokens": 1, "num_tokens_all": 433, "is_greedy": false, "logits_per_token": -1.7214457988739014, "logits_per_char": -0.8607228994369507, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 31, "native_id": 31, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.353499412536621, "incorrect_loss_raw": 1.4047553936640422, "correct_loss_per_char": 0.6767497062683105, "incorrect_loss_per_char": 0.7023776968320211, "correct_loss_per_token": 1.353499412536621, "incorrect_loss_per_token": 1.4047553936640422, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4531886577606201, "num_tokens": 1, "num_tokens_all": 452, "is_greedy": false, "logits_per_token": -1.4531886577606201, "logits_per_char": -0.7265943288803101, "num_chars": 2}, {"sum_logits": -1.353499412536621, "num_tokens": 1, "num_tokens_all": 452, "is_greedy": true, "logits_per_token": -1.353499412536621, "logits_per_char": -0.6767497062683105, "num_chars": 2}, {"sum_logits": -1.360602855682373, "num_tokens": 1, "num_tokens_all": 452, "is_greedy": false, "logits_per_token": -1.360602855682373, "logits_per_char": -0.6803014278411865, "num_chars": 2}, {"sum_logits": -1.4004746675491333, "num_tokens": 1, "num_tokens_all": 452, "is_greedy": false, "logits_per_token": -1.4004746675491333, "logits_per_char": -0.7002373337745667, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 32, "native_id": 32, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4139173030853271, "incorrect_loss_raw": 1.3991941213607788, "correct_loss_per_char": 0.7069586515426636, "incorrect_loss_per_char": 0.6995970606803894, "correct_loss_per_token": 1.4139173030853271, "incorrect_loss_per_token": 1.3991941213607788, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6616747379302979, "num_tokens": 1, "num_tokens_all": 493, "is_greedy": false, "logits_per_token": -1.6616747379302979, "logits_per_char": -0.8308373689651489, "num_chars": 2}, {"sum_logits": -1.4139173030853271, "num_tokens": 1, "num_tokens_all": 493, "is_greedy": false, "logits_per_token": -1.4139173030853271, "logits_per_char": -0.7069586515426636, "num_chars": 2}, {"sum_logits": -1.297236680984497, "num_tokens": 1, "num_tokens_all": 493, "is_greedy": false, "logits_per_token": -1.297236680984497, "logits_per_char": -0.6486183404922485, "num_chars": 2}, {"sum_logits": -1.2386709451675415, "num_tokens": 1, "num_tokens_all": 493, "is_greedy": true, "logits_per_token": -1.2386709451675415, "logits_per_char": -0.6193354725837708, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 33, "native_id": 33, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7322371006011963, "incorrect_loss_raw": 1.3561240037282307, "correct_loss_per_char": 0.8661185503005981, "incorrect_loss_per_char": 0.6780620018641154, "correct_loss_per_token": 1.7322371006011963, "incorrect_loss_per_token": 1.3561240037282307, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.000523328781128, "num_tokens": 1, "num_tokens_all": 438, "is_greedy": true, "logits_per_token": -1.000523328781128, "logits_per_char": -0.500261664390564, "num_chars": 2}, {"sum_logits": -1.2928056716918945, "num_tokens": 1, "num_tokens_all": 438, "is_greedy": false, "logits_per_token": -1.2928056716918945, "logits_per_char": -0.6464028358459473, "num_chars": 2}, {"sum_logits": -1.77504301071167, "num_tokens": 1, "num_tokens_all": 438, "is_greedy": false, "logits_per_token": -1.77504301071167, "logits_per_char": -0.887521505355835, "num_chars": 2}, {"sum_logits": -1.7322371006011963, "num_tokens": 1, "num_tokens_all": 438, "is_greedy": false, "logits_per_token": -1.7322371006011963, "logits_per_char": -0.8661185503005981, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 34, "native_id": 34, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.338106632232666, "incorrect_loss_raw": 1.4162629048029582, "correct_loss_per_char": 0.669053316116333, "incorrect_loss_per_char": 0.7081314524014791, "correct_loss_per_token": 1.338106632232666, "incorrect_loss_per_token": 1.4162629048029582, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.584673523902893, "num_tokens": 1, "num_tokens_all": 463, "is_greedy": false, "logits_per_token": -1.584673523902893, "logits_per_char": -0.7923367619514465, "num_chars": 2}, {"sum_logits": -1.3923548460006714, "num_tokens": 1, "num_tokens_all": 463, "is_greedy": false, "logits_per_token": -1.3923548460006714, "logits_per_char": -0.6961774230003357, "num_chars": 2}, {"sum_logits": -1.27176034450531, "num_tokens": 1, "num_tokens_all": 463, "is_greedy": true, "logits_per_token": -1.27176034450531, "logits_per_char": -0.635880172252655, "num_chars": 2}, {"sum_logits": -1.338106632232666, "num_tokens": 1, "num_tokens_all": 463, "is_greedy": false, "logits_per_token": -1.338106632232666, "logits_per_char": -0.669053316116333, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 35, "native_id": 35, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4474549293518066, "incorrect_loss_raw": 1.3729535341262817, "correct_loss_per_char": 0.7237274646759033, "incorrect_loss_per_char": 0.6864767670631409, "correct_loss_per_token": 1.4474549293518066, "incorrect_loss_per_token": 1.3729535341262817, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4474549293518066, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": false, "logits_per_token": -1.4474549293518066, "logits_per_char": -0.7237274646759033, "num_chars": 2}, {"sum_logits": -1.3792529106140137, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": false, "logits_per_token": -1.3792529106140137, "logits_per_char": -0.6896264553070068, "num_chars": 2}, {"sum_logits": -1.3745825290679932, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": false, "logits_per_token": -1.3745825290679932, "logits_per_char": -0.6872912645339966, "num_chars": 2}, {"sum_logits": -1.3650251626968384, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": true, "logits_per_token": -1.3650251626968384, "logits_per_char": -0.6825125813484192, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 36, "native_id": 36, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2818913459777832, "incorrect_loss_raw": 1.4465792973836262, "correct_loss_per_char": 0.6409456729888916, "incorrect_loss_per_char": 0.7232896486918131, "correct_loss_per_token": 1.2818913459777832, "incorrect_loss_per_token": 1.4465792973836262, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2473417520523071, "num_tokens": 1, "num_tokens_all": 496, "is_greedy": true, "logits_per_token": -1.2473417520523071, "logits_per_char": -0.6236708760261536, "num_chars": 2}, {"sum_logits": -1.2818913459777832, "num_tokens": 1, "num_tokens_all": 496, "is_greedy": false, "logits_per_token": -1.2818913459777832, "logits_per_char": -0.6409456729888916, "num_chars": 2}, {"sum_logits": -1.4760044813156128, "num_tokens": 1, "num_tokens_all": 496, "is_greedy": false, "logits_per_token": -1.4760044813156128, "logits_per_char": -0.7380022406578064, "num_chars": 2}, {"sum_logits": -1.616391658782959, "num_tokens": 1, "num_tokens_all": 496, "is_greedy": false, "logits_per_token": -1.616391658782959, "logits_per_char": -0.8081958293914795, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 37, "native_id": 37, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.562151551246643, "incorrect_loss_raw": 1.3401514291763306, "correct_loss_per_char": 0.7810757756233215, "incorrect_loss_per_char": 0.6700757145881653, "correct_loss_per_token": 1.562151551246643, "incorrect_loss_per_token": 1.3401514291763306, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.562151551246643, "num_tokens": 1, "num_tokens_all": 510, "is_greedy": false, "logits_per_token": -1.562151551246643, "logits_per_char": -0.7810757756233215, "num_chars": 2}, {"sum_logits": -1.3718101978302002, "num_tokens": 1, "num_tokens_all": 510, "is_greedy": false, "logits_per_token": -1.3718101978302002, "logits_per_char": -0.6859050989151001, "num_chars": 2}, {"sum_logits": -1.3787862062454224, "num_tokens": 1, "num_tokens_all": 510, "is_greedy": false, "logits_per_token": -1.3787862062454224, "logits_per_char": -0.6893931031227112, "num_chars": 2}, {"sum_logits": -1.2698578834533691, "num_tokens": 1, "num_tokens_all": 510, "is_greedy": true, "logits_per_token": -1.2698578834533691, "logits_per_char": -0.6349289417266846, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 38, "native_id": 38, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4114179611206055, "incorrect_loss_raw": 1.385738492012024, "correct_loss_per_char": 0.7057089805603027, "incorrect_loss_per_char": 0.692869246006012, "correct_loss_per_token": 1.4114179611206055, "incorrect_loss_per_token": 1.385738492012024, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4101338386535645, "num_tokens": 1, "num_tokens_all": 443, "is_greedy": false, "logits_per_token": -1.4101338386535645, "logits_per_char": -0.7050669193267822, "num_chars": 2}, {"sum_logits": -1.3166383504867554, "num_tokens": 1, "num_tokens_all": 443, "is_greedy": true, "logits_per_token": -1.3166383504867554, "logits_per_char": -0.6583191752433777, "num_chars": 2}, {"sum_logits": -1.4114179611206055, "num_tokens": 1, "num_tokens_all": 443, "is_greedy": false, "logits_per_token": -1.4114179611206055, "logits_per_char": -0.7057089805603027, "num_chars": 2}, {"sum_logits": -1.430443286895752, "num_tokens": 1, "num_tokens_all": 443, "is_greedy": false, "logits_per_token": -1.430443286895752, "logits_per_char": -0.715221643447876, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 39, "native_id": 39, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4786595106124878, "incorrect_loss_raw": 1.3651208480199177, "correct_loss_per_char": 0.7393297553062439, "incorrect_loss_per_char": 0.6825604240099589, "correct_loss_per_token": 1.4786595106124878, "incorrect_loss_per_token": 1.3651208480199177, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4786595106124878, "num_tokens": 1, "num_tokens_all": 495, "is_greedy": false, "logits_per_token": -1.4786595106124878, "logits_per_char": -0.7393297553062439, "num_chars": 2}, {"sum_logits": -1.3686891794204712, "num_tokens": 1, "num_tokens_all": 495, "is_greedy": false, "logits_per_token": -1.3686891794204712, "logits_per_char": -0.6843445897102356, "num_chars": 2}, {"sum_logits": -1.3689364194869995, "num_tokens": 1, "num_tokens_all": 495, "is_greedy": false, "logits_per_token": -1.3689364194869995, "logits_per_char": -0.6844682097434998, "num_chars": 2}, {"sum_logits": -1.3577369451522827, "num_tokens": 1, "num_tokens_all": 495, "is_greedy": true, "logits_per_token": -1.3577369451522827, "logits_per_char": -0.6788684725761414, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 40, "native_id": 40, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4078238010406494, "incorrect_loss_raw": 1.3900291522343953, "correct_loss_per_char": 0.7039119005203247, "incorrect_loss_per_char": 0.6950145761171976, "correct_loss_per_token": 1.4078238010406494, "incorrect_loss_per_token": 1.3900291522343953, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5207444429397583, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.5207444429397583, "logits_per_char": -0.7603722214698792, "num_chars": 2}, {"sum_logits": -1.2920352220535278, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": true, "logits_per_token": -1.2920352220535278, "logits_per_char": -0.6460176110267639, "num_chars": 2}, {"sum_logits": -1.3573077917099, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.3573077917099, "logits_per_char": -0.67865389585495, "num_chars": 2}, {"sum_logits": -1.4078238010406494, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.4078238010406494, "logits_per_char": -0.7039119005203247, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 41, "native_id": 41, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2497899532318115, "incorrect_loss_raw": 1.7395604054133098, "correct_loss_per_char": 0.6248949766159058, "incorrect_loss_per_char": 0.8697802027066549, "correct_loss_per_token": 1.2497899532318115, "incorrect_loss_per_token": 1.7395604054133098, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9085475206375122, "num_tokens": 1, "num_tokens_all": 451, "is_greedy": true, "logits_per_token": -0.9085475206375122, "logits_per_char": -0.4542737603187561, "num_chars": 2}, {"sum_logits": -1.2497899532318115, "num_tokens": 1, "num_tokens_all": 451, "is_greedy": false, "logits_per_token": -1.2497899532318115, "logits_per_char": -0.6248949766159058, "num_chars": 2}, {"sum_logits": -1.936913251876831, "num_tokens": 1, "num_tokens_all": 451, "is_greedy": false, "logits_per_token": -1.936913251876831, "logits_per_char": -0.9684566259384155, "num_chars": 2}, {"sum_logits": -2.373220443725586, "num_tokens": 1, "num_tokens_all": 451, "is_greedy": false, "logits_per_token": -2.373220443725586, "logits_per_char": -1.186610221862793, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 42, "native_id": 42, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3775854110717773, "incorrect_loss_raw": 1.4094378153483074, "correct_loss_per_char": 0.6887927055358887, "incorrect_loss_per_char": 0.7047189076741537, "correct_loss_per_token": 1.3775854110717773, "incorrect_loss_per_token": 1.4094378153483074, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.657633900642395, "num_tokens": 1, "num_tokens_all": 478, "is_greedy": false, "logits_per_token": -1.657633900642395, "logits_per_char": -0.8288169503211975, "num_chars": 2}, {"sum_logits": -1.2967997789382935, "num_tokens": 1, "num_tokens_all": 478, "is_greedy": false, "logits_per_token": -1.2967997789382935, "logits_per_char": -0.6483998894691467, "num_chars": 2}, {"sum_logits": -1.3775854110717773, "num_tokens": 1, "num_tokens_all": 478, "is_greedy": false, "logits_per_token": -1.3775854110717773, "logits_per_char": -0.6887927055358887, "num_chars": 2}, {"sum_logits": -1.2738797664642334, "num_tokens": 1, "num_tokens_all": 478, "is_greedy": true, "logits_per_token": -1.2738797664642334, "logits_per_char": -0.6369398832321167, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 43, "native_id": 43, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5724518299102783, "incorrect_loss_raw": 1.3382672866185505, "correct_loss_per_char": 0.7862259149551392, "incorrect_loss_per_char": 0.6691336433092753, "correct_loss_per_token": 1.5724518299102783, "incorrect_loss_per_token": 1.3382672866185505, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5724518299102783, "num_tokens": 1, "num_tokens_all": 435, "is_greedy": false, "logits_per_token": -1.5724518299102783, "logits_per_char": -0.7862259149551392, "num_chars": 2}, {"sum_logits": -1.3572067022323608, "num_tokens": 1, "num_tokens_all": 435, "is_greedy": false, "logits_per_token": -1.3572067022323608, "logits_per_char": -0.6786033511161804, "num_chars": 2}, {"sum_logits": -1.2957170009613037, "num_tokens": 1, "num_tokens_all": 435, "is_greedy": true, "logits_per_token": -1.2957170009613037, "logits_per_char": -0.6478585004806519, "num_chars": 2}, {"sum_logits": -1.3618781566619873, "num_tokens": 1, "num_tokens_all": 435, "is_greedy": false, "logits_per_token": -1.3618781566619873, "logits_per_char": -0.6809390783309937, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 44, "native_id": 44, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4351638555526733, "incorrect_loss_raw": 1.3898677428563435, "correct_loss_per_char": 0.7175819277763367, "incorrect_loss_per_char": 0.6949338714281718, "correct_loss_per_token": 1.4351638555526733, "incorrect_loss_per_token": 1.3898677428563435, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2436977624893188, "num_tokens": 1, "num_tokens_all": 448, "is_greedy": true, "logits_per_token": -1.2436977624893188, "logits_per_char": -0.6218488812446594, "num_chars": 2}, {"sum_logits": -1.3899732828140259, "num_tokens": 1, "num_tokens_all": 448, "is_greedy": false, "logits_per_token": -1.3899732828140259, "logits_per_char": -0.6949866414070129, "num_chars": 2}, {"sum_logits": -1.535932183265686, "num_tokens": 1, "num_tokens_all": 448, "is_greedy": false, "logits_per_token": -1.535932183265686, "logits_per_char": -0.767966091632843, "num_chars": 2}, {"sum_logits": -1.4351638555526733, "num_tokens": 1, "num_tokens_all": 448, "is_greedy": false, "logits_per_token": -1.4351638555526733, "logits_per_char": -0.7175819277763367, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 45, "native_id": 45, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3411678075790405, "incorrect_loss_raw": 1.4182178179423015, "correct_loss_per_char": 0.6705839037895203, "incorrect_loss_per_char": 0.7091089089711508, "correct_loss_per_token": 1.3411678075790405, "incorrect_loss_per_token": 1.4182178179423015, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5474454164505005, "num_tokens": 1, "num_tokens_all": 481, "is_greedy": false, "logits_per_token": -1.5474454164505005, "logits_per_char": -0.7737227082252502, "num_chars": 2}, {"sum_logits": -1.4387041330337524, "num_tokens": 1, "num_tokens_all": 481, "is_greedy": false, "logits_per_token": -1.4387041330337524, "logits_per_char": -0.7193520665168762, "num_chars": 2}, {"sum_logits": -1.2685039043426514, "num_tokens": 1, "num_tokens_all": 481, "is_greedy": true, "logits_per_token": -1.2685039043426514, "logits_per_char": -0.6342519521713257, "num_chars": 2}, {"sum_logits": -1.3411678075790405, "num_tokens": 1, "num_tokens_all": 481, "is_greedy": false, "logits_per_token": -1.3411678075790405, "logits_per_char": -0.6705839037895203, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 46, "native_id": 46, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.318442702293396, "incorrect_loss_raw": 1.4294074773788452, "correct_loss_per_char": 0.659221351146698, "incorrect_loss_per_char": 0.7147037386894226, "correct_loss_per_token": 1.318442702293396, "incorrect_loss_per_token": 1.4294074773788452, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6773616075515747, "num_tokens": 1, "num_tokens_all": 508, "is_greedy": false, "logits_per_token": -1.6773616075515747, "logits_per_char": -0.8386808037757874, "num_chars": 2}, {"sum_logits": -1.318442702293396, "num_tokens": 1, "num_tokens_all": 508, "is_greedy": false, "logits_per_token": -1.318442702293396, "logits_per_char": -0.659221351146698, "num_chars": 2}, {"sum_logits": -1.288464069366455, "num_tokens": 1, "num_tokens_all": 508, "is_greedy": true, "logits_per_token": -1.288464069366455, "logits_per_char": -0.6442320346832275, "num_chars": 2}, {"sum_logits": -1.3223967552185059, "num_tokens": 1, "num_tokens_all": 508, "is_greedy": false, "logits_per_token": -1.3223967552185059, "logits_per_char": -0.6611983776092529, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 47, "native_id": 47, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2349367141723633, "incorrect_loss_raw": 1.488577405611674, "correct_loss_per_char": 0.6174683570861816, "incorrect_loss_per_char": 0.744288702805837, "correct_loss_per_token": 1.2349367141723633, "incorrect_loss_per_token": 1.488577405611674, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.8377246856689453, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": false, "logits_per_token": -1.8377246856689453, "logits_per_char": -0.9188623428344727, "num_chars": 2}, {"sum_logits": -1.4960342645645142, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": false, "logits_per_token": -1.4960342645645142, "logits_per_char": -0.7480171322822571, "num_chars": 2}, {"sum_logits": -1.2349367141723633, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": false, "logits_per_token": -1.2349367141723633, "logits_per_char": -0.6174683570861816, "num_chars": 2}, {"sum_logits": -1.1319732666015625, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": true, "logits_per_token": -1.1319732666015625, "logits_per_char": -0.5659866333007812, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 48, "native_id": 48, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3727571964263916, "incorrect_loss_raw": 1.4013217687606812, "correct_loss_per_char": 0.6863785982131958, "incorrect_loss_per_char": 0.7006608843803406, "correct_loss_per_token": 1.3727571964263916, "incorrect_loss_per_token": 1.4013217687606812, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4808672666549683, "num_tokens": 1, "num_tokens_all": 443, "is_greedy": false, "logits_per_token": -1.4808672666549683, "logits_per_char": -0.7404336333274841, "num_chars": 2}, {"sum_logits": -1.3727571964263916, "num_tokens": 1, "num_tokens_all": 443, "is_greedy": false, "logits_per_token": -1.3727571964263916, "logits_per_char": -0.6863785982131958, "num_chars": 2}, {"sum_logits": -1.4592360258102417, "num_tokens": 1, "num_tokens_all": 443, "is_greedy": false, "logits_per_token": -1.4592360258102417, "logits_per_char": -0.7296180129051208, "num_chars": 2}, {"sum_logits": -1.2638620138168335, "num_tokens": 1, "num_tokens_all": 443, "is_greedy": true, "logits_per_token": -1.2638620138168335, "logits_per_char": -0.6319310069084167, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 49, "native_id": 49, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6317756175994873, "incorrect_loss_raw": 1.3223188718159993, "correct_loss_per_char": 0.8158878087997437, "incorrect_loss_per_char": 0.6611594359079996, "correct_loss_per_token": 1.6317756175994873, "incorrect_loss_per_token": 1.3223188718159993, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6317756175994873, "num_tokens": 1, "num_tokens_all": 474, "is_greedy": false, "logits_per_token": -1.6317756175994873, "logits_per_char": -0.8158878087997437, "num_chars": 2}, {"sum_logits": -1.349320888519287, "num_tokens": 1, "num_tokens_all": 474, "is_greedy": false, "logits_per_token": -1.349320888519287, "logits_per_char": -0.6746604442596436, "num_chars": 2}, {"sum_logits": -1.3386549949645996, "num_tokens": 1, "num_tokens_all": 474, "is_greedy": false, "logits_per_token": -1.3386549949645996, "logits_per_char": -0.6693274974822998, "num_chars": 2}, {"sum_logits": -1.2789807319641113, "num_tokens": 1, "num_tokens_all": 474, "is_greedy": true, "logits_per_token": -1.2789807319641113, "logits_per_char": -0.6394903659820557, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 50, "native_id": 50, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3729469776153564, "incorrect_loss_raw": 1.401975194613139, "correct_loss_per_char": 0.6864734888076782, "incorrect_loss_per_char": 0.7009875973065695, "correct_loss_per_token": 1.3729469776153564, "incorrect_loss_per_token": 1.401975194613139, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3729469776153564, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": false, "logits_per_token": -1.3729469776153564, "logits_per_char": -0.6864734888076782, "num_chars": 2}, {"sum_logits": -1.2546943426132202, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": true, "logits_per_token": -1.2546943426132202, "logits_per_char": -0.6273471713066101, "num_chars": 2}, {"sum_logits": -1.443520188331604, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": false, "logits_per_token": -1.443520188331604, "logits_per_char": -0.721760094165802, "num_chars": 2}, {"sum_logits": -1.5077110528945923, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": false, "logits_per_token": -1.5077110528945923, "logits_per_char": -0.7538555264472961, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 51, "native_id": 51, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6595209836959839, "incorrect_loss_raw": 1.347482403119405, "correct_loss_per_char": 0.8297604918479919, "incorrect_loss_per_char": 0.6737412015597025, "correct_loss_per_token": 1.6595209836959839, "incorrect_loss_per_token": 1.347482403119405, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.096543788909912, "num_tokens": 1, "num_tokens_all": 439, "is_greedy": true, "logits_per_token": -1.096543788909912, "logits_per_char": -0.548271894454956, "num_chars": 2}, {"sum_logits": -1.3223634958267212, "num_tokens": 1, "num_tokens_all": 439, "is_greedy": false, "logits_per_token": -1.3223634958267212, "logits_per_char": -0.6611817479133606, "num_chars": 2}, {"sum_logits": -1.6595209836959839, "num_tokens": 1, "num_tokens_all": 439, "is_greedy": false, "logits_per_token": -1.6595209836959839, "logits_per_char": -0.8297604918479919, "num_chars": 2}, {"sum_logits": -1.623539924621582, "num_tokens": 1, "num_tokens_all": 439, "is_greedy": false, "logits_per_token": -1.623539924621582, "logits_per_char": -0.811769962310791, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 52, "native_id": 52, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6208701133728027, "incorrect_loss_raw": 1.3948312203089397, "correct_loss_per_char": 0.8104350566864014, "incorrect_loss_per_char": 0.6974156101544698, "correct_loss_per_token": 1.6208701133728027, "incorrect_loss_per_token": 1.3948312203089397, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.043548345565796, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": true, "logits_per_token": -1.043548345565796, "logits_per_char": -0.521774172782898, "num_chars": 2}, {"sum_logits": -1.2826677560806274, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": false, "logits_per_token": -1.2826677560806274, "logits_per_char": -0.6413338780403137, "num_chars": 2}, {"sum_logits": -1.6208701133728027, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": false, "logits_per_token": -1.6208701133728027, "logits_per_char": -0.8104350566864014, "num_chars": 2}, {"sum_logits": -1.8582775592803955, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": false, "logits_per_token": -1.8582775592803955, "logits_per_char": -0.9291387796401978, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 53, "native_id": 53, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3282402753829956, "incorrect_loss_raw": 1.4161770343780518, "correct_loss_per_char": 0.6641201376914978, "incorrect_loss_per_char": 0.7080885171890259, "correct_loss_per_token": 1.3282402753829956, "incorrect_loss_per_token": 1.4161770343780518, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3282402753829956, "num_tokens": 1, "num_tokens_all": 548, "is_greedy": true, "logits_per_token": -1.3282402753829956, "logits_per_char": -0.6641201376914978, "num_chars": 2}, {"sum_logits": -1.3353772163391113, "num_tokens": 1, "num_tokens_all": 548, "is_greedy": false, "logits_per_token": -1.3353772163391113, "logits_per_char": -0.6676886081695557, "num_chars": 2}, {"sum_logits": -1.4143823385238647, "num_tokens": 1, "num_tokens_all": 548, "is_greedy": false, "logits_per_token": -1.4143823385238647, "logits_per_char": -0.7071911692619324, "num_chars": 2}, {"sum_logits": -1.4987715482711792, "num_tokens": 1, "num_tokens_all": 548, "is_greedy": false, "logits_per_token": -1.4987715482711792, "logits_per_char": -0.7493857741355896, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 54, "native_id": 54, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4524797201156616, "incorrect_loss_raw": 1.3791484435399373, "correct_loss_per_char": 0.7262398600578308, "incorrect_loss_per_char": 0.6895742217699686, "correct_loss_per_token": 1.4524797201156616, "incorrect_loss_per_token": 1.3791484435399373, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2918131351470947, "num_tokens": 1, "num_tokens_all": 437, "is_greedy": true, "logits_per_token": -1.2918131351470947, "logits_per_char": -0.6459065675735474, "num_chars": 2}, {"sum_logits": -1.320286512374878, "num_tokens": 1, "num_tokens_all": 437, "is_greedy": false, "logits_per_token": -1.320286512374878, "logits_per_char": -0.660143256187439, "num_chars": 2}, {"sum_logits": -1.5253456830978394, "num_tokens": 1, "num_tokens_all": 437, "is_greedy": false, "logits_per_token": -1.5253456830978394, "logits_per_char": -0.7626728415489197, "num_chars": 2}, {"sum_logits": -1.4524797201156616, "num_tokens": 1, "num_tokens_all": 437, "is_greedy": false, "logits_per_token": -1.4524797201156616, "logits_per_char": -0.7262398600578308, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 55, "native_id": 55, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3373756408691406, "incorrect_loss_raw": 1.419150710105896, "correct_loss_per_char": 0.6686878204345703, "incorrect_loss_per_char": 0.709575355052948, "correct_loss_per_token": 1.3373756408691406, "incorrect_loss_per_token": 1.419150710105896, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.608307957649231, "num_tokens": 1, "num_tokens_all": 553, "is_greedy": false, "logits_per_token": -1.608307957649231, "logits_per_char": -0.8041539788246155, "num_chars": 2}, {"sum_logits": -1.3373756408691406, "num_tokens": 1, "num_tokens_all": 553, "is_greedy": false, "logits_per_token": -1.3373756408691406, "logits_per_char": -0.6686878204345703, "num_chars": 2}, {"sum_logits": -1.406224250793457, "num_tokens": 1, "num_tokens_all": 553, "is_greedy": false, "logits_per_token": -1.406224250793457, "logits_per_char": -0.7031121253967285, "num_chars": 2}, {"sum_logits": -1.242919921875, "num_tokens": 1, "num_tokens_all": 553, "is_greedy": true, "logits_per_token": -1.242919921875, "logits_per_char": -0.6214599609375, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 56, "native_id": 56, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3144763708114624, "incorrect_loss_raw": 1.4271037975947063, "correct_loss_per_char": 0.6572381854057312, "incorrect_loss_per_char": 0.7135518987973531, "correct_loss_per_token": 1.3144763708114624, "incorrect_loss_per_token": 1.4271037975947063, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.618600845336914, "num_tokens": 1, "num_tokens_all": 516, "is_greedy": false, "logits_per_token": -1.618600845336914, "logits_per_char": -0.809300422668457, "num_chars": 2}, {"sum_logits": -1.3904762268066406, "num_tokens": 1, "num_tokens_all": 516, "is_greedy": false, "logits_per_token": -1.3904762268066406, "logits_per_char": -0.6952381134033203, "num_chars": 2}, {"sum_logits": -1.3144763708114624, "num_tokens": 1, "num_tokens_all": 516, "is_greedy": false, "logits_per_token": -1.3144763708114624, "logits_per_char": -0.6572381854057312, "num_chars": 2}, {"sum_logits": -1.272234320640564, "num_tokens": 1, "num_tokens_all": 516, "is_greedy": true, "logits_per_token": -1.272234320640564, "logits_per_char": -0.636117160320282, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 57, "native_id": 57, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3958525657653809, "incorrect_loss_raw": 1.3961841662724812, "correct_loss_per_char": 0.6979262828826904, "incorrect_loss_per_char": 0.6980920831362406, "correct_loss_per_token": 1.3958525657653809, "incorrect_loss_per_token": 1.3961841662724812, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4580492973327637, "num_tokens": 1, "num_tokens_all": 558, "is_greedy": false, "logits_per_token": -1.4580492973327637, "logits_per_char": -0.7290246486663818, "num_chars": 2}, {"sum_logits": -1.236407995223999, "num_tokens": 1, "num_tokens_all": 558, "is_greedy": true, "logits_per_token": -1.236407995223999, "logits_per_char": -0.6182039976119995, "num_chars": 2}, {"sum_logits": -1.3958525657653809, "num_tokens": 1, "num_tokens_all": 558, "is_greedy": false, "logits_per_token": -1.3958525657653809, "logits_per_char": -0.6979262828826904, "num_chars": 2}, {"sum_logits": -1.4940952062606812, "num_tokens": 1, "num_tokens_all": 558, "is_greedy": false, "logits_per_token": -1.4940952062606812, "logits_per_char": -0.7470476031303406, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 58, "native_id": 58, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2643526792526245, "incorrect_loss_raw": 1.4413721164067586, "correct_loss_per_char": 0.6321763396263123, "incorrect_loss_per_char": 0.7206860582033793, "correct_loss_per_token": 1.2643526792526245, "incorrect_loss_per_token": 1.4413721164067586, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4904086589813232, "num_tokens": 1, "num_tokens_all": 444, "is_greedy": false, "logits_per_token": -1.4904086589813232, "logits_per_char": -0.7452043294906616, "num_chars": 2}, {"sum_logits": -1.2643526792526245, "num_tokens": 1, "num_tokens_all": 444, "is_greedy": true, "logits_per_token": -1.2643526792526245, "logits_per_char": -0.6321763396263123, "num_chars": 2}, {"sum_logits": -1.487518310546875, "num_tokens": 1, "num_tokens_all": 444, "is_greedy": false, "logits_per_token": -1.487518310546875, "logits_per_char": -0.7437591552734375, "num_chars": 2}, {"sum_logits": -1.3461893796920776, "num_tokens": 1, "num_tokens_all": 444, "is_greedy": false, "logits_per_token": -1.3461893796920776, "logits_per_char": -0.6730946898460388, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 59, "native_id": 59, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1969914436340332, "incorrect_loss_raw": 1.4672639767328899, "correct_loss_per_char": 0.5984957218170166, "incorrect_loss_per_char": 0.7336319883664449, "correct_loss_per_token": 1.1969914436340332, "incorrect_loss_per_token": 1.4672639767328899, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.533388376235962, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.533388376235962, "logits_per_char": -0.766694188117981, "num_chars": 2}, {"sum_logits": -1.4066050052642822, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.4066050052642822, "logits_per_char": -0.7033025026321411, "num_chars": 2}, {"sum_logits": -1.4617985486984253, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.4617985486984253, "logits_per_char": -0.7308992743492126, "num_chars": 2}, {"sum_logits": -1.1969914436340332, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": true, "logits_per_token": -1.1969914436340332, "logits_per_char": -0.5984957218170166, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 60, "native_id": 60, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3460484743118286, "incorrect_loss_raw": 1.4100052913029988, "correct_loss_per_char": 0.6730242371559143, "incorrect_loss_per_char": 0.7050026456514994, "correct_loss_per_token": 1.3460484743118286, "incorrect_loss_per_token": 1.4100052913029988, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4412922859191895, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": false, "logits_per_token": -1.4412922859191895, "logits_per_char": -0.7206461429595947, "num_chars": 2}, {"sum_logits": -1.3460484743118286, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": true, "logits_per_token": -1.3460484743118286, "logits_per_char": -0.6730242371559143, "num_chars": 2}, {"sum_logits": -1.4316959381103516, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": false, "logits_per_token": -1.4316959381103516, "logits_per_char": -0.7158479690551758, "num_chars": 2}, {"sum_logits": -1.3570276498794556, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": false, "logits_per_token": -1.3570276498794556, "logits_per_char": -0.6785138249397278, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 61, "native_id": 61, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4825525283813477, "incorrect_loss_raw": 1.3820910056432087, "correct_loss_per_char": 0.7412762641906738, "incorrect_loss_per_char": 0.6910455028216044, "correct_loss_per_token": 1.4825525283813477, "incorrect_loss_per_token": 1.3820910056432087, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1639642715454102, "num_tokens": 1, "num_tokens_all": 490, "is_greedy": true, "logits_per_token": -1.1639642715454102, "logits_per_char": -0.5819821357727051, "num_chars": 2}, {"sum_logits": -1.3945215940475464, "num_tokens": 1, "num_tokens_all": 490, "is_greedy": false, "logits_per_token": -1.3945215940475464, "logits_per_char": -0.6972607970237732, "num_chars": 2}, {"sum_logits": -1.58778715133667, "num_tokens": 1, "num_tokens_all": 490, "is_greedy": false, "logits_per_token": -1.58778715133667, "logits_per_char": -0.793893575668335, "num_chars": 2}, {"sum_logits": -1.4825525283813477, "num_tokens": 1, "num_tokens_all": 490, "is_greedy": false, "logits_per_token": -1.4825525283813477, "logits_per_char": -0.7412762641906738, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 62, "native_id": 62, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3605401515960693, "incorrect_loss_raw": 1.4066891272862752, "correct_loss_per_char": 0.6802700757980347, "incorrect_loss_per_char": 0.7033445636431376, "correct_loss_per_token": 1.3605401515960693, "incorrect_loss_per_token": 1.4066891272862752, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3605401515960693, "num_tokens": 1, "num_tokens_all": 451, "is_greedy": true, "logits_per_token": -1.3605401515960693, "logits_per_char": -0.6802700757980347, "num_chars": 2}, {"sum_logits": -1.4019931554794312, "num_tokens": 1, "num_tokens_all": 451, "is_greedy": false, "logits_per_token": -1.4019931554794312, "logits_per_char": -0.7009965777397156, "num_chars": 2}, {"sum_logits": -1.4005937576293945, "num_tokens": 1, "num_tokens_all": 451, "is_greedy": false, "logits_per_token": -1.4005937576293945, "logits_per_char": -0.7002968788146973, "num_chars": 2}, {"sum_logits": -1.41748046875, "num_tokens": 1, "num_tokens_all": 451, "is_greedy": false, "logits_per_token": -1.41748046875, "logits_per_char": -0.708740234375, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 63, "native_id": 63, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4315381050109863, "incorrect_loss_raw": 1.3826684554417927, "correct_loss_per_char": 0.7157690525054932, "incorrect_loss_per_char": 0.6913342277208964, "correct_loss_per_token": 1.4315381050109863, "incorrect_loss_per_token": 1.3826684554417927, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4261085987091064, "num_tokens": 1, "num_tokens_all": 469, "is_greedy": false, "logits_per_token": -1.4261085987091064, "logits_per_char": -0.7130542993545532, "num_chars": 2}, {"sum_logits": -1.4286096096038818, "num_tokens": 1, "num_tokens_all": 469, "is_greedy": false, "logits_per_token": -1.4286096096038818, "logits_per_char": -0.7143048048019409, "num_chars": 2}, {"sum_logits": -1.4315381050109863, "num_tokens": 1, "num_tokens_all": 469, "is_greedy": false, "logits_per_token": -1.4315381050109863, "logits_per_char": -0.7157690525054932, "num_chars": 2}, {"sum_logits": -1.2932871580123901, "num_tokens": 1, "num_tokens_all": 469, "is_greedy": true, "logits_per_token": -1.2932871580123901, "logits_per_char": -0.6466435790061951, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 64, "native_id": 64, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2584445476531982, "incorrect_loss_raw": 1.4435538053512573, "correct_loss_per_char": 0.6292222738265991, "incorrect_loss_per_char": 0.7217769026756287, "correct_loss_per_token": 1.2584445476531982, "incorrect_loss_per_token": 1.4435538053512573, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5955301523208618, "num_tokens": 1, "num_tokens_all": 504, "is_greedy": false, "logits_per_token": -1.5955301523208618, "logits_per_char": -0.7977650761604309, "num_chars": 2}, {"sum_logits": -1.3733503818511963, "num_tokens": 1, "num_tokens_all": 504, "is_greedy": false, "logits_per_token": -1.3733503818511963, "logits_per_char": -0.6866751909255981, "num_chars": 2}, {"sum_logits": -1.3617808818817139, "num_tokens": 1, "num_tokens_all": 504, "is_greedy": false, "logits_per_token": -1.3617808818817139, "logits_per_char": -0.6808904409408569, "num_chars": 2}, {"sum_logits": -1.2584445476531982, "num_tokens": 1, "num_tokens_all": 504, "is_greedy": true, "logits_per_token": -1.2584445476531982, "logits_per_char": -0.6292222738265991, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 65, "native_id": 65, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4068732261657715, "incorrect_loss_raw": 1.389906922976176, "correct_loss_per_char": 0.7034366130828857, "incorrect_loss_per_char": 0.694953461488088, "correct_loss_per_token": 1.4068732261657715, "incorrect_loss_per_token": 1.389906922976176, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4068732261657715, "num_tokens": 1, "num_tokens_all": 490, "is_greedy": false, "logits_per_token": -1.4068732261657715, "logits_per_char": -0.7034366130828857, "num_chars": 2}, {"sum_logits": -1.3169301748275757, "num_tokens": 1, "num_tokens_all": 490, "is_greedy": true, "logits_per_token": -1.3169301748275757, "logits_per_char": -0.6584650874137878, "num_chars": 2}, {"sum_logits": -1.3816665410995483, "num_tokens": 1, "num_tokens_all": 490, "is_greedy": false, "logits_per_token": -1.3816665410995483, "logits_per_char": -0.6908332705497742, "num_chars": 2}, {"sum_logits": -1.4711240530014038, "num_tokens": 1, "num_tokens_all": 490, "is_greedy": false, "logits_per_token": -1.4711240530014038, "logits_per_char": -0.7355620265007019, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 66, "native_id": 66, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5298078060150146, "incorrect_loss_raw": 1.3527750571568806, "correct_loss_per_char": 0.7649039030075073, "incorrect_loss_per_char": 0.6763875285784403, "correct_loss_per_token": 1.5298078060150146, "incorrect_loss_per_token": 1.3527750571568806, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3880832195281982, "num_tokens": 1, "num_tokens_all": 446, "is_greedy": false, "logits_per_token": -1.3880832195281982, "logits_per_char": -0.6940416097640991, "num_chars": 2}, {"sum_logits": -1.2686452865600586, "num_tokens": 1, "num_tokens_all": 446, "is_greedy": true, "logits_per_token": -1.2686452865600586, "logits_per_char": -0.6343226432800293, "num_chars": 2}, {"sum_logits": -1.4015966653823853, "num_tokens": 1, "num_tokens_all": 446, "is_greedy": false, "logits_per_token": -1.4015966653823853, "logits_per_char": -0.7007983326911926, "num_chars": 2}, {"sum_logits": -1.5298078060150146, "num_tokens": 1, "num_tokens_all": 446, "is_greedy": false, "logits_per_token": -1.5298078060150146, "logits_per_char": -0.7649039030075073, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 67, "native_id": 67, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3560619354248047, "incorrect_loss_raw": 1.4053141673405964, "correct_loss_per_char": 0.6780309677124023, "incorrect_loss_per_char": 0.7026570836702982, "correct_loss_per_token": 1.3560619354248047, "incorrect_loss_per_token": 1.4053141673405964, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4770011901855469, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.4770011901855469, "logits_per_char": -0.7385005950927734, "num_chars": 2}, {"sum_logits": -1.3560619354248047, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.3560619354248047, "logits_per_char": -0.6780309677124023, "num_chars": 2}, {"sum_logits": -1.3499780893325806, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": true, "logits_per_token": -1.3499780893325806, "logits_per_char": -0.6749890446662903, "num_chars": 2}, {"sum_logits": -1.388963222503662, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.388963222503662, "logits_per_char": -0.694481611251831, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 68, "native_id": 68, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4491840600967407, "incorrect_loss_raw": 1.3763054211934407, "correct_loss_per_char": 0.7245920300483704, "incorrect_loss_per_char": 0.6881527105967203, "correct_loss_per_token": 1.4491840600967407, "incorrect_loss_per_token": 1.3763054211934407, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3881773948669434, "num_tokens": 1, "num_tokens_all": 442, "is_greedy": false, "logits_per_token": -1.3881773948669434, "logits_per_char": -0.6940886974334717, "num_chars": 2}, {"sum_logits": -1.3324562311172485, "num_tokens": 1, "num_tokens_all": 442, "is_greedy": true, "logits_per_token": -1.3324562311172485, "logits_per_char": -0.6662281155586243, "num_chars": 2}, {"sum_logits": -1.4082826375961304, "num_tokens": 1, "num_tokens_all": 442, "is_greedy": false, "logits_per_token": -1.4082826375961304, "logits_per_char": -0.7041413187980652, "num_chars": 2}, {"sum_logits": -1.4491840600967407, "num_tokens": 1, "num_tokens_all": 442, "is_greedy": false, "logits_per_token": -1.4491840600967407, "logits_per_char": -0.7245920300483704, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 69, "native_id": 69, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4570980072021484, "incorrect_loss_raw": 1.3919212818145752, "correct_loss_per_char": 0.7285490036010742, "incorrect_loss_per_char": 0.6959606409072876, "correct_loss_per_token": 1.4570980072021484, "incorrect_loss_per_token": 1.3919212818145752, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6655861139297485, "num_tokens": 1, "num_tokens_all": 540, "is_greedy": false, "logits_per_token": -1.6655861139297485, "logits_per_char": -0.8327930569648743, "num_chars": 2}, {"sum_logits": -1.4570980072021484, "num_tokens": 1, "num_tokens_all": 540, "is_greedy": false, "logits_per_token": -1.4570980072021484, "logits_per_char": -0.7285490036010742, "num_chars": 2}, {"sum_logits": -1.3690075874328613, "num_tokens": 1, "num_tokens_all": 540, "is_greedy": false, "logits_per_token": -1.3690075874328613, "logits_per_char": -0.6845037937164307, "num_chars": 2}, {"sum_logits": -1.1411701440811157, "num_tokens": 1, "num_tokens_all": 540, "is_greedy": true, "logits_per_token": -1.1411701440811157, "logits_per_char": -0.5705850720405579, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 70, "native_id": 70, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6149680614471436, "incorrect_loss_raw": 1.3374792734781902, "correct_loss_per_char": 0.8074840307235718, "incorrect_loss_per_char": 0.6687396367390951, "correct_loss_per_token": 1.6149680614471436, "incorrect_loss_per_token": 1.3374792734781902, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6149680614471436, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.6149680614471436, "logits_per_char": -0.8074840307235718, "num_chars": 2}, {"sum_logits": -1.3591117858886719, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.3591117858886719, "logits_per_char": -0.6795558929443359, "num_chars": 2}, {"sum_logits": -1.5034735202789307, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.5034735202789307, "logits_per_char": -0.7517367601394653, "num_chars": 2}, {"sum_logits": -1.1498525142669678, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": true, "logits_per_token": -1.1498525142669678, "logits_per_char": -0.5749262571334839, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 71, "native_id": 71, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4144107103347778, "incorrect_loss_raw": 1.386789083480835, "correct_loss_per_char": 0.7072053551673889, "incorrect_loss_per_char": 0.6933945417404175, "correct_loss_per_token": 1.4144107103347778, "incorrect_loss_per_token": 1.386789083480835, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4233615398406982, "num_tokens": 1, "num_tokens_all": 448, "is_greedy": false, "logits_per_token": -1.4233615398406982, "logits_per_char": -0.7116807699203491, "num_chars": 2}, {"sum_logits": -1.2697269916534424, "num_tokens": 1, "num_tokens_all": 448, "is_greedy": true, "logits_per_token": -1.2697269916534424, "logits_per_char": -0.6348634958267212, "num_chars": 2}, {"sum_logits": -1.4144107103347778, "num_tokens": 1, "num_tokens_all": 448, "is_greedy": false, "logits_per_token": -1.4144107103347778, "logits_per_char": -0.7072053551673889, "num_chars": 2}, {"sum_logits": -1.4672787189483643, "num_tokens": 1, "num_tokens_all": 448, "is_greedy": false, "logits_per_token": -1.4672787189483643, "logits_per_char": -0.7336393594741821, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 72, "native_id": 72, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.434326410293579, "incorrect_loss_raw": 1.3793988625208538, "correct_loss_per_char": 0.7171632051467896, "incorrect_loss_per_char": 0.6896994312604269, "correct_loss_per_token": 1.434326410293579, "incorrect_loss_per_token": 1.3793988625208538, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.434326410293579, "num_tokens": 1, "num_tokens_all": 454, "is_greedy": false, "logits_per_token": -1.434326410293579, "logits_per_char": -0.7171632051467896, "num_chars": 2}, {"sum_logits": -1.3326449394226074, "num_tokens": 1, "num_tokens_all": 454, "is_greedy": true, "logits_per_token": -1.3326449394226074, "logits_per_char": -0.6663224697113037, "num_chars": 2}, {"sum_logits": -1.4551159143447876, "num_tokens": 1, "num_tokens_all": 454, "is_greedy": false, "logits_per_token": -1.4551159143447876, "logits_per_char": -0.7275579571723938, "num_chars": 2}, {"sum_logits": -1.350435733795166, "num_tokens": 1, "num_tokens_all": 454, "is_greedy": false, "logits_per_token": -1.350435733795166, "logits_per_char": -0.675217866897583, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 73, "native_id": 73, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6188111305236816, "incorrect_loss_raw": 1.3520104885101318, "correct_loss_per_char": 0.8094055652618408, "incorrect_loss_per_char": 0.6760052442550659, "correct_loss_per_token": 1.6188111305236816, "incorrect_loss_per_token": 1.3520104885101318, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6188111305236816, "num_tokens": 1, "num_tokens_all": 429, "is_greedy": false, "logits_per_token": -1.6188111305236816, "logits_per_char": -0.8094055652618408, "num_chars": 2}, {"sum_logits": -1.4522368907928467, "num_tokens": 1, "num_tokens_all": 429, "is_greedy": false, "logits_per_token": -1.4522368907928467, "logits_per_char": -0.7261184453964233, "num_chars": 2}, {"sum_logits": -1.482215166091919, "num_tokens": 1, "num_tokens_all": 429, "is_greedy": false, "logits_per_token": -1.482215166091919, "logits_per_char": -0.7411075830459595, "num_chars": 2}, {"sum_logits": -1.1215794086456299, "num_tokens": 1, "num_tokens_all": 429, "is_greedy": true, "logits_per_token": -1.1215794086456299, "logits_per_char": -0.5607897043228149, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 74, "native_id": 74, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3622031211853027, "incorrect_loss_raw": 1.4048399925231934, "correct_loss_per_char": 0.6811015605926514, "incorrect_loss_per_char": 0.7024199962615967, "correct_loss_per_token": 1.3622031211853027, "incorrect_loss_per_token": 1.4048399925231934, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.352548599243164, "num_tokens": 1, "num_tokens_all": 452, "is_greedy": true, "logits_per_token": -1.352548599243164, "logits_per_char": -0.676274299621582, "num_chars": 2}, {"sum_logits": -1.3622031211853027, "num_tokens": 1, "num_tokens_all": 452, "is_greedy": false, "logits_per_token": -1.3622031211853027, "logits_per_char": -0.6811015605926514, "num_chars": 2}, {"sum_logits": -1.4417338371276855, "num_tokens": 1, "num_tokens_all": 452, "is_greedy": false, "logits_per_token": -1.4417338371276855, "logits_per_char": -0.7208669185638428, "num_chars": 2}, {"sum_logits": -1.4202375411987305, "num_tokens": 1, "num_tokens_all": 452, "is_greedy": false, "logits_per_token": -1.4202375411987305, "logits_per_char": -0.7101187705993652, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 75, "native_id": 75, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3339593410491943, "incorrect_loss_raw": 1.4131240844726562, "correct_loss_per_char": 0.6669796705245972, "incorrect_loss_per_char": 0.7065620422363281, "correct_loss_per_token": 1.3339593410491943, "incorrect_loss_per_token": 1.4131240844726562, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3339593410491943, "num_tokens": 1, "num_tokens_all": 452, "is_greedy": true, "logits_per_token": -1.3339593410491943, "logits_per_char": -0.6669796705245972, "num_chars": 2}, {"sum_logits": -1.4214420318603516, "num_tokens": 1, "num_tokens_all": 452, "is_greedy": false, "logits_per_token": -1.4214420318603516, "logits_per_char": -0.7107210159301758, "num_chars": 2}, {"sum_logits": -1.401183009147644, "num_tokens": 1, "num_tokens_all": 452, "is_greedy": false, "logits_per_token": -1.401183009147644, "logits_per_char": -0.700591504573822, "num_chars": 2}, {"sum_logits": -1.4167472124099731, "num_tokens": 1, "num_tokens_all": 452, "is_greedy": false, "logits_per_token": -1.4167472124099731, "logits_per_char": -0.7083736062049866, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 76, "native_id": 76, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3944292068481445, "incorrect_loss_raw": 1.3934613466262817, "correct_loss_per_char": 0.6972146034240723, "incorrect_loss_per_char": 0.6967306733131409, "correct_loss_per_token": 1.3944292068481445, "incorrect_loss_per_token": 1.3934613466262817, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5062702894210815, "num_tokens": 1, "num_tokens_all": 443, "is_greedy": false, "logits_per_token": -1.5062702894210815, "logits_per_char": -0.7531351447105408, "num_chars": 2}, {"sum_logits": -1.2894346714019775, "num_tokens": 1, "num_tokens_all": 443, "is_greedy": true, "logits_per_token": -1.2894346714019775, "logits_per_char": -0.6447173357009888, "num_chars": 2}, {"sum_logits": -1.3944292068481445, "num_tokens": 1, "num_tokens_all": 443, "is_greedy": false, "logits_per_token": -1.3944292068481445, "logits_per_char": -0.6972146034240723, "num_chars": 2}, {"sum_logits": -1.3846790790557861, "num_tokens": 1, "num_tokens_all": 443, "is_greedy": false, "logits_per_token": -1.3846790790557861, "logits_per_char": -0.6923395395278931, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 77, "native_id": 77, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5294910669326782, "incorrect_loss_raw": 1.3499036232630413, "correct_loss_per_char": 0.7647455334663391, "incorrect_loss_per_char": 0.6749518116315206, "correct_loss_per_token": 1.5294910669326782, "incorrect_loss_per_token": 1.3499036232630413, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5294910669326782, "num_tokens": 1, "num_tokens_all": 437, "is_greedy": false, "logits_per_token": -1.5294910669326782, "logits_per_char": -0.7647455334663391, "num_chars": 2}, {"sum_logits": -1.3635298013687134, "num_tokens": 1, "num_tokens_all": 437, "is_greedy": false, "logits_per_token": -1.3635298013687134, "logits_per_char": -0.6817649006843567, "num_chars": 2}, {"sum_logits": -1.4227735996246338, "num_tokens": 1, "num_tokens_all": 437, "is_greedy": false, "logits_per_token": -1.4227735996246338, "logits_per_char": -0.7113867998123169, "num_chars": 2}, {"sum_logits": -1.2634074687957764, "num_tokens": 1, "num_tokens_all": 437, "is_greedy": true, "logits_per_token": -1.2634074687957764, "logits_per_char": -0.6317037343978882, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 78, "native_id": 78, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.278085470199585, "incorrect_loss_raw": 1.4384064277013142, "correct_loss_per_char": 0.6390427350997925, "incorrect_loss_per_char": 0.7192032138506571, "correct_loss_per_token": 1.278085470199585, "incorrect_loss_per_token": 1.4384064277013142, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6154335737228394, "num_tokens": 1, "num_tokens_all": 479, "is_greedy": false, "logits_per_token": -1.6154335737228394, "logits_per_char": -0.8077167868614197, "num_chars": 2}, {"sum_logits": -1.4000359773635864, "num_tokens": 1, "num_tokens_all": 479, "is_greedy": false, "logits_per_token": -1.4000359773635864, "logits_per_char": -0.7000179886817932, "num_chars": 2}, {"sum_logits": -1.299749732017517, "num_tokens": 1, "num_tokens_all": 479, "is_greedy": false, "logits_per_token": -1.299749732017517, "logits_per_char": -0.6498748660087585, "num_chars": 2}, {"sum_logits": -1.278085470199585, "num_tokens": 1, "num_tokens_all": 479, "is_greedy": true, "logits_per_token": -1.278085470199585, "logits_per_char": -0.6390427350997925, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 79, "native_id": 79, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9527401924133301, "incorrect_loss_raw": 1.5976265668869019, "correct_loss_per_char": 0.47637009620666504, "incorrect_loss_per_char": 0.7988132834434509, "correct_loss_per_token": 0.9527401924133301, "incorrect_loss_per_token": 1.5976265668869019, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6477423906326294, "num_tokens": 1, "num_tokens_all": 525, "is_greedy": false, "logits_per_token": -1.6477423906326294, "logits_per_char": -0.8238711953163147, "num_chars": 2}, {"sum_logits": -1.5193665027618408, "num_tokens": 1, "num_tokens_all": 525, "is_greedy": false, "logits_per_token": -1.5193665027618408, "logits_per_char": -0.7596832513809204, "num_chars": 2}, {"sum_logits": -1.6257708072662354, "num_tokens": 1, "num_tokens_all": 525, "is_greedy": false, "logits_per_token": -1.6257708072662354, "logits_per_char": -0.8128854036331177, "num_chars": 2}, {"sum_logits": -0.9527401924133301, "num_tokens": 1, "num_tokens_all": 525, "is_greedy": true, "logits_per_token": -0.9527401924133301, "logits_per_char": -0.47637009620666504, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 80, "native_id": 80, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3644766807556152, "incorrect_loss_raw": 1.401103178660075, "correct_loss_per_char": 0.6822383403778076, "incorrect_loss_per_char": 0.7005515893300375, "correct_loss_per_token": 1.3644766807556152, "incorrect_loss_per_token": 1.401103178660075, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.465677261352539, "num_tokens": 1, "num_tokens_all": 454, "is_greedy": false, "logits_per_token": -1.465677261352539, "logits_per_char": -0.7328386306762695, "num_chars": 2}, {"sum_logits": -1.3508524894714355, "num_tokens": 1, "num_tokens_all": 454, "is_greedy": true, "logits_per_token": -1.3508524894714355, "logits_per_char": -0.6754262447357178, "num_chars": 2}, {"sum_logits": -1.3644766807556152, "num_tokens": 1, "num_tokens_all": 454, "is_greedy": false, "logits_per_token": -1.3644766807556152, "logits_per_char": -0.6822383403778076, "num_chars": 2}, {"sum_logits": -1.38677978515625, "num_tokens": 1, "num_tokens_all": 454, "is_greedy": false, "logits_per_token": -1.38677978515625, "logits_per_char": -0.693389892578125, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 81, "native_id": 81, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3694061040878296, "incorrect_loss_raw": 1.4019073645273845, "correct_loss_per_char": 0.6847030520439148, "incorrect_loss_per_char": 0.7009536822636923, "correct_loss_per_token": 1.3694061040878296, "incorrect_loss_per_token": 1.4019073645273845, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.471338152885437, "num_tokens": 1, "num_tokens_all": 443, "is_greedy": false, "logits_per_token": -1.471338152885437, "logits_per_char": -0.7356690764427185, "num_chars": 2}, {"sum_logits": -1.3682670593261719, "num_tokens": 1, "num_tokens_all": 443, "is_greedy": false, "logits_per_token": -1.3682670593261719, "logits_per_char": -0.6841335296630859, "num_chars": 2}, {"sum_logits": -1.3661168813705444, "num_tokens": 1, "num_tokens_all": 443, "is_greedy": true, "logits_per_token": -1.3661168813705444, "logits_per_char": -0.6830584406852722, "num_chars": 2}, {"sum_logits": -1.3694061040878296, "num_tokens": 1, "num_tokens_all": 443, "is_greedy": false, "logits_per_token": -1.3694061040878296, "logits_per_char": -0.6847030520439148, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 82, "native_id": 82, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.213122010231018, "incorrect_loss_raw": 1.4582795699437459, "correct_loss_per_char": 0.606561005115509, "incorrect_loss_per_char": 0.7291397849718729, "correct_loss_per_token": 1.213122010231018, "incorrect_loss_per_token": 1.4582795699437459, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5030138492584229, "num_tokens": 1, "num_tokens_all": 437, "is_greedy": false, "logits_per_token": -1.5030138492584229, "logits_per_char": -0.7515069246292114, "num_chars": 2}, {"sum_logits": -1.4668644666671753, "num_tokens": 1, "num_tokens_all": 437, "is_greedy": false, "logits_per_token": -1.4668644666671753, "logits_per_char": -0.7334322333335876, "num_chars": 2}, {"sum_logits": -1.4049603939056396, "num_tokens": 1, "num_tokens_all": 437, "is_greedy": false, "logits_per_token": -1.4049603939056396, "logits_per_char": -0.7024801969528198, "num_chars": 2}, {"sum_logits": -1.213122010231018, "num_tokens": 1, "num_tokens_all": 437, "is_greedy": true, "logits_per_token": -1.213122010231018, "logits_per_char": -0.606561005115509, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 83, "native_id": 83, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.372735619544983, "incorrect_loss_raw": 1.407611886660258, "correct_loss_per_char": 0.6863678097724915, "incorrect_loss_per_char": 0.703805943330129, "correct_loss_per_token": 1.372735619544983, "incorrect_loss_per_token": 1.407611886660258, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.474900484085083, "num_tokens": 1, "num_tokens_all": 455, "is_greedy": false, "logits_per_token": -1.474900484085083, "logits_per_char": -0.7374502420425415, "num_chars": 2}, {"sum_logits": -1.3552017211914062, "num_tokens": 1, "num_tokens_all": 455, "is_greedy": true, "logits_per_token": -1.3552017211914062, "logits_per_char": -0.6776008605957031, "num_chars": 2}, {"sum_logits": -1.372735619544983, "num_tokens": 1, "num_tokens_all": 455, "is_greedy": false, "logits_per_token": -1.372735619544983, "logits_per_char": -0.6863678097724915, "num_chars": 2}, {"sum_logits": -1.3927334547042847, "num_tokens": 1, "num_tokens_all": 455, "is_greedy": false, "logits_per_token": -1.3927334547042847, "logits_per_char": -0.6963667273521423, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 84, "native_id": 84, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2590100765228271, "incorrect_loss_raw": 1.4400235414505005, "correct_loss_per_char": 0.6295050382614136, "incorrect_loss_per_char": 0.7200117707252502, "correct_loss_per_token": 1.2590100765228271, "incorrect_loss_per_token": 1.4400235414505005, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.413562536239624, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": false, "logits_per_token": -1.413562536239624, "logits_per_char": -0.706781268119812, "num_chars": 2}, {"sum_logits": -1.479879379272461, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": false, "logits_per_token": -1.479879379272461, "logits_per_char": -0.7399396896362305, "num_chars": 2}, {"sum_logits": -1.4266287088394165, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": false, "logits_per_token": -1.4266287088394165, "logits_per_char": -0.7133143544197083, "num_chars": 2}, {"sum_logits": -1.2590100765228271, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": true, "logits_per_token": -1.2590100765228271, "logits_per_char": -0.6295050382614136, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 85, "native_id": 85, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.494011640548706, "incorrect_loss_raw": 1.3658944765726726, "correct_loss_per_char": 0.747005820274353, "incorrect_loss_per_char": 0.6829472382863363, "correct_loss_per_token": 1.494011640548706, "incorrect_loss_per_token": 1.3658944765726726, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3144093751907349, "num_tokens": 1, "num_tokens_all": 433, "is_greedy": true, "logits_per_token": -1.3144093751907349, "logits_per_char": -0.6572046875953674, "num_chars": 2}, {"sum_logits": -1.3236004114151, "num_tokens": 1, "num_tokens_all": 433, "is_greedy": false, "logits_per_token": -1.3236004114151, "logits_per_char": -0.66180020570755, "num_chars": 2}, {"sum_logits": -1.4596736431121826, "num_tokens": 1, "num_tokens_all": 433, "is_greedy": false, "logits_per_token": -1.4596736431121826, "logits_per_char": -0.7298368215560913, "num_chars": 2}, {"sum_logits": -1.494011640548706, "num_tokens": 1, "num_tokens_all": 433, "is_greedy": false, "logits_per_token": -1.494011640548706, "logits_per_char": -0.747005820274353, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 86, "native_id": 86, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3600784540176392, "incorrect_loss_raw": 1.402909795443217, "correct_loss_per_char": 0.6800392270088196, "incorrect_loss_per_char": 0.7014548977216085, "correct_loss_per_token": 1.3600784540176392, "incorrect_loss_per_token": 1.402909795443217, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5029748678207397, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.5029748678207397, "logits_per_char": -0.7514874339103699, "num_chars": 2}, {"sum_logits": -1.3600784540176392, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.3600784540176392, "logits_per_char": -0.6800392270088196, "num_chars": 2}, {"sum_logits": -1.384900450706482, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.384900450706482, "logits_per_char": -0.692450225353241, "num_chars": 2}, {"sum_logits": -1.3208540678024292, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": true, "logits_per_token": -1.3208540678024292, "logits_per_char": -0.6604270339012146, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 87, "native_id": 87, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3151733875274658, "incorrect_loss_raw": 1.421595533688863, "correct_loss_per_char": 0.6575866937637329, "incorrect_loss_per_char": 0.7107977668444315, "correct_loss_per_token": 1.3151733875274658, "incorrect_loss_per_token": 1.421595533688863, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5541812181472778, "num_tokens": 1, "num_tokens_all": 457, "is_greedy": false, "logits_per_token": -1.5541812181472778, "logits_per_char": -0.7770906090736389, "num_chars": 2}, {"sum_logits": -1.3151733875274658, "num_tokens": 1, "num_tokens_all": 457, "is_greedy": true, "logits_per_token": -1.3151733875274658, "logits_per_char": -0.6575866937637329, "num_chars": 2}, {"sum_logits": -1.3913956880569458, "num_tokens": 1, "num_tokens_all": 457, "is_greedy": false, "logits_per_token": -1.3913956880569458, "logits_per_char": -0.6956978440284729, "num_chars": 2}, {"sum_logits": -1.3192096948623657, "num_tokens": 1, "num_tokens_all": 457, "is_greedy": false, "logits_per_token": -1.3192096948623657, "logits_per_char": -0.6596048474311829, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 88, "native_id": 88, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3526532649993896, "incorrect_loss_raw": 1.4146974086761475, "correct_loss_per_char": 0.6763266324996948, "incorrect_loss_per_char": 0.7073487043380737, "correct_loss_per_token": 1.3526532649993896, "incorrect_loss_per_token": 1.4146974086761475, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5861802101135254, "num_tokens": 1, "num_tokens_all": 447, "is_greedy": false, "logits_per_token": -1.5861802101135254, "logits_per_char": -0.7930901050567627, "num_chars": 2}, {"sum_logits": -1.3526532649993896, "num_tokens": 1, "num_tokens_all": 447, "is_greedy": false, "logits_per_token": -1.3526532649993896, "logits_per_char": -0.6763266324996948, "num_chars": 2}, {"sum_logits": -1.2940218448638916, "num_tokens": 1, "num_tokens_all": 447, "is_greedy": true, "logits_per_token": -1.2940218448638916, "logits_per_char": -0.6470109224319458, "num_chars": 2}, {"sum_logits": -1.3638901710510254, "num_tokens": 1, "num_tokens_all": 447, "is_greedy": false, "logits_per_token": -1.3638901710510254, "logits_per_char": -0.6819450855255127, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 89, "native_id": 89, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3609896898269653, "incorrect_loss_raw": 1.406021237373352, "correct_loss_per_char": 0.6804948449134827, "incorrect_loss_per_char": 0.703010618686676, "correct_loss_per_token": 1.3609896898269653, "incorrect_loss_per_token": 1.406021237373352, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5403037071228027, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": false, "logits_per_token": -1.5403037071228027, "logits_per_char": -0.7701518535614014, "num_chars": 2}, {"sum_logits": -1.3609896898269653, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": false, "logits_per_token": -1.3609896898269653, "logits_per_char": -0.6804948449134827, "num_chars": 2}, {"sum_logits": -1.3582967519760132, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": false, "logits_per_token": -1.3582967519760132, "logits_per_char": -0.6791483759880066, "num_chars": 2}, {"sum_logits": -1.3194632530212402, "num_tokens": 1, "num_tokens_all": 556, "is_greedy": true, "logits_per_token": -1.3194632530212402, "logits_per_char": -0.6597316265106201, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 90, "native_id": 90, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2486704587936401, "incorrect_loss_raw": 1.4508641560872395, "correct_loss_per_char": 0.6243352293968201, "incorrect_loss_per_char": 0.7254320780436198, "correct_loss_per_token": 1.2486704587936401, "incorrect_loss_per_token": 1.4508641560872395, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6290135383605957, "num_tokens": 1, "num_tokens_all": 526, "is_greedy": false, "logits_per_token": -1.6290135383605957, "logits_per_char": -0.8145067691802979, "num_chars": 2}, {"sum_logits": -1.3327982425689697, "num_tokens": 1, "num_tokens_all": 526, "is_greedy": false, "logits_per_token": -1.3327982425689697, "logits_per_char": -0.6663991212844849, "num_chars": 2}, {"sum_logits": -1.3907806873321533, "num_tokens": 1, "num_tokens_all": 526, "is_greedy": false, "logits_per_token": -1.3907806873321533, "logits_per_char": -0.6953903436660767, "num_chars": 2}, {"sum_logits": -1.2486704587936401, "num_tokens": 1, "num_tokens_all": 526, "is_greedy": true, "logits_per_token": -1.2486704587936401, "logits_per_char": -0.6243352293968201, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 91, "native_id": 91, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3057769536972046, "incorrect_loss_raw": 1.4336854616800945, "correct_loss_per_char": 0.6528884768486023, "incorrect_loss_per_char": 0.7168427308400472, "correct_loss_per_token": 1.3057769536972046, "incorrect_loss_per_token": 1.4336854616800945, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.653267502784729, "num_tokens": 1, "num_tokens_all": 465, "is_greedy": false, "logits_per_token": -1.653267502784729, "logits_per_char": -0.8266337513923645, "num_chars": 2}, {"sum_logits": -1.372622013092041, "num_tokens": 1, "num_tokens_all": 465, "is_greedy": false, "logits_per_token": -1.372622013092041, "logits_per_char": -0.6863110065460205, "num_chars": 2}, {"sum_logits": -1.3057769536972046, "num_tokens": 1, "num_tokens_all": 465, "is_greedy": false, "logits_per_token": -1.3057769536972046, "logits_per_char": -0.6528884768486023, "num_chars": 2}, {"sum_logits": -1.2751668691635132, "num_tokens": 1, "num_tokens_all": 465, "is_greedy": true, "logits_per_token": -1.2751668691635132, "logits_per_char": -0.6375834345817566, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 92, "native_id": 92, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5067802667617798, "incorrect_loss_raw": 1.3740064303080242, "correct_loss_per_char": 0.7533901333808899, "incorrect_loss_per_char": 0.6870032151540121, "correct_loss_per_token": 1.5067802667617798, "incorrect_loss_per_token": 1.3740064303080242, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1416704654693604, "num_tokens": 1, "num_tokens_all": 434, "is_greedy": true, "logits_per_token": -1.1416704654693604, "logits_per_char": -0.5708352327346802, "num_chars": 2}, {"sum_logits": -1.5067802667617798, "num_tokens": 1, "num_tokens_all": 434, "is_greedy": false, "logits_per_token": -1.5067802667617798, "logits_per_char": -0.7533901333808899, "num_chars": 2}, {"sum_logits": -1.4830809831619263, "num_tokens": 1, "num_tokens_all": 434, "is_greedy": false, "logits_per_token": -1.4830809831619263, "logits_per_char": -0.7415404915809631, "num_chars": 2}, {"sum_logits": -1.4972678422927856, "num_tokens": 1, "num_tokens_all": 434, "is_greedy": false, "logits_per_token": -1.4972678422927856, "logits_per_char": -0.7486339211463928, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 93, "native_id": 93, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4721258878707886, "incorrect_loss_raw": 1.371808648109436, "correct_loss_per_char": 0.7360629439353943, "incorrect_loss_per_char": 0.685904324054718, "correct_loss_per_token": 1.4721258878707886, "incorrect_loss_per_token": 1.371808648109436, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2957029342651367, "num_tokens": 1, "num_tokens_all": 444, "is_greedy": true, "logits_per_token": -1.2957029342651367, "logits_per_char": -0.6478514671325684, "num_chars": 2}, {"sum_logits": -1.3703794479370117, "num_tokens": 1, "num_tokens_all": 444, "is_greedy": false, "logits_per_token": -1.3703794479370117, "logits_per_char": -0.6851897239685059, "num_chars": 2}, {"sum_logits": -1.4493435621261597, "num_tokens": 1, "num_tokens_all": 444, "is_greedy": false, "logits_per_token": -1.4493435621261597, "logits_per_char": -0.7246717810630798, "num_chars": 2}, {"sum_logits": -1.4721258878707886, "num_tokens": 1, "num_tokens_all": 444, "is_greedy": false, "logits_per_token": -1.4721258878707886, "logits_per_char": -0.7360629439353943, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 94, "native_id": 94, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.41584050655365, "incorrect_loss_raw": 1.3884681065877278, "correct_loss_per_char": 0.707920253276825, "incorrect_loss_per_char": 0.6942340532938639, "correct_loss_per_token": 1.41584050655365, "incorrect_loss_per_token": 1.3884681065877278, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2857139110565186, "num_tokens": 1, "num_tokens_all": 450, "is_greedy": true, "logits_per_token": -1.2857139110565186, "logits_per_char": -0.6428569555282593, "num_chars": 2}, {"sum_logits": -1.3589121103286743, "num_tokens": 1, "num_tokens_all": 450, "is_greedy": false, "logits_per_token": -1.3589121103286743, "logits_per_char": -0.6794560551643372, "num_chars": 2}, {"sum_logits": -1.5207782983779907, "num_tokens": 1, "num_tokens_all": 450, "is_greedy": false, "logits_per_token": -1.5207782983779907, "logits_per_char": -0.7603891491889954, "num_chars": 2}, {"sum_logits": -1.41584050655365, "num_tokens": 1, "num_tokens_all": 450, "is_greedy": false, "logits_per_token": -1.41584050655365, "logits_per_char": -0.707920253276825, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 95, "native_id": 95, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.334419846534729, "incorrect_loss_raw": 1.4136803150177002, "correct_loss_per_char": 0.6672099232673645, "incorrect_loss_per_char": 0.7068401575088501, "correct_loss_per_token": 1.334419846534729, "incorrect_loss_per_token": 1.4136803150177002, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4906766414642334, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.4906766414642334, "logits_per_char": -0.7453383207321167, "num_chars": 2}, {"sum_logits": -1.334419846534729, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.334419846534729, "logits_per_char": -0.6672099232673645, "num_chars": 2}, {"sum_logits": -1.455417275428772, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.455417275428772, "logits_per_char": -0.727708637714386, "num_chars": 2}, {"sum_logits": -1.2949470281600952, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": true, "logits_per_token": -1.2949470281600952, "logits_per_char": -0.6474735140800476, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 96, "native_id": 96, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5606456995010376, "incorrect_loss_raw": 1.3418713013331096, "correct_loss_per_char": 0.7803228497505188, "incorrect_loss_per_char": 0.6709356506665548, "correct_loss_per_token": 1.5606456995010376, "incorrect_loss_per_token": 1.3418713013331096, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5606456995010376, "num_tokens": 1, "num_tokens_all": 482, "is_greedy": false, "logits_per_token": -1.5606456995010376, "logits_per_char": -0.7803228497505188, "num_chars": 2}, {"sum_logits": -1.3877129554748535, "num_tokens": 1, "num_tokens_all": 482, "is_greedy": false, "logits_per_token": -1.3877129554748535, "logits_per_char": -0.6938564777374268, "num_chars": 2}, {"sum_logits": -1.3845680952072144, "num_tokens": 1, "num_tokens_all": 482, "is_greedy": false, "logits_per_token": -1.3845680952072144, "logits_per_char": -0.6922840476036072, "num_chars": 2}, {"sum_logits": -1.2533328533172607, "num_tokens": 1, "num_tokens_all": 482, "is_greedy": true, "logits_per_token": -1.2533328533172607, "logits_per_char": -0.6266664266586304, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 97, "native_id": 97, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5918569564819336, "incorrect_loss_raw": 1.3435083627700806, "correct_loss_per_char": 0.7959284782409668, "incorrect_loss_per_char": 0.6717541813850403, "correct_loss_per_token": 1.5918569564819336, "incorrect_loss_per_token": 1.3435083627700806, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2262357473373413, "num_tokens": 1, "num_tokens_all": 437, "is_greedy": true, "logits_per_token": -1.2262357473373413, "logits_per_char": -0.6131178736686707, "num_chars": 2}, {"sum_logits": -1.2821909189224243, "num_tokens": 1, "num_tokens_all": 437, "is_greedy": false, "logits_per_token": -1.2821909189224243, "logits_per_char": -0.6410954594612122, "num_chars": 2}, {"sum_logits": -1.522098422050476, "num_tokens": 1, "num_tokens_all": 437, "is_greedy": false, "logits_per_token": -1.522098422050476, "logits_per_char": -0.761049211025238, "num_chars": 2}, {"sum_logits": -1.5918569564819336, "num_tokens": 1, "num_tokens_all": 437, "is_greedy": false, "logits_per_token": -1.5918569564819336, "logits_per_char": -0.7959284782409668, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 98, "native_id": 98, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2966216802597046, "incorrect_loss_raw": 1.4244293769200642, "correct_loss_per_char": 0.6483108401298523, "incorrect_loss_per_char": 0.7122146884600321, "correct_loss_per_token": 1.2966216802597046, "incorrect_loss_per_token": 1.4244293769200642, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4592070579528809, "num_tokens": 1, "num_tokens_all": 459, "is_greedy": false, "logits_per_token": -1.4592070579528809, "logits_per_char": -0.7296035289764404, "num_chars": 2}, {"sum_logits": -1.4085720777511597, "num_tokens": 1, "num_tokens_all": 459, "is_greedy": false, "logits_per_token": -1.4085720777511597, "logits_per_char": -0.7042860388755798, "num_chars": 2}, {"sum_logits": -1.4055089950561523, "num_tokens": 1, "num_tokens_all": 459, "is_greedy": false, "logits_per_token": -1.4055089950561523, "logits_per_char": -0.7027544975280762, "num_chars": 2}, {"sum_logits": -1.2966216802597046, "num_tokens": 1, "num_tokens_all": 459, "is_greedy": true, "logits_per_token": -1.2966216802597046, "logits_per_char": -0.6483108401298523, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 99, "native_id": 99, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.380925178527832, "incorrect_loss_raw": 1.4105109373728435, "correct_loss_per_char": 0.690462589263916, "incorrect_loss_per_char": 0.7052554686864217, "correct_loss_per_token": 1.380925178527832, "incorrect_loss_per_token": 1.4105109373728435, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.604055643081665, "num_tokens": 1, "num_tokens_all": 437, "is_greedy": false, "logits_per_token": -1.604055643081665, "logits_per_char": -0.8020278215408325, "num_chars": 2}, {"sum_logits": -1.380925178527832, "num_tokens": 1, "num_tokens_all": 437, "is_greedy": false, "logits_per_token": -1.380925178527832, "logits_per_char": -0.690462589263916, "num_chars": 2}, {"sum_logits": -1.4423863887786865, "num_tokens": 1, "num_tokens_all": 437, "is_greedy": false, "logits_per_token": -1.4423863887786865, "logits_per_char": -0.7211931943893433, "num_chars": 2}, {"sum_logits": -1.1850907802581787, "num_tokens": 1, "num_tokens_all": 437, "is_greedy": true, "logits_per_token": -1.1850907802581787, "logits_per_char": -0.5925453901290894, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 100, "native_id": 100, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3682763576507568, "incorrect_loss_raw": 1.4039491415023804, "correct_loss_per_char": 0.6841381788253784, "incorrect_loss_per_char": 0.7019745707511902, "correct_loss_per_token": 1.3682763576507568, "incorrect_loss_per_token": 1.4039491415023804, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.523897409439087, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": false, "logits_per_token": -1.523897409439087, "logits_per_char": -0.7619487047195435, "num_chars": 2}, {"sum_logits": -1.3184489011764526, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": true, "logits_per_token": -1.3184489011764526, "logits_per_char": -0.6592244505882263, "num_chars": 2}, {"sum_logits": -1.3695011138916016, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": false, "logits_per_token": -1.3695011138916016, "logits_per_char": -0.6847505569458008, "num_chars": 2}, {"sum_logits": -1.3682763576507568, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": false, "logits_per_token": -1.3682763576507568, "logits_per_char": -0.6841381788253784, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 101, "native_id": 101, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4438202381134033, "incorrect_loss_raw": 1.3761664628982544, "correct_loss_per_char": 0.7219101190567017, "incorrect_loss_per_char": 0.6880832314491272, "correct_loss_per_token": 1.4438202381134033, "incorrect_loss_per_token": 1.3761664628982544, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4552648067474365, "num_tokens": 1, "num_tokens_all": 450, "is_greedy": false, "logits_per_token": -1.4552648067474365, "logits_per_char": -0.7276324033737183, "num_chars": 2}, {"sum_logits": -1.3588825464248657, "num_tokens": 1, "num_tokens_all": 450, "is_greedy": false, "logits_per_token": -1.3588825464248657, "logits_per_char": -0.6794412732124329, "num_chars": 2}, {"sum_logits": -1.4438202381134033, "num_tokens": 1, "num_tokens_all": 450, "is_greedy": false, "logits_per_token": -1.4438202381134033, "logits_per_char": -0.7219101190567017, "num_chars": 2}, {"sum_logits": -1.314352035522461, "num_tokens": 1, "num_tokens_all": 450, "is_greedy": true, "logits_per_token": -1.314352035522461, "logits_per_char": -0.6571760177612305, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 102, "native_id": 102, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4539233446121216, "incorrect_loss_raw": 1.3776702483495076, "correct_loss_per_char": 0.7269616723060608, "incorrect_loss_per_char": 0.6888351241747538, "correct_loss_per_token": 1.4539233446121216, "incorrect_loss_per_token": 1.3776702483495076, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4539233446121216, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": false, "logits_per_token": -1.4539233446121216, "logits_per_char": -0.7269616723060608, "num_chars": 2}, {"sum_logits": -1.2454496622085571, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": true, "logits_per_token": -1.2454496622085571, "logits_per_char": -0.6227248311042786, "num_chars": 2}, {"sum_logits": -1.4179046154022217, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": false, "logits_per_token": -1.4179046154022217, "logits_per_char": -0.7089523077011108, "num_chars": 2}, {"sum_logits": -1.4696564674377441, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": false, "logits_per_token": -1.4696564674377441, "logits_per_char": -0.7348282337188721, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 103, "native_id": 103, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4580082893371582, "incorrect_loss_raw": 1.3700772126515706, "correct_loss_per_char": 0.7290041446685791, "incorrect_loss_per_char": 0.6850386063257853, "correct_loss_per_token": 1.4580082893371582, "incorrect_loss_per_token": 1.3700772126515706, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4148061275482178, "num_tokens": 1, "num_tokens_all": 454, "is_greedy": false, "logits_per_token": -1.4148061275482178, "logits_per_char": -0.7074030637741089, "num_chars": 2}, {"sum_logits": -1.3716930150985718, "num_tokens": 1, "num_tokens_all": 454, "is_greedy": false, "logits_per_token": -1.3716930150985718, "logits_per_char": -0.6858465075492859, "num_chars": 2}, {"sum_logits": -1.3237324953079224, "num_tokens": 1, "num_tokens_all": 454, "is_greedy": true, "logits_per_token": -1.3237324953079224, "logits_per_char": -0.6618662476539612, "num_chars": 2}, {"sum_logits": -1.4580082893371582, "num_tokens": 1, "num_tokens_all": 454, "is_greedy": false, "logits_per_token": -1.4580082893371582, "logits_per_char": -0.7290041446685791, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 104, "native_id": 104, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3347779512405396, "incorrect_loss_raw": 1.4134450753529866, "correct_loss_per_char": 0.6673889756202698, "incorrect_loss_per_char": 0.7067225376764933, "correct_loss_per_token": 1.3347779512405396, "incorrect_loss_per_token": 1.4134450753529866, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.468990445137024, "num_tokens": 1, "num_tokens_all": 463, "is_greedy": false, "logits_per_token": -1.468990445137024, "logits_per_char": -0.734495222568512, "num_chars": 2}, {"sum_logits": -1.2909539937973022, "num_tokens": 1, "num_tokens_all": 463, "is_greedy": true, "logits_per_token": -1.2909539937973022, "logits_per_char": -0.6454769968986511, "num_chars": 2}, {"sum_logits": -1.4803907871246338, "num_tokens": 1, "num_tokens_all": 463, "is_greedy": false, "logits_per_token": -1.4803907871246338, "logits_per_char": -0.7401953935623169, "num_chars": 2}, {"sum_logits": -1.3347779512405396, "num_tokens": 1, "num_tokens_all": 463, "is_greedy": false, "logits_per_token": -1.3347779512405396, "logits_per_char": -0.6673889756202698, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 105, "native_id": 105, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3125160932540894, "incorrect_loss_raw": 1.421215335528056, "correct_loss_per_char": 0.6562580466270447, "incorrect_loss_per_char": 0.710607667764028, "correct_loss_per_token": 1.3125160932540894, "incorrect_loss_per_token": 1.421215335528056, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3555223941802979, "num_tokens": 1, "num_tokens_all": 436, "is_greedy": false, "logits_per_token": -1.3555223941802979, "logits_per_char": -0.6777611970901489, "num_chars": 2}, {"sum_logits": -1.3125160932540894, "num_tokens": 1, "num_tokens_all": 436, "is_greedy": true, "logits_per_token": -1.3125160932540894, "logits_per_char": -0.6562580466270447, "num_chars": 2}, {"sum_logits": -1.4345934391021729, "num_tokens": 1, "num_tokens_all": 436, "is_greedy": false, "logits_per_token": -1.4345934391021729, "logits_per_char": -0.7172967195510864, "num_chars": 2}, {"sum_logits": -1.4735301733016968, "num_tokens": 1, "num_tokens_all": 436, "is_greedy": false, "logits_per_token": -1.4735301733016968, "logits_per_char": -0.7367650866508484, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 106, "native_id": 106, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4258110523223877, "incorrect_loss_raw": 1.3904613256454468, "correct_loss_per_char": 0.7129055261611938, "incorrect_loss_per_char": 0.6952306628227234, "correct_loss_per_token": 1.4258110523223877, "incorrect_loss_per_token": 1.3904613256454468, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3035725355148315, "num_tokens": 1, "num_tokens_all": 469, "is_greedy": true, "logits_per_token": -1.3035725355148315, "logits_per_char": -0.6517862677574158, "num_chars": 2}, {"sum_logits": -1.3702518939971924, "num_tokens": 1, "num_tokens_all": 469, "is_greedy": false, "logits_per_token": -1.3702518939971924, "logits_per_char": -0.6851259469985962, "num_chars": 2}, {"sum_logits": -1.4258110523223877, "num_tokens": 1, "num_tokens_all": 469, "is_greedy": false, "logits_per_token": -1.4258110523223877, "logits_per_char": -0.7129055261611938, "num_chars": 2}, {"sum_logits": -1.4975595474243164, "num_tokens": 1, "num_tokens_all": 469, "is_greedy": false, "logits_per_token": -1.4975595474243164, "logits_per_char": -0.7487797737121582, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 107, "native_id": 107, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.437340497970581, "incorrect_loss_raw": 1.376815915107727, "correct_loss_per_char": 0.7186702489852905, "incorrect_loss_per_char": 0.6884079575538635, "correct_loss_per_token": 1.437340497970581, "incorrect_loss_per_token": 1.376815915107727, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.437340497970581, "num_tokens": 1, "num_tokens_all": 459, "is_greedy": false, "logits_per_token": -1.437340497970581, "logits_per_char": -0.7186702489852905, "num_chars": 2}, {"sum_logits": -1.3229373693466187, "num_tokens": 1, "num_tokens_all": 459, "is_greedy": true, "logits_per_token": -1.3229373693466187, "logits_per_char": -0.6614686846733093, "num_chars": 2}, {"sum_logits": -1.3716986179351807, "num_tokens": 1, "num_tokens_all": 459, "is_greedy": false, "logits_per_token": -1.3716986179351807, "logits_per_char": -0.6858493089675903, "num_chars": 2}, {"sum_logits": -1.4358117580413818, "num_tokens": 1, "num_tokens_all": 459, "is_greedy": false, "logits_per_token": -1.4358117580413818, "logits_per_char": -0.7179058790206909, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 108, "native_id": 108, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3683573007583618, "incorrect_loss_raw": 1.4092063903808594, "correct_loss_per_char": 0.6841786503791809, "incorrect_loss_per_char": 0.7046031951904297, "correct_loss_per_token": 1.3683573007583618, "incorrect_loss_per_token": 1.4092063903808594, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5830354690551758, "num_tokens": 1, "num_tokens_all": 538, "is_greedy": false, "logits_per_token": -1.5830354690551758, "logits_per_char": -0.7915177345275879, "num_chars": 2}, {"sum_logits": -1.4358030557632446, "num_tokens": 1, "num_tokens_all": 538, "is_greedy": false, "logits_per_token": -1.4358030557632446, "logits_per_char": -0.7179015278816223, "num_chars": 2}, {"sum_logits": -1.3683573007583618, "num_tokens": 1, "num_tokens_all": 538, "is_greedy": false, "logits_per_token": -1.3683573007583618, "logits_per_char": -0.6841786503791809, "num_chars": 2}, {"sum_logits": -1.2087806463241577, "num_tokens": 1, "num_tokens_all": 538, "is_greedy": true, "logits_per_token": -1.2087806463241577, "logits_per_char": -0.6043903231620789, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 109, "native_id": 109, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.345811367034912, "incorrect_loss_raw": 1.4291035334269206, "correct_loss_per_char": 0.672905683517456, "incorrect_loss_per_char": 0.7145517667134603, "correct_loss_per_token": 1.345811367034912, "incorrect_loss_per_token": 1.4291035334269206, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6545379161834717, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": false, "logits_per_token": -1.6545379161834717, "logits_per_char": -0.8272689580917358, "num_chars": 2}, {"sum_logits": -1.345811367034912, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": false, "logits_per_token": -1.345811367034912, "logits_per_char": -0.672905683517456, "num_chars": 2}, {"sum_logits": -1.4785752296447754, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": false, "logits_per_token": -1.4785752296447754, "logits_per_char": -0.7392876148223877, "num_chars": 2}, {"sum_logits": -1.1541974544525146, "num_tokens": 1, "num_tokens_all": 485, "is_greedy": true, "logits_per_token": -1.1541974544525146, "logits_per_char": -0.5770987272262573, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 110, "native_id": 110, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3099054098129272, "incorrect_loss_raw": 1.4281671444574993, "correct_loss_per_char": 0.6549527049064636, "incorrect_loss_per_char": 0.7140835722287496, "correct_loss_per_token": 1.3099054098129272, "incorrect_loss_per_token": 1.4281671444574993, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6128361225128174, "num_tokens": 1, "num_tokens_all": 489, "is_greedy": false, "logits_per_token": -1.6128361225128174, "logits_per_char": -0.8064180612564087, "num_chars": 2}, {"sum_logits": -1.33576238155365, "num_tokens": 1, "num_tokens_all": 489, "is_greedy": false, "logits_per_token": -1.33576238155365, "logits_per_char": -0.667881190776825, "num_chars": 2}, {"sum_logits": -1.3099054098129272, "num_tokens": 1, "num_tokens_all": 489, "is_greedy": true, "logits_per_token": -1.3099054098129272, "logits_per_char": -0.6549527049064636, "num_chars": 2}, {"sum_logits": -1.3359029293060303, "num_tokens": 1, "num_tokens_all": 489, "is_greedy": false, "logits_per_token": -1.3359029293060303, "logits_per_char": -0.6679514646530151, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 111, "native_id": 111, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2907536029815674, "incorrect_loss_raw": 1.4304325580596924, "correct_loss_per_char": 0.6453768014907837, "incorrect_loss_per_char": 0.7152162790298462, "correct_loss_per_token": 1.2907536029815674, "incorrect_loss_per_token": 1.4304325580596924, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2907536029815674, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": true, "logits_per_token": -1.2907536029815674, "logits_per_char": -0.6453768014907837, "num_chars": 2}, {"sum_logits": -1.3664145469665527, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": false, "logits_per_token": -1.3664145469665527, "logits_per_char": -0.6832072734832764, "num_chars": 2}, {"sum_logits": -1.4436172246932983, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": false, "logits_per_token": -1.4436172246932983, "logits_per_char": -0.7218086123466492, "num_chars": 2}, {"sum_logits": -1.481265902519226, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": false, "logits_per_token": -1.481265902519226, "logits_per_char": -0.740632951259613, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 112, "native_id": 112, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.307344675064087, "incorrect_loss_raw": 1.4223685661951702, "correct_loss_per_char": 0.6536723375320435, "incorrect_loss_per_char": 0.7111842830975851, "correct_loss_per_token": 1.307344675064087, "incorrect_loss_per_token": 1.4223685661951702, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4821126461029053, "num_tokens": 1, "num_tokens_all": 537, "is_greedy": false, "logits_per_token": -1.4821126461029053, "logits_per_char": -0.7410563230514526, "num_chars": 2}, {"sum_logits": -1.307344675064087, "num_tokens": 1, "num_tokens_all": 537, "is_greedy": true, "logits_per_token": -1.307344675064087, "logits_per_char": -0.6536723375320435, "num_chars": 2}, {"sum_logits": -1.4210996627807617, "num_tokens": 1, "num_tokens_all": 537, "is_greedy": false, "logits_per_token": -1.4210996627807617, "logits_per_char": -0.7105498313903809, "num_chars": 2}, {"sum_logits": -1.3638933897018433, "num_tokens": 1, "num_tokens_all": 537, "is_greedy": false, "logits_per_token": -1.3638933897018433, "logits_per_char": -0.6819466948509216, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 113, "native_id": 113, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5781924724578857, "incorrect_loss_raw": 1.342084805170695, "correct_loss_per_char": 0.7890962362289429, "incorrect_loss_per_char": 0.6710424025853475, "correct_loss_per_token": 1.5781924724578857, "incorrect_loss_per_token": 1.342084805170695, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5781924724578857, "num_tokens": 1, "num_tokens_all": 517, "is_greedy": false, "logits_per_token": -1.5781924724578857, "logits_per_char": -0.7890962362289429, "num_chars": 2}, {"sum_logits": -1.415757179260254, "num_tokens": 1, "num_tokens_all": 517, "is_greedy": false, "logits_per_token": -1.415757179260254, "logits_per_char": -0.707878589630127, "num_chars": 2}, {"sum_logits": -1.4258692264556885, "num_tokens": 1, "num_tokens_all": 517, "is_greedy": false, "logits_per_token": -1.4258692264556885, "logits_per_char": -0.7129346132278442, "num_chars": 2}, {"sum_logits": -1.1846280097961426, "num_tokens": 1, "num_tokens_all": 517, "is_greedy": true, "logits_per_token": -1.1846280097961426, "logits_per_char": -0.5923140048980713, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 114, "native_id": 114, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3417067527770996, "incorrect_loss_raw": 1.4091078042984009, "correct_loss_per_char": 0.6708533763885498, "incorrect_loss_per_char": 0.7045539021492004, "correct_loss_per_token": 1.3417067527770996, "incorrect_loss_per_token": 1.4091078042984009, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3417067527770996, "num_tokens": 1, "num_tokens_all": 469, "is_greedy": true, "logits_per_token": -1.3417067527770996, "logits_per_char": -0.6708533763885498, "num_chars": 2}, {"sum_logits": -1.3925974369049072, "num_tokens": 1, "num_tokens_all": 469, "is_greedy": false, "logits_per_token": -1.3925974369049072, "logits_per_char": -0.6962987184524536, "num_chars": 2}, {"sum_logits": -1.3574353456497192, "num_tokens": 1, "num_tokens_all": 469, "is_greedy": false, "logits_per_token": -1.3574353456497192, "logits_per_char": -0.6787176728248596, "num_chars": 2}, {"sum_logits": -1.4772906303405762, "num_tokens": 1, "num_tokens_all": 469, "is_greedy": false, "logits_per_token": -1.4772906303405762, "logits_per_char": -0.7386453151702881, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 115, "native_id": 115, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5117391347885132, "incorrect_loss_raw": 1.3583484490712483, "correct_loss_per_char": 0.7558695673942566, "incorrect_loss_per_char": 0.6791742245356241, "correct_loss_per_token": 1.5117391347885132, "incorrect_loss_per_token": 1.3583484490712483, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4760278463363647, "num_tokens": 1, "num_tokens_all": 492, "is_greedy": false, "logits_per_token": -1.4760278463363647, "logits_per_char": -0.7380139231681824, "num_chars": 2}, {"sum_logits": -1.2129617929458618, "num_tokens": 1, "num_tokens_all": 492, "is_greedy": true, "logits_per_token": -1.2129617929458618, "logits_per_char": -0.6064808964729309, "num_chars": 2}, {"sum_logits": -1.3860557079315186, "num_tokens": 1, "num_tokens_all": 492, "is_greedy": false, "logits_per_token": -1.3860557079315186, "logits_per_char": -0.6930278539657593, "num_chars": 2}, {"sum_logits": -1.5117391347885132, "num_tokens": 1, "num_tokens_all": 492, "is_greedy": false, "logits_per_token": -1.5117391347885132, "logits_per_char": -0.7558695673942566, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 116, "native_id": 116, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4362961053848267, "incorrect_loss_raw": 1.377193848292033, "correct_loss_per_char": 0.7181480526924133, "incorrect_loss_per_char": 0.6885969241460165, "correct_loss_per_token": 1.4362961053848267, "incorrect_loss_per_token": 1.377193848292033, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4174832105636597, "num_tokens": 1, "num_tokens_all": 460, "is_greedy": false, "logits_per_token": -1.4174832105636597, "logits_per_char": -0.7087416052818298, "num_chars": 2}, {"sum_logits": -1.3364531993865967, "num_tokens": 1, "num_tokens_all": 460, "is_greedy": true, "logits_per_token": -1.3364531993865967, "logits_per_char": -0.6682265996932983, "num_chars": 2}, {"sum_logits": -1.4362961053848267, "num_tokens": 1, "num_tokens_all": 460, "is_greedy": false, "logits_per_token": -1.4362961053848267, "logits_per_char": -0.7181480526924133, "num_chars": 2}, {"sum_logits": -1.3776451349258423, "num_tokens": 1, "num_tokens_all": 460, "is_greedy": false, "logits_per_token": -1.3776451349258423, "logits_per_char": -0.6888225674629211, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 117, "native_id": 117, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.340210199356079, "incorrect_loss_raw": 1.4166963497797649, "correct_loss_per_char": 0.6701050996780396, "incorrect_loss_per_char": 0.7083481748898824, "correct_loss_per_token": 1.340210199356079, "incorrect_loss_per_token": 1.4166963497797649, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.586037278175354, "num_tokens": 1, "num_tokens_all": 490, "is_greedy": false, "logits_per_token": -1.586037278175354, "logits_per_char": -0.793018639087677, "num_chars": 2}, {"sum_logits": -1.3995118141174316, "num_tokens": 1, "num_tokens_all": 490, "is_greedy": false, "logits_per_token": -1.3995118141174316, "logits_per_char": -0.6997559070587158, "num_chars": 2}, {"sum_logits": -1.340210199356079, "num_tokens": 1, "num_tokens_all": 490, "is_greedy": false, "logits_per_token": -1.340210199356079, "logits_per_char": -0.6701050996780396, "num_chars": 2}, {"sum_logits": -1.2645399570465088, "num_tokens": 1, "num_tokens_all": 490, "is_greedy": true, "logits_per_token": -1.2645399570465088, "logits_per_char": -0.6322699785232544, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 118, "native_id": 118, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4583677053451538, "incorrect_loss_raw": 1.3725227912267048, "correct_loss_per_char": 0.7291838526725769, "incorrect_loss_per_char": 0.6862613956133524, "correct_loss_per_token": 1.4583677053451538, "incorrect_loss_per_token": 1.3725227912267048, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4583677053451538, "num_tokens": 1, "num_tokens_all": 511, "is_greedy": false, "logits_per_token": -1.4583677053451538, "logits_per_char": -0.7291838526725769, "num_chars": 2}, {"sum_logits": -1.3861504793167114, "num_tokens": 1, "num_tokens_all": 511, "is_greedy": false, "logits_per_token": -1.3861504793167114, "logits_per_char": -0.6930752396583557, "num_chars": 2}, {"sum_logits": -1.422334909439087, "num_tokens": 1, "num_tokens_all": 511, "is_greedy": false, "logits_per_token": -1.422334909439087, "logits_per_char": -0.7111674547195435, "num_chars": 2}, {"sum_logits": -1.3090829849243164, "num_tokens": 1, "num_tokens_all": 511, "is_greedy": true, "logits_per_token": -1.3090829849243164, "logits_per_char": -0.6545414924621582, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 119, "native_id": 119, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4995914697647095, "incorrect_loss_raw": 1.3568872213363647, "correct_loss_per_char": 0.7497957348823547, "incorrect_loss_per_char": 0.6784436106681824, "correct_loss_per_token": 1.4995914697647095, "incorrect_loss_per_token": 1.3568872213363647, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4995914697647095, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.4995914697647095, "logits_per_char": -0.7497957348823547, "num_chars": 2}, {"sum_logits": -1.3593361377716064, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.3593361377716064, "logits_per_char": -0.6796680688858032, "num_chars": 2}, {"sum_logits": -1.3953635692596436, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.3953635692596436, "logits_per_char": -0.6976817846298218, "num_chars": 2}, {"sum_logits": -1.3159619569778442, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": true, "logits_per_token": -1.3159619569778442, "logits_per_char": -0.6579809784889221, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 120, "native_id": 120, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4255222082138062, "incorrect_loss_raw": 1.3856092691421509, "correct_loss_per_char": 0.7127611041069031, "incorrect_loss_per_char": 0.6928046345710754, "correct_loss_per_token": 1.4255222082138062, "incorrect_loss_per_token": 1.3856092691421509, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3452835083007812, "num_tokens": 1, "num_tokens_all": 444, "is_greedy": true, "logits_per_token": -1.3452835083007812, "logits_per_char": -0.6726417541503906, "num_chars": 2}, {"sum_logits": -1.3731681108474731, "num_tokens": 1, "num_tokens_all": 444, "is_greedy": false, "logits_per_token": -1.3731681108474731, "logits_per_char": -0.6865840554237366, "num_chars": 2}, {"sum_logits": -1.4255222082138062, "num_tokens": 1, "num_tokens_all": 444, "is_greedy": false, "logits_per_token": -1.4255222082138062, "logits_per_char": -0.7127611041069031, "num_chars": 2}, {"sum_logits": -1.4383761882781982, "num_tokens": 1, "num_tokens_all": 444, "is_greedy": false, "logits_per_token": -1.4383761882781982, "logits_per_char": -0.7191880941390991, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 121, "native_id": 121, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3930494785308838, "incorrect_loss_raw": 1.3964824279149373, "correct_loss_per_char": 0.6965247392654419, "incorrect_loss_per_char": 0.6982412139574686, "correct_loss_per_token": 1.3930494785308838, "incorrect_loss_per_token": 1.3964824279149373, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.543229341506958, "num_tokens": 1, "num_tokens_all": 463, "is_greedy": false, "logits_per_token": -1.543229341506958, "logits_per_char": -0.771614670753479, "num_chars": 2}, {"sum_logits": -1.3930494785308838, "num_tokens": 1, "num_tokens_all": 463, "is_greedy": false, "logits_per_token": -1.3930494785308838, "logits_per_char": -0.6965247392654419, "num_chars": 2}, {"sum_logits": -1.337996482849121, "num_tokens": 1, "num_tokens_all": 463, "is_greedy": false, "logits_per_token": -1.337996482849121, "logits_per_char": -0.6689982414245605, "num_chars": 2}, {"sum_logits": -1.308221459388733, "num_tokens": 1, "num_tokens_all": 463, "is_greedy": true, "logits_per_token": -1.308221459388733, "logits_per_char": -0.6541107296943665, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 122, "native_id": 122, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.255306363105774, "incorrect_loss_raw": 1.4584054152170818, "correct_loss_per_char": 0.627653181552887, "incorrect_loss_per_char": 0.7292027076085409, "correct_loss_per_token": 1.255306363105774, "incorrect_loss_per_token": 1.4584054152170818, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7318497896194458, "num_tokens": 1, "num_tokens_all": 502, "is_greedy": false, "logits_per_token": -1.7318497896194458, "logits_per_char": -0.8659248948097229, "num_chars": 2}, {"sum_logits": -1.3626189231872559, "num_tokens": 1, "num_tokens_all": 502, "is_greedy": false, "logits_per_token": -1.3626189231872559, "logits_per_char": -0.6813094615936279, "num_chars": 2}, {"sum_logits": -1.2807475328445435, "num_tokens": 1, "num_tokens_all": 502, "is_greedy": false, "logits_per_token": -1.2807475328445435, "logits_per_char": -0.6403737664222717, "num_chars": 2}, {"sum_logits": -1.255306363105774, "num_tokens": 1, "num_tokens_all": 502, "is_greedy": true, "logits_per_token": -1.255306363105774, "logits_per_char": -0.627653181552887, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 123, "native_id": 123, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4869589805603027, "incorrect_loss_raw": 1.3635632991790771, "correct_loss_per_char": 0.7434794902801514, "incorrect_loss_per_char": 0.6817816495895386, "correct_loss_per_token": 1.4869589805603027, "incorrect_loss_per_token": 1.3635632991790771, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4869589805603027, "num_tokens": 1, "num_tokens_all": 435, "is_greedy": false, "logits_per_token": -1.4869589805603027, "logits_per_char": -0.7434794902801514, "num_chars": 2}, {"sum_logits": -1.399402379989624, "num_tokens": 1, "num_tokens_all": 435, "is_greedy": false, "logits_per_token": -1.399402379989624, "logits_per_char": -0.699701189994812, "num_chars": 2}, {"sum_logits": -1.3784289360046387, "num_tokens": 1, "num_tokens_all": 435, "is_greedy": false, "logits_per_token": -1.3784289360046387, "logits_per_char": -0.6892144680023193, "num_chars": 2}, {"sum_logits": -1.3128585815429688, "num_tokens": 1, "num_tokens_all": 435, "is_greedy": true, "logits_per_token": -1.3128585815429688, "logits_per_char": -0.6564292907714844, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 124, "native_id": 124, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3081570863723755, "incorrect_loss_raw": 1.429011583328247, "correct_loss_per_char": 0.6540785431861877, "incorrect_loss_per_char": 0.7145057916641235, "correct_loss_per_token": 1.3081570863723755, "incorrect_loss_per_token": 1.429011583328247, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6020433902740479, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": false, "logits_per_token": -1.6020433902740479, "logits_per_char": -0.8010216951370239, "num_chars": 2}, {"sum_logits": -1.3081570863723755, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": false, "logits_per_token": -1.3081570863723755, "logits_per_char": -0.6540785431861877, "num_chars": 2}, {"sum_logits": -1.4196561574935913, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": false, "logits_per_token": -1.4196561574935913, "logits_per_char": -0.7098280787467957, "num_chars": 2}, {"sum_logits": -1.265335202217102, "num_tokens": 1, "num_tokens_all": 472, "is_greedy": true, "logits_per_token": -1.265335202217102, "logits_per_char": -0.632667601108551, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 125, "native_id": 125, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4219588041305542, "incorrect_loss_raw": 1.3848882913589478, "correct_loss_per_char": 0.7109794020652771, "incorrect_loss_per_char": 0.6924441456794739, "correct_loss_per_token": 1.4219588041305542, "incorrect_loss_per_token": 1.3848882913589478, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4897512197494507, "num_tokens": 1, "num_tokens_all": 468, "is_greedy": false, "logits_per_token": -1.4897512197494507, "logits_per_char": -0.7448756098747253, "num_chars": 2}, {"sum_logits": -1.3753496408462524, "num_tokens": 1, "num_tokens_all": 468, "is_greedy": false, "logits_per_token": -1.3753496408462524, "logits_per_char": -0.6876748204231262, "num_chars": 2}, {"sum_logits": -1.4219588041305542, "num_tokens": 1, "num_tokens_all": 468, "is_greedy": false, "logits_per_token": -1.4219588041305542, "logits_per_char": -0.7109794020652771, "num_chars": 2}, {"sum_logits": -1.2895640134811401, "num_tokens": 1, "num_tokens_all": 468, "is_greedy": true, "logits_per_token": -1.2895640134811401, "logits_per_char": -0.6447820067405701, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 126, "native_id": 126, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4037697315216064, "incorrect_loss_raw": 1.3917019367218018, "correct_loss_per_char": 0.7018848657608032, "incorrect_loss_per_char": 0.6958509683609009, "correct_loss_per_token": 1.4037697315216064, "incorrect_loss_per_token": 1.3917019367218018, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4509210586547852, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.4509210586547852, "logits_per_char": -0.7254605293273926, "num_chars": 2}, {"sum_logits": -1.2654669284820557, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": true, "logits_per_token": -1.2654669284820557, "logits_per_char": -0.6327334642410278, "num_chars": 2}, {"sum_logits": -1.4587178230285645, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.4587178230285645, "logits_per_char": -0.7293589115142822, "num_chars": 2}, {"sum_logits": -1.4037697315216064, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.4037697315216064, "logits_per_char": -0.7018848657608032, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 127, "native_id": 127, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4790396690368652, "incorrect_loss_raw": 1.3655448357264202, "correct_loss_per_char": 0.7395198345184326, "incorrect_loss_per_char": 0.6827724178632101, "correct_loss_per_token": 1.4790396690368652, "incorrect_loss_per_token": 1.3655448357264202, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.426720380783081, "num_tokens": 1, "num_tokens_all": 465, "is_greedy": false, "logits_per_token": -1.426720380783081, "logits_per_char": -0.7133601903915405, "num_chars": 2}, {"sum_logits": -1.4790396690368652, "num_tokens": 1, "num_tokens_all": 465, "is_greedy": false, "logits_per_token": -1.4790396690368652, "logits_per_char": -0.7395198345184326, "num_chars": 2}, {"sum_logits": -1.3714243173599243, "num_tokens": 1, "num_tokens_all": 465, "is_greedy": false, "logits_per_token": -1.3714243173599243, "logits_per_char": -0.6857121586799622, "num_chars": 2}, {"sum_logits": -1.2984898090362549, "num_tokens": 1, "num_tokens_all": 465, "is_greedy": true, "logits_per_token": -1.2984898090362549, "logits_per_char": -0.6492449045181274, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 128, "native_id": 128, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3495503664016724, "incorrect_loss_raw": 1.4070106744766235, "correct_loss_per_char": 0.6747751832008362, "incorrect_loss_per_char": 0.7035053372383118, "correct_loss_per_token": 1.3495503664016724, "incorrect_loss_per_token": 1.4070106744766235, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4823377132415771, "num_tokens": 1, "num_tokens_all": 432, "is_greedy": false, "logits_per_token": -1.4823377132415771, "logits_per_char": -0.7411688566207886, "num_chars": 2}, {"sum_logits": -1.3752950429916382, "num_tokens": 1, "num_tokens_all": 432, "is_greedy": false, "logits_per_token": -1.3752950429916382, "logits_per_char": -0.6876475214958191, "num_chars": 2}, {"sum_logits": -1.3495503664016724, "num_tokens": 1, "num_tokens_all": 432, "is_greedy": true, "logits_per_token": -1.3495503664016724, "logits_per_char": -0.6747751832008362, "num_chars": 2}, {"sum_logits": -1.3633992671966553, "num_tokens": 1, "num_tokens_all": 432, "is_greedy": false, "logits_per_token": -1.3633992671966553, "logits_per_char": -0.6816996335983276, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 129, "native_id": 129, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.370051622390747, "incorrect_loss_raw": 1.4020699262619019, "correct_loss_per_char": 0.6850258111953735, "incorrect_loss_per_char": 0.7010349631309509, "correct_loss_per_token": 1.370051622390747, "incorrect_loss_per_token": 1.4020699262619019, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.359075903892517, "num_tokens": 1, "num_tokens_all": 452, "is_greedy": true, "logits_per_token": -1.359075903892517, "logits_per_char": -0.6795379519462585, "num_chars": 2}, {"sum_logits": -1.370051622390747, "num_tokens": 1, "num_tokens_all": 452, "is_greedy": false, "logits_per_token": -1.370051622390747, "logits_per_char": -0.6850258111953735, "num_chars": 2}, {"sum_logits": -1.3922526836395264, "num_tokens": 1, "num_tokens_all": 452, "is_greedy": false, "logits_per_token": -1.3922526836395264, "logits_per_char": -0.6961263418197632, "num_chars": 2}, {"sum_logits": -1.454881191253662, "num_tokens": 1, "num_tokens_all": 452, "is_greedy": false, "logits_per_token": -1.454881191253662, "logits_per_char": -0.727440595626831, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 130, "native_id": 130, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3619920015335083, "incorrect_loss_raw": 1.4060949484507244, "correct_loss_per_char": 0.6809960007667542, "incorrect_loss_per_char": 0.7030474742253622, "correct_loss_per_token": 1.3619920015335083, "incorrect_loss_per_token": 1.4060949484507244, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5500850677490234, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.5500850677490234, "logits_per_char": -0.7750425338745117, "num_chars": 2}, {"sum_logits": -1.2923516035079956, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": true, "logits_per_token": -1.2923516035079956, "logits_per_char": -0.6461758017539978, "num_chars": 2}, {"sum_logits": -1.3619920015335083, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.3619920015335083, "logits_per_char": -0.6809960007667542, "num_chars": 2}, {"sum_logits": -1.3758481740951538, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.3758481740951538, "logits_per_char": -0.6879240870475769, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 131, "native_id": 131, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.419678807258606, "incorrect_loss_raw": 1.3867979447046916, "correct_loss_per_char": 0.709839403629303, "incorrect_loss_per_char": 0.6933989723523458, "correct_loss_per_token": 1.419678807258606, "incorrect_loss_per_token": 1.3867979447046916, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4629367589950562, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": false, "logits_per_token": -1.4629367589950562, "logits_per_char": -0.7314683794975281, "num_chars": 2}, {"sum_logits": -1.2904999256134033, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": true, "logits_per_token": -1.2904999256134033, "logits_per_char": -0.6452499628067017, "num_chars": 2}, {"sum_logits": -1.4069571495056152, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": false, "logits_per_token": -1.4069571495056152, "logits_per_char": -0.7034785747528076, "num_chars": 2}, {"sum_logits": -1.419678807258606, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": false, "logits_per_token": -1.419678807258606, "logits_per_char": -0.709839403629303, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 132, "native_id": 132, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4243524074554443, "incorrect_loss_raw": 1.3838024536768596, "correct_loss_per_char": 0.7121762037277222, "incorrect_loss_per_char": 0.6919012268384298, "correct_loss_per_token": 1.4243524074554443, "incorrect_loss_per_token": 1.3838024536768596, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4903028011322021, "num_tokens": 1, "num_tokens_all": 530, "is_greedy": false, "logits_per_token": -1.4903028011322021, "logits_per_char": -0.7451514005661011, "num_chars": 2}, {"sum_logits": -1.3019349575042725, "num_tokens": 1, "num_tokens_all": 530, "is_greedy": true, "logits_per_token": -1.3019349575042725, "logits_per_char": -0.6509674787521362, "num_chars": 2}, {"sum_logits": -1.4243524074554443, "num_tokens": 1, "num_tokens_all": 530, "is_greedy": false, "logits_per_token": -1.4243524074554443, "logits_per_char": -0.7121762037277222, "num_chars": 2}, {"sum_logits": -1.359169602394104, "num_tokens": 1, "num_tokens_all": 530, "is_greedy": false, "logits_per_token": -1.359169602394104, "logits_per_char": -0.679584801197052, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 133, "native_id": 133, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4313684701919556, "incorrect_loss_raw": 1.3799653848012288, "correct_loss_per_char": 0.7156842350959778, "incorrect_loss_per_char": 0.6899826924006144, "correct_loss_per_token": 1.4313684701919556, "incorrect_loss_per_token": 1.3799653848012288, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4218496084213257, "num_tokens": 1, "num_tokens_all": 468, "is_greedy": false, "logits_per_token": -1.4218496084213257, "logits_per_char": -0.7109248042106628, "num_chars": 2}, {"sum_logits": -1.288206934928894, "num_tokens": 1, "num_tokens_all": 468, "is_greedy": true, "logits_per_token": -1.288206934928894, "logits_per_char": -0.644103467464447, "num_chars": 2}, {"sum_logits": -1.4298396110534668, "num_tokens": 1, "num_tokens_all": 468, "is_greedy": false, "logits_per_token": -1.4298396110534668, "logits_per_char": -0.7149198055267334, "num_chars": 2}, {"sum_logits": -1.4313684701919556, "num_tokens": 1, "num_tokens_all": 468, "is_greedy": false, "logits_per_token": -1.4313684701919556, "logits_per_char": -0.7156842350959778, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 134, "native_id": 134, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4303770065307617, "incorrect_loss_raw": 1.3799242575963337, "correct_loss_per_char": 0.7151885032653809, "incorrect_loss_per_char": 0.6899621287981669, "correct_loss_per_token": 1.4303770065307617, "incorrect_loss_per_token": 1.3799242575963337, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4789118766784668, "num_tokens": 1, "num_tokens_all": 492, "is_greedy": false, "logits_per_token": -1.4789118766784668, "logits_per_char": -0.7394559383392334, "num_chars": 2}, {"sum_logits": -1.4303770065307617, "num_tokens": 1, "num_tokens_all": 492, "is_greedy": false, "logits_per_token": -1.4303770065307617, "logits_per_char": -0.7151885032653809, "num_chars": 2}, {"sum_logits": -1.3242814540863037, "num_tokens": 1, "num_tokens_all": 492, "is_greedy": true, "logits_per_token": -1.3242814540863037, "logits_per_char": -0.6621407270431519, "num_chars": 2}, {"sum_logits": -1.336579442024231, "num_tokens": 1, "num_tokens_all": 492, "is_greedy": false, "logits_per_token": -1.336579442024231, "logits_per_char": -0.6682897210121155, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 135, "native_id": 135, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5167115926742554, "incorrect_loss_raw": 1.3537518978118896, "correct_loss_per_char": 0.7583557963371277, "incorrect_loss_per_char": 0.6768759489059448, "correct_loss_per_token": 1.5167115926742554, "incorrect_loss_per_token": 1.3537518978118896, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5167115926742554, "num_tokens": 1, "num_tokens_all": 509, "is_greedy": false, "logits_per_token": -1.5167115926742554, "logits_per_char": -0.7583557963371277, "num_chars": 2}, {"sum_logits": -1.3532575368881226, "num_tokens": 1, "num_tokens_all": 509, "is_greedy": false, "logits_per_token": -1.3532575368881226, "logits_per_char": -0.6766287684440613, "num_chars": 2}, {"sum_logits": -1.4119513034820557, "num_tokens": 1, "num_tokens_all": 509, "is_greedy": false, "logits_per_token": -1.4119513034820557, "logits_per_char": -0.7059756517410278, "num_chars": 2}, {"sum_logits": -1.2960468530654907, "num_tokens": 1, "num_tokens_all": 509, "is_greedy": true, "logits_per_token": -1.2960468530654907, "logits_per_char": -0.6480234265327454, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 136, "native_id": 136, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5618849992752075, "incorrect_loss_raw": 1.343727429707845, "correct_loss_per_char": 0.7809424996376038, "incorrect_loss_per_char": 0.6718637148539225, "correct_loss_per_token": 1.5618849992752075, "incorrect_loss_per_token": 1.343727429707845, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5618849992752075, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": false, "logits_per_token": -1.5618849992752075, "logits_per_char": -0.7809424996376038, "num_chars": 2}, {"sum_logits": -1.375714659690857, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": false, "logits_per_token": -1.375714659690857, "logits_per_char": -0.6878573298454285, "num_chars": 2}, {"sum_logits": -1.4323121309280396, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": false, "logits_per_token": -1.4323121309280396, "logits_per_char": -0.7161560654640198, "num_chars": 2}, {"sum_logits": -1.2231554985046387, "num_tokens": 1, "num_tokens_all": 473, "is_greedy": true, "logits_per_token": -1.2231554985046387, "logits_per_char": -0.6115777492523193, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 137, "native_id": 137, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3070074319839478, "incorrect_loss_raw": 1.4245014588038127, "correct_loss_per_char": 0.6535037159919739, "incorrect_loss_per_char": 0.7122507294019064, "correct_loss_per_token": 1.3070074319839478, "incorrect_loss_per_token": 1.4245014588038127, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.544496774673462, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.544496774673462, "logits_per_char": -0.772248387336731, "num_chars": 2}, {"sum_logits": -1.3817260265350342, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.3817260265350342, "logits_per_char": -0.6908630132675171, "num_chars": 2}, {"sum_logits": -1.347281575202942, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.347281575202942, "logits_per_char": -0.673640787601471, "num_chars": 2}, {"sum_logits": -1.3070074319839478, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": true, "logits_per_token": -1.3070074319839478, "logits_per_char": -0.6535037159919739, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 138, "native_id": 138, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1084989309310913, "incorrect_loss_raw": 1.5281322002410889, "correct_loss_per_char": 0.5542494654655457, "incorrect_loss_per_char": 0.7640661001205444, "correct_loss_per_token": 1.1084989309310913, "incorrect_loss_per_token": 1.5281322002410889, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1084989309310913, "num_tokens": 1, "num_tokens_all": 457, "is_greedy": true, "logits_per_token": -1.1084989309310913, "logits_per_char": -0.5542494654655457, "num_chars": 2}, {"sum_logits": -1.3399615287780762, "num_tokens": 1, "num_tokens_all": 457, "is_greedy": false, "logits_per_token": -1.3399615287780762, "logits_per_char": -0.6699807643890381, "num_chars": 2}, {"sum_logits": -1.566704511642456, "num_tokens": 1, "num_tokens_all": 457, "is_greedy": false, "logits_per_token": -1.566704511642456, "logits_per_char": -0.783352255821228, "num_chars": 2}, {"sum_logits": -1.6777305603027344, "num_tokens": 1, "num_tokens_all": 457, "is_greedy": false, "logits_per_token": -1.6777305603027344, "logits_per_char": -0.8388652801513672, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 139, "native_id": 139, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.566038966178894, "incorrect_loss_raw": 1.3395310242970784, "correct_loss_per_char": 0.783019483089447, "incorrect_loss_per_char": 0.6697655121485392, "correct_loss_per_token": 1.566038966178894, "incorrect_loss_per_token": 1.3395310242970784, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.566038966178894, "num_tokens": 1, "num_tokens_all": 515, "is_greedy": false, "logits_per_token": -1.566038966178894, "logits_per_char": -0.783019483089447, "num_chars": 2}, {"sum_logits": -1.3175206184387207, "num_tokens": 1, "num_tokens_all": 515, "is_greedy": true, "logits_per_token": -1.3175206184387207, "logits_per_char": -0.6587603092193604, "num_chars": 2}, {"sum_logits": -1.383333444595337, "num_tokens": 1, "num_tokens_all": 515, "is_greedy": false, "logits_per_token": -1.383333444595337, "logits_per_char": -0.6916667222976685, "num_chars": 2}, {"sum_logits": -1.3177390098571777, "num_tokens": 1, "num_tokens_all": 515, "is_greedy": false, "logits_per_token": -1.3177390098571777, "logits_per_char": -0.6588695049285889, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 140, "native_id": 140, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4383093118667603, "incorrect_loss_raw": 1.3775728146235149, "correct_loss_per_char": 0.7191546559333801, "incorrect_loss_per_char": 0.6887864073117574, "correct_loss_per_token": 1.4383093118667603, "incorrect_loss_per_token": 1.3775728146235149, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4383093118667603, "num_tokens": 1, "num_tokens_all": 434, "is_greedy": false, "logits_per_token": -1.4383093118667603, "logits_per_char": -0.7191546559333801, "num_chars": 2}, {"sum_logits": -1.3531616926193237, "num_tokens": 1, "num_tokens_all": 434, "is_greedy": true, "logits_per_token": -1.3531616926193237, "logits_per_char": -0.6765808463096619, "num_chars": 2}, {"sum_logits": -1.40556001663208, "num_tokens": 1, "num_tokens_all": 434, "is_greedy": false, "logits_per_token": -1.40556001663208, "logits_per_char": -0.70278000831604, "num_chars": 2}, {"sum_logits": -1.3739967346191406, "num_tokens": 1, "num_tokens_all": 434, "is_greedy": false, "logits_per_token": -1.3739967346191406, "logits_per_char": -0.6869983673095703, "num_chars": 2}], "label": 0, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 141, "native_id": 141, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3234949111938477, "incorrect_loss_raw": 1.4212834040323894, "correct_loss_per_char": 0.6617474555969238, "incorrect_loss_per_char": 0.7106417020161947, "correct_loss_per_token": 1.3234949111938477, "incorrect_loss_per_token": 1.4212834040323894, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4825466871261597, "num_tokens": 1, "num_tokens_all": 461, "is_greedy": false, "logits_per_token": -1.4825466871261597, "logits_per_char": -0.7412733435630798, "num_chars": 2}, {"sum_logits": -1.5158374309539795, "num_tokens": 1, "num_tokens_all": 461, "is_greedy": false, "logits_per_token": -1.5158374309539795, "logits_per_char": -0.7579187154769897, "num_chars": 2}, {"sum_logits": -1.3234949111938477, "num_tokens": 1, "num_tokens_all": 461, "is_greedy": false, "logits_per_token": -1.3234949111938477, "logits_per_char": -0.6617474555969238, "num_chars": 2}, {"sum_logits": -1.2654660940170288, "num_tokens": 1, "num_tokens_all": 461, "is_greedy": true, "logits_per_token": -1.2654660940170288, "logits_per_char": -0.6327330470085144, "num_chars": 2}], "label": 2, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 142, "native_id": 142, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3576092720031738, "incorrect_loss_raw": 1.4101048310597737, "correct_loss_per_char": 0.6788046360015869, "incorrect_loss_per_char": 0.7050524155298868, "correct_loss_per_token": 1.3576092720031738, "incorrect_loss_per_token": 1.4101048310597737, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5154061317443848, "num_tokens": 1, "num_tokens_all": 459, "is_greedy": false, "logits_per_token": -1.5154061317443848, "logits_per_char": -0.7577030658721924, "num_chars": 2}, {"sum_logits": -1.3576092720031738, "num_tokens": 1, "num_tokens_all": 459, "is_greedy": false, "logits_per_token": -1.3576092720031738, "logits_per_char": -0.6788046360015869, "num_chars": 2}, {"sum_logits": -1.4705195426940918, "num_tokens": 1, "num_tokens_all": 459, "is_greedy": false, "logits_per_token": -1.4705195426940918, "logits_per_char": -0.7352597713470459, "num_chars": 2}, {"sum_logits": -1.2443888187408447, "num_tokens": 1, "num_tokens_all": 459, "is_greedy": true, "logits_per_token": -1.2443888187408447, "logits_per_char": -0.6221944093704224, "num_chars": 2}], "label": 1, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 143, "native_id": 143, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3350026607513428, "incorrect_loss_raw": 1.4293942054112752, "correct_loss_per_char": 0.6675013303756714, "incorrect_loss_per_char": 0.7146971027056376, "correct_loss_per_token": 1.3350026607513428, "incorrect_loss_per_token": 1.4293942054112752, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7106025218963623, "num_tokens": 1, "num_tokens_all": 499, "is_greedy": false, "logits_per_token": -1.7106025218963623, "logits_per_char": -0.8553012609481812, "num_chars": 2}, {"sum_logits": -1.3328003883361816, "num_tokens": 1, "num_tokens_all": 499, "is_greedy": false, "logits_per_token": -1.3328003883361816, "logits_per_char": -0.6664001941680908, "num_chars": 2}, {"sum_logits": -1.2447797060012817, "num_tokens": 1, "num_tokens_all": 499, "is_greedy": true, "logits_per_token": -1.2447797060012817, "logits_per_char": -0.6223898530006409, "num_chars": 2}, {"sum_logits": -1.3350026607513428, "num_tokens": 1, "num_tokens_all": 499, "is_greedy": false, "logits_per_token": -1.3350026607513428, "logits_per_char": -0.6675013303756714, "num_chars": 2}], "label": 3, "task_hash": "aaf0bf4441359de8ffba70cefb786807", "model_hash": "03418cf8091a9882619950ffb07429a5"}