diff --git "a/evals/core_9mcqa/task-002-arc_challenge:mc-predictions.jsonl" "b/evals/core_9mcqa/task-002-arc_challenge:mc-predictions.jsonl" new file mode 100644--- /dev/null +++ "b/evals/core_9mcqa/task-002-arc_challenge:mc-predictions.jsonl" @@ -0,0 +1,1172 @@ +{"doc_id": 0, "native_id": "Mercury_7175875", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.397988200187683, "incorrect_loss_raw": 1.3888261318206787, "correct_loss_per_char": 0.6989941000938416, "incorrect_loss_per_char": 0.6944130659103394, "correct_loss_per_token": 1.397988200187683, "incorrect_loss_per_token": 1.3888261318206787, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4621028900146484, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4621028900146484, "logits_per_char": -0.7310514450073242, "num_chars": 2}, {"sum_logits": -1.3747004270553589, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.3747004270553589, "logits_per_char": -0.6873502135276794, "num_chars": 2}, {"sum_logits": -1.397988200187683, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.397988200187683, "logits_per_char": -0.6989941000938416, "num_chars": 2}, {"sum_logits": -1.3296750783920288, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.3296750783920288, "logits_per_char": -0.6648375391960144, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1, "native_id": "Mercury_SC_409171", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.446919560432434, "incorrect_loss_raw": 1.3840744495391846, "correct_loss_per_char": 0.723459780216217, "incorrect_loss_per_char": 0.6920372247695923, "correct_loss_per_token": 1.446919560432434, "incorrect_loss_per_token": 1.3840744495391846, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5252269506454468, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.5252269506454468, "logits_per_char": -0.7626134753227234, "num_chars": 2}, {"sum_logits": -1.446919560432434, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.446919560432434, "logits_per_char": -0.723459780216217, "num_chars": 2}, {"sum_logits": -1.4608603715896606, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.4608603715896606, "logits_per_char": -0.7304301857948303, "num_chars": 2}, {"sum_logits": -1.1661360263824463, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": true, "logits_per_token": -1.1661360263824463, "logits_per_char": -0.5830680131912231, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 2, "native_id": "Mercury_SC_408547", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4777233600616455, "incorrect_loss_raw": 1.3624563217163086, "correct_loss_per_char": 0.7388616800308228, "incorrect_loss_per_char": 0.6812281608581543, "correct_loss_per_token": 1.4777233600616455, "incorrect_loss_per_token": 1.3624563217163086, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3601540327072144, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.3601540327072144, "logits_per_char": -0.6800770163536072, "num_chars": 2}, {"sum_logits": -1.3712773323059082, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.3712773323059082, "logits_per_char": -0.6856386661529541, "num_chars": 2}, {"sum_logits": -1.4777233600616455, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4777233600616455, "logits_per_char": -0.7388616800308228, "num_chars": 2}, {"sum_logits": -1.3559376001358032, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.3559376001358032, "logits_per_char": -0.6779688000679016, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 3, "native_id": "Mercury_407327", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3535358905792236, "incorrect_loss_raw": 1.405942440032959, "correct_loss_per_char": 0.6767679452896118, "incorrect_loss_per_char": 0.7029712200164795, "correct_loss_per_token": 1.3535358905792236, "incorrect_loss_per_token": 1.405942440032959, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.378324270248413, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.378324270248413, "logits_per_char": -0.6891621351242065, "num_chars": 2}, {"sum_logits": -1.3499853610992432, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": true, "logits_per_token": -1.3499853610992432, "logits_per_char": -0.6749926805496216, "num_chars": 2}, {"sum_logits": -1.4895176887512207, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.4895176887512207, "logits_per_char": -0.7447588443756104, "num_chars": 2}, {"sum_logits": -1.3535358905792236, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.3535358905792236, "logits_per_char": -0.6767679452896118, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 4, "native_id": "MCAS_2006_9_44", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3830968141555786, "incorrect_loss_raw": 1.3931522766749065, "correct_loss_per_char": 0.6915484070777893, "incorrect_loss_per_char": 0.6965761383374532, "correct_loss_per_token": 1.3830968141555786, "incorrect_loss_per_token": 1.3931522766749065, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.441299557685852, "num_tokens": 1, "num_tokens_all": 421, "is_greedy": false, "logits_per_token": -1.441299557685852, "logits_per_char": -0.720649778842926, "num_chars": 2}, {"sum_logits": -1.34433913230896, "num_tokens": 1, "num_tokens_all": 421, "is_greedy": true, "logits_per_token": -1.34433913230896, "logits_per_char": -0.67216956615448, "num_chars": 2}, {"sum_logits": -1.3938181400299072, "num_tokens": 1, "num_tokens_all": 421, "is_greedy": false, "logits_per_token": -1.3938181400299072, "logits_per_char": -0.6969090700149536, "num_chars": 2}, {"sum_logits": -1.3830968141555786, "num_tokens": 1, "num_tokens_all": 421, "is_greedy": false, "logits_per_token": -1.3830968141555786, "logits_per_char": -0.6915484070777893, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 5, "native_id": "Mercury_7270393", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4363071918487549, "incorrect_loss_raw": 1.3890010913213093, "correct_loss_per_char": 0.7181535959243774, "incorrect_loss_per_char": 0.6945005456606547, "correct_loss_per_token": 1.4363071918487549, "incorrect_loss_per_token": 1.3890010913213093, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4634919166564941, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": false, "logits_per_token": -1.4634919166564941, "logits_per_char": -0.7317459583282471, "num_chars": 2}, {"sum_logits": -1.4363071918487549, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": false, "logits_per_token": -1.4363071918487549, "logits_per_char": -0.7181535959243774, "num_chars": 2}, {"sum_logits": -1.546506404876709, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": false, "logits_per_token": -1.546506404876709, "logits_per_char": -0.7732532024383545, "num_chars": 2}, {"sum_logits": -1.157004952430725, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": true, "logits_per_token": -1.157004952430725, "logits_per_char": -0.5785024762153625, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 6, "native_id": "MCAS_2014_5_7", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3785089254379272, "incorrect_loss_raw": 1.4049282868703206, "correct_loss_per_char": 0.6892544627189636, "incorrect_loss_per_char": 0.7024641434351603, "correct_loss_per_token": 1.3785089254379272, "incorrect_loss_per_token": 1.4049282868703206, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5556796789169312, "num_tokens": 1, "num_tokens_all": 420, "is_greedy": false, "logits_per_token": -1.5556796789169312, "logits_per_char": -0.7778398394584656, "num_chars": 2}, {"sum_logits": -1.4483500719070435, "num_tokens": 1, "num_tokens_all": 420, "is_greedy": false, "logits_per_token": -1.4483500719070435, "logits_per_char": -0.7241750359535217, "num_chars": 2}, {"sum_logits": -1.3785089254379272, "num_tokens": 1, "num_tokens_all": 420, "is_greedy": false, "logits_per_token": -1.3785089254379272, "logits_per_char": -0.6892544627189636, "num_chars": 2}, {"sum_logits": -1.2107551097869873, "num_tokens": 1, "num_tokens_all": 420, "is_greedy": true, "logits_per_token": -1.2107551097869873, "logits_per_char": -0.6053775548934937, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 7, "native_id": "Mercury_7086660", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4792078733444214, "incorrect_loss_raw": 1.3632452090581257, "correct_loss_per_char": 0.7396039366722107, "incorrect_loss_per_char": 0.6816226045290629, "correct_loss_per_token": 1.4792078733444214, "incorrect_loss_per_token": 1.3632452090581257, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3994245529174805, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.3994245529174805, "logits_per_char": -0.6997122764587402, "num_chars": 2}, {"sum_logits": -1.3884564638137817, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.3884564638137817, "logits_per_char": -0.6942282319068909, "num_chars": 2}, {"sum_logits": -1.4792078733444214, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.4792078733444214, "logits_per_char": -0.7396039366722107, "num_chars": 2}, {"sum_logits": -1.3018546104431152, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": true, "logits_per_token": -1.3018546104431152, "logits_per_char": -0.6509273052215576, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 8, "native_id": "Mercury_7168805", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4113965034484863, "incorrect_loss_raw": 1.3912594715754192, "correct_loss_per_char": 0.7056982517242432, "incorrect_loss_per_char": 0.6956297357877096, "correct_loss_per_token": 1.4113965034484863, "incorrect_loss_per_token": 1.3912594715754192, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4484248161315918, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.4484248161315918, "logits_per_char": -0.7242124080657959, "num_chars": 2}, {"sum_logits": -1.4113965034484863, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.4113965034484863, "logits_per_char": -0.7056982517242432, "num_chars": 2}, {"sum_logits": -1.5136619806289673, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.5136619806289673, "logits_per_char": -0.7568309903144836, "num_chars": 2}, {"sum_logits": -1.2116916179656982, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": true, "logits_per_token": -1.2116916179656982, "logits_per_char": -0.6058458089828491, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 9, "native_id": "MCAS_2003_8_11", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4753291606903076, "incorrect_loss_raw": 1.3654911120732625, "correct_loss_per_char": 0.7376645803451538, "incorrect_loss_per_char": 0.6827455560366312, "correct_loss_per_token": 1.4753291606903076, "incorrect_loss_per_token": 1.3654911120732625, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4753291606903076, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.4753291606903076, "logits_per_char": -0.7376645803451538, "num_chars": 2}, {"sum_logits": -1.3668094873428345, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3668094873428345, "logits_per_char": -0.6834047436714172, "num_chars": 2}, {"sum_logits": -1.3118586540222168, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.3118586540222168, "logits_per_char": -0.6559293270111084, "num_chars": 2}, {"sum_logits": -1.4178051948547363, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.4178051948547363, "logits_per_char": -0.7089025974273682, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 10, "native_id": "Mercury_7250058", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3871166706085205, "incorrect_loss_raw": 1.400418718655904, "correct_loss_per_char": 0.6935583353042603, "incorrect_loss_per_char": 0.700209359327952, "correct_loss_per_token": 1.3871166706085205, "incorrect_loss_per_token": 1.400418718655904, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5690540075302124, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.5690540075302124, "logits_per_char": -0.7845270037651062, "num_chars": 2}, {"sum_logits": -1.3871166706085205, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.3871166706085205, "logits_per_char": -0.6935583353042603, "num_chars": 2}, {"sum_logits": -1.3611116409301758, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.3611116409301758, "logits_per_char": -0.6805558204650879, "num_chars": 2}, {"sum_logits": -1.2710905075073242, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.2710905075073242, "logits_per_char": -0.6355452537536621, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 11, "native_id": "Mercury_7012740", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.427985668182373, "incorrect_loss_raw": 1.3784297307332356, "correct_loss_per_char": 0.7139928340911865, "incorrect_loss_per_char": 0.6892148653666178, "correct_loss_per_token": 1.427985668182373, "incorrect_loss_per_token": 1.3784297307332356, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.427985668182373, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.427985668182373, "logits_per_char": -0.7139928340911865, "num_chars": 2}, {"sum_logits": -1.3595397472381592, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3595397472381592, "logits_per_char": -0.6797698736190796, "num_chars": 2}, {"sum_logits": -1.433024287223816, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.433024287223816, "logits_per_char": -0.716512143611908, "num_chars": 2}, {"sum_logits": -1.342725157737732, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.342725157737732, "logits_per_char": -0.671362578868866, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 12, "native_id": "Mercury_LBS10610", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4635862112045288, "incorrect_loss_raw": 1.373959223429362, "correct_loss_per_char": 0.7317931056022644, "incorrect_loss_per_char": 0.686979611714681, "correct_loss_per_token": 1.4635862112045288, "incorrect_loss_per_token": 1.373959223429362, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.501111626625061, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.501111626625061, "logits_per_char": -0.7505558133125305, "num_chars": 2}, {"sum_logits": -1.4023430347442627, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.4023430347442627, "logits_per_char": -0.7011715173721313, "num_chars": 2}, {"sum_logits": -1.4635862112045288, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.4635862112045288, "logits_per_char": -0.7317931056022644, "num_chars": 2}, {"sum_logits": -1.2184230089187622, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": true, "logits_per_token": -1.2184230089187622, "logits_per_char": -0.6092115044593811, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 13, "native_id": "Mercury_SC_407400", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5079089403152466, "incorrect_loss_raw": 1.356086015701294, "correct_loss_per_char": 0.7539544701576233, "incorrect_loss_per_char": 0.678043007850647, "correct_loss_per_token": 1.5079089403152466, "incorrect_loss_per_token": 1.356086015701294, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3083895444869995, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.3083895444869995, "logits_per_char": -0.6541947722434998, "num_chars": 2}, {"sum_logits": -1.3639917373657227, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3639917373657227, "logits_per_char": -0.6819958686828613, "num_chars": 2}, {"sum_logits": -1.5079089403152466, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.5079089403152466, "logits_per_char": -0.7539544701576233, "num_chars": 2}, {"sum_logits": -1.3958767652511597, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3958767652511597, "logits_per_char": -0.6979383826255798, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 14, "native_id": "Mercury_7212993", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.384588599205017, "incorrect_loss_raw": 1.3967829545338948, "correct_loss_per_char": 0.6922942996025085, "incorrect_loss_per_char": 0.6983914772669474, "correct_loss_per_token": 1.384588599205017, "incorrect_loss_per_token": 1.3967829545338948, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4871735572814941, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4871735572814941, "logits_per_char": -0.7435867786407471, "num_chars": 2}, {"sum_logits": -1.42227303981781, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.42227303981781, "logits_per_char": -0.711136519908905, "num_chars": 2}, {"sum_logits": -1.384588599205017, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.384588599205017, "logits_per_char": -0.6922942996025085, "num_chars": 2}, {"sum_logits": -1.2809022665023804, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.2809022665023804, "logits_per_char": -0.6404511332511902, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 15, "native_id": "Mercury_SC_413240", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4269942045211792, "incorrect_loss_raw": 1.3820860385894775, "correct_loss_per_char": 0.7134971022605896, "incorrect_loss_per_char": 0.6910430192947388, "correct_loss_per_token": 1.4269942045211792, "incorrect_loss_per_token": 1.3820860385894775, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4269942045211792, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4269942045211792, "logits_per_char": -0.7134971022605896, "num_chars": 2}, {"sum_logits": -1.4667134284973145, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4667134284973145, "logits_per_char": -0.7333567142486572, "num_chars": 2}, {"sum_logits": -1.2912521362304688, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.2912521362304688, "logits_per_char": -0.6456260681152344, "num_chars": 2}, {"sum_logits": -1.3882925510406494, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3882925510406494, "logits_per_char": -0.6941462755203247, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 16, "native_id": "Mercury_7186358", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4770630598068237, "incorrect_loss_raw": 1.3667491674423218, "correct_loss_per_char": 0.7385315299034119, "incorrect_loss_per_char": 0.6833745837211609, "correct_loss_per_token": 1.4770630598068237, "incorrect_loss_per_token": 1.3667491674423218, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4866275787353516, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.4866275787353516, "logits_per_char": -0.7433137893676758, "num_chars": 2}, {"sum_logits": -1.286839485168457, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": true, "logits_per_token": -1.286839485168457, "logits_per_char": -0.6434197425842285, "num_chars": 2}, {"sum_logits": -1.4770630598068237, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.4770630598068237, "logits_per_char": -0.7385315299034119, "num_chars": 2}, {"sum_logits": -1.3267804384231567, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.3267804384231567, "logits_per_char": -0.6633902192115784, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 17, "native_id": "Mercury_7166425", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4144948720932007, "incorrect_loss_raw": 1.384604851404826, "correct_loss_per_char": 0.7072474360466003, "incorrect_loss_per_char": 0.692302425702413, "correct_loss_per_token": 1.4144948720932007, "incorrect_loss_per_token": 1.384604851404826, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.477158546447754, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.477158546447754, "logits_per_char": -0.738579273223877, "num_chars": 2}, {"sum_logits": -1.4144948720932007, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.4144948720932007, "logits_per_char": -0.7072474360466003, "num_chars": 2}, {"sum_logits": -1.398897409439087, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.398897409439087, "logits_per_char": -0.6994487047195435, "num_chars": 2}, {"sum_logits": -1.2777585983276367, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.2777585983276367, "logits_per_char": -0.6388792991638184, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 18, "native_id": "MDSA_2007_8_3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3632878065109253, "incorrect_loss_raw": 1.4086589018503826, "correct_loss_per_char": 0.6816439032554626, "incorrect_loss_per_char": 0.7043294509251913, "correct_loss_per_token": 1.3632878065109253, "incorrect_loss_per_token": 1.4086589018503826, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3632878065109253, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.3632878065109253, "logits_per_char": -0.6816439032554626, "num_chars": 2}, {"sum_logits": -1.2173373699188232, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": true, "logits_per_token": -1.2173373699188232, "logits_per_char": -0.6086686849594116, "num_chars": 2}, {"sum_logits": -1.4906225204467773, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.4906225204467773, "logits_per_char": -0.7453112602233887, "num_chars": 2}, {"sum_logits": -1.5180168151855469, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.5180168151855469, "logits_per_char": -0.7590084075927734, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 19, "native_id": "Mercury_7094290", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4983736276626587, "incorrect_loss_raw": 1.357198675473531, "correct_loss_per_char": 0.7491868138313293, "incorrect_loss_per_char": 0.6785993377367655, "correct_loss_per_token": 1.4983736276626587, "incorrect_loss_per_token": 1.357198675473531, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3442573547363281, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.3442573547363281, "logits_per_char": -0.6721286773681641, "num_chars": 2}, {"sum_logits": -1.408144235610962, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.408144235610962, "logits_per_char": -0.704072117805481, "num_chars": 2}, {"sum_logits": -1.4983736276626587, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.4983736276626587, "logits_per_char": -0.7491868138313293, "num_chars": 2}, {"sum_logits": -1.3191944360733032, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.3191944360733032, "logits_per_char": -0.6595972180366516, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 20, "native_id": "Mercury_7186568", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3290735483169556, "incorrect_loss_raw": 1.4124455849329631, "correct_loss_per_char": 0.6645367741584778, "incorrect_loss_per_char": 0.7062227924664816, "correct_loss_per_token": 1.3290735483169556, "incorrect_loss_per_token": 1.4124455849329631, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4264668226242065, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.4264668226242065, "logits_per_char": -0.7132334113121033, "num_chars": 2}, {"sum_logits": -1.3290735483169556, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.3290735483169556, "logits_per_char": -0.6645367741584778, "num_chars": 2}, {"sum_logits": -1.44638991355896, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.44638991355896, "logits_per_char": -0.72319495677948, "num_chars": 2}, {"sum_logits": -1.3644800186157227, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.3644800186157227, "logits_per_char": -0.6822400093078613, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 21, "native_id": "Mercury_402216", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.254930019378662, "incorrect_loss_raw": 1.4414390325546265, "correct_loss_per_char": 0.627465009689331, "incorrect_loss_per_char": 0.7207195162773132, "correct_loss_per_token": 1.254930019378662, "incorrect_loss_per_token": 1.4414390325546265, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4025696516036987, "num_tokens": 1, "num_tokens_all": 438, "is_greedy": false, "logits_per_token": -1.4025696516036987, "logits_per_char": -0.7012848258018494, "num_chars": 2}, {"sum_logits": -1.254930019378662, "num_tokens": 1, "num_tokens_all": 438, "is_greedy": true, "logits_per_token": -1.254930019378662, "logits_per_char": -0.627465009689331, "num_chars": 2}, {"sum_logits": -1.5117789506912231, "num_tokens": 1, "num_tokens_all": 438, "is_greedy": false, "logits_per_token": -1.5117789506912231, "logits_per_char": -0.7558894753456116, "num_chars": 2}, {"sum_logits": -1.4099684953689575, "num_tokens": 1, "num_tokens_all": 438, "is_greedy": false, "logits_per_token": -1.4099684953689575, "logits_per_char": -0.7049842476844788, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 22, "native_id": "Mercury_404894", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2811458110809326, "incorrect_loss_raw": 1.4315898021062214, "correct_loss_per_char": 0.6405729055404663, "incorrect_loss_per_char": 0.7157949010531107, "correct_loss_per_token": 1.2811458110809326, "incorrect_loss_per_token": 1.4315898021062214, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2811458110809326, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.2811458110809326, "logits_per_char": -0.6405729055404663, "num_chars": 2}, {"sum_logits": -1.4518910646438599, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4518910646438599, "logits_per_char": -0.7259455323219299, "num_chars": 2}, {"sum_logits": -1.461602807044983, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.461602807044983, "logits_per_char": -0.7308014035224915, "num_chars": 2}, {"sum_logits": -1.3812755346298218, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.3812755346298218, "logits_per_char": -0.6906377673149109, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 23, "native_id": "MCAS_2002_8_11", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4639959335327148, "incorrect_loss_raw": 1.367892066637675, "correct_loss_per_char": 0.7319979667663574, "incorrect_loss_per_char": 0.6839460333188375, "correct_loss_per_token": 1.4639959335327148, "incorrect_loss_per_token": 1.367892066637675, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4072357416152954, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -1.4072357416152954, "logits_per_char": -0.7036178708076477, "num_chars": 2}, {"sum_logits": -1.3470733165740967, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": true, "logits_per_token": -1.3470733165740967, "logits_per_char": -0.6735366582870483, "num_chars": 2}, {"sum_logits": -1.4639959335327148, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -1.4639959335327148, "logits_per_char": -0.7319979667663574, "num_chars": 2}, {"sum_logits": -1.3493671417236328, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -1.3493671417236328, "logits_per_char": -0.6746835708618164, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 24, "native_id": "Mercury_SC_405086", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5320912599563599, "incorrect_loss_raw": 1.3509914080301921, "correct_loss_per_char": 0.7660456299781799, "incorrect_loss_per_char": 0.6754957040150961, "correct_loss_per_token": 1.5320912599563599, "incorrect_loss_per_token": 1.3509914080301921, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.439789056777954, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.439789056777954, "logits_per_char": -0.719894528388977, "num_chars": 2}, {"sum_logits": -1.5320912599563599, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.5320912599563599, "logits_per_char": -0.7660456299781799, "num_chars": 2}, {"sum_logits": -1.3745814561843872, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3745814561843872, "logits_per_char": -0.6872907280921936, "num_chars": 2}, {"sum_logits": -1.2386037111282349, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.2386037111282349, "logits_per_char": -0.6193018555641174, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 25, "native_id": "Mercury_SC_408324", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2877824306488037, "incorrect_loss_raw": 1.4259060223897297, "correct_loss_per_char": 0.6438912153244019, "incorrect_loss_per_char": 0.7129530111948649, "correct_loss_per_token": 1.2877824306488037, "incorrect_loss_per_token": 1.4259060223897297, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4045650959014893, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4045650959014893, "logits_per_char": -0.7022825479507446, "num_chars": 2}, {"sum_logits": -1.4172793626785278, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4172793626785278, "logits_per_char": -0.7086396813392639, "num_chars": 2}, {"sum_logits": -1.4558736085891724, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4558736085891724, "logits_per_char": -0.7279368042945862, "num_chars": 2}, {"sum_logits": -1.2877824306488037, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.2877824306488037, "logits_per_char": -0.6438912153244019, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 26, "native_id": "Mercury_7218820", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3753517866134644, "incorrect_loss_raw": 1.4019577900568645, "correct_loss_per_char": 0.6876758933067322, "incorrect_loss_per_char": 0.7009788950284322, "correct_loss_per_token": 1.3753517866134644, "incorrect_loss_per_token": 1.4019577900568645, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5581165552139282, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.5581165552139282, "logits_per_char": -0.7790582776069641, "num_chars": 2}, {"sum_logits": -1.3753517866134644, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.3753517866134644, "logits_per_char": -0.6876758933067322, "num_chars": 2}, {"sum_logits": -1.355040192604065, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.355040192604065, "logits_per_char": -0.6775200963020325, "num_chars": 2}, {"sum_logits": -1.2927166223526, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": true, "logits_per_token": -1.2927166223526, "logits_per_char": -0.6463583111763, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 27, "native_id": "Mercury_412202", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5551942586898804, "incorrect_loss_raw": 1.3558075030644734, "correct_loss_per_char": 0.7775971293449402, "incorrect_loss_per_char": 0.6779037515322367, "correct_loss_per_token": 1.5551942586898804, "incorrect_loss_per_token": 1.3558075030644734, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3841828107833862, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.3841828107833862, "logits_per_char": -0.6920914053916931, "num_chars": 2}, {"sum_logits": -1.5551942586898804, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.5551942586898804, "logits_per_char": -0.7775971293449402, "num_chars": 2}, {"sum_logits": -1.5475950241088867, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.5475950241088867, "logits_per_char": -0.7737975120544434, "num_chars": 2}, {"sum_logits": -1.1356446743011475, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": true, "logits_per_token": -1.1356446743011475, "logits_per_char": -0.5678223371505737, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 28, "native_id": "Mercury_SC_409139", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4345836639404297, "incorrect_loss_raw": 1.374965786933899, "correct_loss_per_char": 0.7172918319702148, "incorrect_loss_per_char": 0.6874828934669495, "correct_loss_per_token": 1.4345836639404297, "incorrect_loss_per_token": 1.374965786933899, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3830417394638062, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.3830417394638062, "logits_per_char": -0.6915208697319031, "num_chars": 2}, {"sum_logits": -1.361572504043579, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.361572504043579, "logits_per_char": -0.6807862520217896, "num_chars": 2}, {"sum_logits": -1.4345836639404297, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4345836639404297, "logits_per_char": -0.7172918319702148, "num_chars": 2}, {"sum_logits": -1.3802831172943115, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.3802831172943115, "logits_per_char": -0.6901415586471558, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 29, "native_id": "Mercury_400687", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3116027116775513, "incorrect_loss_raw": 1.4250654776891072, "correct_loss_per_char": 0.6558013558387756, "incorrect_loss_per_char": 0.7125327388445536, "correct_loss_per_token": 1.3116027116775513, "incorrect_loss_per_token": 1.4250654776891072, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.279414415359497, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.279414415359497, "logits_per_char": -0.6397072076797485, "num_chars": 2}, {"sum_logits": -1.3116027116775513, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.3116027116775513, "logits_per_char": -0.6558013558387756, "num_chars": 2}, {"sum_logits": -1.4372761249542236, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4372761249542236, "logits_per_char": -0.7186380624771118, "num_chars": 2}, {"sum_logits": -1.558505892753601, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.558505892753601, "logits_per_char": -0.7792529463768005, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 30, "native_id": "Mercury_7171605", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3680602312088013, "incorrect_loss_raw": 1.4005444447199504, "correct_loss_per_char": 0.6840301156044006, "incorrect_loss_per_char": 0.7002722223599752, "correct_loss_per_token": 1.3680602312088013, "incorrect_loss_per_token": 1.4005444447199504, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.350295901298523, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.350295901298523, "logits_per_char": -0.6751479506492615, "num_chars": 2}, {"sum_logits": -1.4066121578216553, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.4066121578216553, "logits_per_char": -0.7033060789108276, "num_chars": 2}, {"sum_logits": -1.4447252750396729, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.4447252750396729, "logits_per_char": -0.7223626375198364, "num_chars": 2}, {"sum_logits": -1.3680602312088013, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.3680602312088013, "logits_per_char": -0.6840301156044006, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 31, "native_id": "Mercury_7210245", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4167405366897583, "incorrect_loss_raw": 1.3828882773717244, "correct_loss_per_char": 0.7083702683448792, "incorrect_loss_per_char": 0.6914441386858622, "correct_loss_per_token": 1.4167405366897583, "incorrect_loss_per_token": 1.3828882773717244, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4103784561157227, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.4103784561157227, "logits_per_char": -0.7051892280578613, "num_chars": 2}, {"sum_logits": -1.4314663410186768, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.4314663410186768, "logits_per_char": -0.7157331705093384, "num_chars": 2}, {"sum_logits": -1.4167405366897583, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.4167405366897583, "logits_per_char": -0.7083702683448792, "num_chars": 2}, {"sum_logits": -1.306820034980774, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": true, "logits_per_token": -1.306820034980774, "logits_per_char": -0.653410017490387, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 32, "native_id": "AKDE&ED_2008_4_25", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3481658697128296, "incorrect_loss_raw": 1.4105889399846394, "correct_loss_per_char": 0.6740829348564148, "incorrect_loss_per_char": 0.7052944699923197, "correct_loss_per_token": 1.3481658697128296, "incorrect_loss_per_token": 1.4105889399846394, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3481658697128296, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3481658697128296, "logits_per_char": -0.6740829348564148, "num_chars": 2}, {"sum_logits": -1.312814712524414, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.312814712524414, "logits_per_char": -0.656407356262207, "num_chars": 2}, {"sum_logits": -1.3570955991744995, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3570955991744995, "logits_per_char": -0.6785477995872498, "num_chars": 2}, {"sum_logits": -1.5618565082550049, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.5618565082550049, "logits_per_char": -0.7809282541275024, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 33, "native_id": "AKDE&ED_2008_4_19", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.43429434299469, "incorrect_loss_raw": 1.3778936465581257, "correct_loss_per_char": 0.717147171497345, "incorrect_loss_per_char": 0.6889468232790629, "correct_loss_per_token": 1.43429434299469, "incorrect_loss_per_token": 1.3778936465581257, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4383512735366821, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4383512735366821, "logits_per_char": -0.7191756367683411, "num_chars": 2}, {"sum_logits": -1.3875019550323486, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.3875019550323486, "logits_per_char": -0.6937509775161743, "num_chars": 2}, {"sum_logits": -1.43429434299469, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.43429434299469, "logits_per_char": -0.717147171497345, "num_chars": 2}, {"sum_logits": -1.3078277111053467, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.3078277111053467, "logits_per_char": -0.6539138555526733, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 34, "native_id": "Mercury_SC_400402", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4060875177383423, "incorrect_loss_raw": 1.387630860010783, "correct_loss_per_char": 0.7030437588691711, "incorrect_loss_per_char": 0.6938154300053915, "correct_loss_per_token": 1.4060875177383423, "incorrect_loss_per_token": 1.387630860010783, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4060875177383423, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.4060875177383423, "logits_per_char": -0.7030437588691711, "num_chars": 2}, {"sum_logits": -1.4325939416885376, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.4325939416885376, "logits_per_char": -0.7162969708442688, "num_chars": 2}, {"sum_logits": -1.3961933851242065, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.3961933851242065, "logits_per_char": -0.6980966925621033, "num_chars": 2}, {"sum_logits": -1.3341052532196045, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": true, "logits_per_token": -1.3341052532196045, "logits_per_char": -0.6670526266098022, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 35, "native_id": "Mercury_7234308", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.495455265045166, "incorrect_loss_raw": 1.3591935634613037, "correct_loss_per_char": 0.747727632522583, "incorrect_loss_per_char": 0.6795967817306519, "correct_loss_per_token": 1.495455265045166, "incorrect_loss_per_token": 1.3591935634613037, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.495455265045166, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -1.495455265045166, "logits_per_char": -0.747727632522583, "num_chars": 2}, {"sum_logits": -1.3823622465133667, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -1.3823622465133667, "logits_per_char": -0.6911811232566833, "num_chars": 2}, {"sum_logits": -1.387900948524475, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -1.387900948524475, "logits_per_char": -0.6939504742622375, "num_chars": 2}, {"sum_logits": -1.3073174953460693, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": true, "logits_per_token": -1.3073174953460693, "logits_per_char": -0.6536587476730347, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 36, "native_id": "ACTAAP_2014_5_8", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4052988290786743, "incorrect_loss_raw": 1.3898063898086548, "correct_loss_per_char": 0.7026494145393372, "incorrect_loss_per_char": 0.6949031949043274, "correct_loss_per_token": 1.4052988290786743, "incorrect_loss_per_token": 1.3898063898086548, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5213143825531006, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -1.5213143825531006, "logits_per_char": -0.7606571912765503, "num_chars": 2}, {"sum_logits": -1.4052988290786743, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -1.4052988290786743, "logits_per_char": -0.7026494145393372, "num_chars": 2}, {"sum_logits": -1.3134233951568604, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": true, "logits_per_token": -1.3134233951568604, "logits_per_char": -0.6567116975784302, "num_chars": 2}, {"sum_logits": -1.3346813917160034, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -1.3346813917160034, "logits_per_char": -0.6673406958580017, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 37, "native_id": "Mercury_400407", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4471793174743652, "incorrect_loss_raw": 1.3755839665730794, "correct_loss_per_char": 0.7235896587371826, "incorrect_loss_per_char": 0.6877919832865397, "correct_loss_per_token": 1.4471793174743652, "incorrect_loss_per_token": 1.3755839665730794, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.416001796722412, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.416001796722412, "logits_per_char": -0.708000898361206, "num_chars": 2}, {"sum_logits": -1.4471793174743652, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4471793174743652, "logits_per_char": -0.7235896587371826, "num_chars": 2}, {"sum_logits": -1.3797852993011475, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.3797852993011475, "logits_per_char": -0.6898926496505737, "num_chars": 2}, {"sum_logits": -1.3309648036956787, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.3309648036956787, "logits_per_char": -0.6654824018478394, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 38, "native_id": "Mercury_7116288", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4251152276992798, "incorrect_loss_raw": 1.3841789960861206, "correct_loss_per_char": 0.7125576138496399, "incorrect_loss_per_char": 0.6920894980430603, "correct_loss_per_token": 1.4251152276992798, "incorrect_loss_per_token": 1.3841789960861206, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5126274824142456, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.5126274824142456, "logits_per_char": -0.7563137412071228, "num_chars": 2}, {"sum_logits": -1.3885818719863892, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3885818719863892, "logits_per_char": -0.6942909359931946, "num_chars": 2}, {"sum_logits": -1.4251152276992798, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4251152276992798, "logits_per_char": -0.7125576138496399, "num_chars": 2}, {"sum_logits": -1.251327633857727, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.251327633857727, "logits_per_char": -0.6256638169288635, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 39, "native_id": "MCAS_2004_9_15-v1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3351547718048096, "incorrect_loss_raw": 1.4113860527674358, "correct_loss_per_char": 0.6675773859024048, "incorrect_loss_per_char": 0.7056930263837179, "correct_loss_per_token": 1.3351547718048096, "incorrect_loss_per_token": 1.4113860527674358, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.386871099472046, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -1.386871099472046, "logits_per_char": -0.693435549736023, "num_chars": 2}, {"sum_logits": -1.3351547718048096, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": true, "logits_per_token": -1.3351547718048096, "logits_per_char": -0.6675773859024048, "num_chars": 2}, {"sum_logits": -1.3650844097137451, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -1.3650844097137451, "logits_per_char": -0.6825422048568726, "num_chars": 2}, {"sum_logits": -1.4822026491165161, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -1.4822026491165161, "logits_per_char": -0.7411013245582581, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 40, "native_id": "NYSEDREGENTS_2015_4_26", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4590177536010742, "incorrect_loss_raw": 1.3688896497090657, "correct_loss_per_char": 0.7295088768005371, "incorrect_loss_per_char": 0.6844448248545328, "correct_loss_per_token": 1.4590177536010742, "incorrect_loss_per_token": 1.3688896497090657, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4251750707626343, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4251750707626343, "logits_per_char": -0.7125875353813171, "num_chars": 2}, {"sum_logits": -1.3015923500061035, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.3015923500061035, "logits_per_char": -0.6507961750030518, "num_chars": 2}, {"sum_logits": -1.4590177536010742, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4590177536010742, "logits_per_char": -0.7295088768005371, "num_chars": 2}, {"sum_logits": -1.3799015283584595, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3799015283584595, "logits_per_char": -0.6899507641792297, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 41, "native_id": "Mercury_SC_401620", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3422521352767944, "incorrect_loss_raw": 1.4109796285629272, "correct_loss_per_char": 0.6711260676383972, "incorrect_loss_per_char": 0.7054898142814636, "correct_loss_per_token": 1.3422521352767944, "incorrect_loss_per_token": 1.4109796285629272, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3422521352767944, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.3422521352767944, "logits_per_char": -0.6711260676383972, "num_chars": 2}, {"sum_logits": -1.5001671314239502, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.5001671314239502, "logits_per_char": -0.7500835657119751, "num_chars": 2}, {"sum_logits": -1.421318531036377, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.421318531036377, "logits_per_char": -0.7106592655181885, "num_chars": 2}, {"sum_logits": -1.3114532232284546, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.3114532232284546, "logits_per_char": -0.6557266116142273, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 42, "native_id": "Mercury_400877", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1854678392410278, "incorrect_loss_raw": 1.4951266050338745, "correct_loss_per_char": 0.5927339196205139, "incorrect_loss_per_char": 0.7475633025169373, "correct_loss_per_token": 1.1854678392410278, "incorrect_loss_per_token": 1.4951266050338745, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6911193132400513, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.6911193132400513, "logits_per_char": -0.8455596566200256, "num_chars": 2}, {"sum_logits": -1.5857325792312622, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.5857325792312622, "logits_per_char": -0.7928662896156311, "num_chars": 2}, {"sum_logits": -1.1854678392410278, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.1854678392410278, "logits_per_char": -0.5927339196205139, "num_chars": 2}, {"sum_logits": -1.20852792263031, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.20852792263031, "logits_per_char": -0.604263961315155, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 43, "native_id": "Mercury_7174213", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.311279058456421, "incorrect_loss_raw": 1.417931040128072, "correct_loss_per_char": 0.6556395292282104, "incorrect_loss_per_char": 0.708965520064036, "correct_loss_per_token": 1.311279058456421, "incorrect_loss_per_token": 1.417931040128072, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4050403833389282, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.4050403833389282, "logits_per_char": -0.7025201916694641, "num_chars": 2}, {"sum_logits": -1.430394172668457, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.430394172668457, "logits_per_char": -0.7151970863342285, "num_chars": 2}, {"sum_logits": -1.418358564376831, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.418358564376831, "logits_per_char": -0.7091792821884155, "num_chars": 2}, {"sum_logits": -1.311279058456421, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -1.311279058456421, "logits_per_char": -0.6556395292282104, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 44, "native_id": "NYSEDREGENTS_2008_8_34", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3865052461624146, "incorrect_loss_raw": 1.3919140100479126, "correct_loss_per_char": 0.6932526230812073, "incorrect_loss_per_char": 0.6959570050239563, "correct_loss_per_token": 1.3865052461624146, "incorrect_loss_per_token": 1.3919140100479126, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4559755325317383, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.4559755325317383, "logits_per_char": -0.7279877662658691, "num_chars": 2}, {"sum_logits": -1.3865052461624146, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.3865052461624146, "logits_per_char": -0.6932526230812073, "num_chars": 2}, {"sum_logits": -1.3985204696655273, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.3985204696655273, "logits_per_char": -0.6992602348327637, "num_chars": 2}, {"sum_logits": -1.3212460279464722, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": true, "logits_per_token": -1.3212460279464722, "logits_per_char": -0.6606230139732361, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 45, "native_id": "Mercury_7212398", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3964276313781738, "incorrect_loss_raw": 1.3935542901357014, "correct_loss_per_char": 0.6982138156890869, "incorrect_loss_per_char": 0.6967771450678507, "correct_loss_per_token": 1.3964276313781738, "incorrect_loss_per_token": 1.3935542901357014, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2607978582382202, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.2607978582382202, "logits_per_char": -0.6303989291191101, "num_chars": 2}, {"sum_logits": -1.3964276313781738, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3964276313781738, "logits_per_char": -0.6982138156890869, "num_chars": 2}, {"sum_logits": -1.4669586420059204, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4669586420059204, "logits_per_char": -0.7334793210029602, "num_chars": 2}, {"sum_logits": -1.4529063701629639, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4529063701629639, "logits_per_char": -0.7264531850814819, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 46, "native_id": "Mercury_SC_401290", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3922626972198486, "incorrect_loss_raw": 1.3934355974197388, "correct_loss_per_char": 0.6961313486099243, "incorrect_loss_per_char": 0.6967177987098694, "correct_loss_per_token": 1.3922626972198486, "incorrect_loss_per_token": 1.3934355974197388, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4888988733291626, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4888988733291626, "logits_per_char": -0.7444494366645813, "num_chars": 2}, {"sum_logits": -1.393348217010498, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.393348217010498, "logits_per_char": -0.696674108505249, "num_chars": 2}, {"sum_logits": -1.3922626972198486, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3922626972198486, "logits_per_char": -0.6961313486099243, "num_chars": 2}, {"sum_logits": -1.2980597019195557, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.2980597019195557, "logits_per_char": -0.6490298509597778, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 47, "native_id": "Mercury_SC_402120", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3236907720565796, "incorrect_loss_raw": 1.4140862623850505, "correct_loss_per_char": 0.6618453860282898, "incorrect_loss_per_char": 0.7070431311925253, "correct_loss_per_token": 1.3236907720565796, "incorrect_loss_per_token": 1.4140862623850505, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4609055519104004, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4609055519104004, "logits_per_char": -0.7304527759552002, "num_chars": 2}, {"sum_logits": -1.3675249814987183, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3675249814987183, "logits_per_char": -0.6837624907493591, "num_chars": 2}, {"sum_logits": -1.3236907720565796, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.3236907720565796, "logits_per_char": -0.6618453860282898, "num_chars": 2}, {"sum_logits": -1.4138282537460327, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4138282537460327, "logits_per_char": -0.7069141268730164, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 48, "native_id": "Mercury_184975", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4209388494491577, "incorrect_loss_raw": 1.3869564930597942, "correct_loss_per_char": 0.7104694247245789, "incorrect_loss_per_char": 0.6934782465298971, "correct_loss_per_token": 1.4209388494491577, "incorrect_loss_per_token": 1.3869564930597942, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.25450599193573, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.25450599193573, "logits_per_char": -0.627252995967865, "num_chars": 2}, {"sum_logits": -1.4380825757980347, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4380825757980347, "logits_per_char": -0.7190412878990173, "num_chars": 2}, {"sum_logits": -1.4209388494491577, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4209388494491577, "logits_per_char": -0.7104694247245789, "num_chars": 2}, {"sum_logits": -1.4682809114456177, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4682809114456177, "logits_per_char": -0.7341404557228088, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 49, "native_id": "Mercury_SC_400578", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.272545576095581, "incorrect_loss_raw": 1.4334601163864136, "correct_loss_per_char": 0.6362727880477905, "incorrect_loss_per_char": 0.7167300581932068, "correct_loss_per_token": 1.272545576095581, "incorrect_loss_per_token": 1.4334601163864136, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.272545576095581, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.272545576095581, "logits_per_char": -0.6362727880477905, "num_chars": 2}, {"sum_logits": -1.4315146207809448, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4315146207809448, "logits_per_char": -0.7157573103904724, "num_chars": 2}, {"sum_logits": -1.4410219192504883, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4410219192504883, "logits_per_char": -0.7205109596252441, "num_chars": 2}, {"sum_logits": -1.4278438091278076, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4278438091278076, "logits_per_char": -0.7139219045639038, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 50, "native_id": "MCAS_2001_8_4", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.235752820968628, "incorrect_loss_raw": 1.4476688305536907, "correct_loss_per_char": 0.617876410484314, "incorrect_loss_per_char": 0.7238344152768453, "correct_loss_per_token": 1.235752820968628, "incorrect_loss_per_token": 1.4476688305536907, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5161396265029907, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.5161396265029907, "logits_per_char": -0.7580698132514954, "num_chars": 2}, {"sum_logits": -1.4091459512710571, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.4091459512710571, "logits_per_char": -0.7045729756355286, "num_chars": 2}, {"sum_logits": -1.417720913887024, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.417720913887024, "logits_per_char": -0.708860456943512, "num_chars": 2}, {"sum_logits": -1.235752820968628, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": true, "logits_per_token": -1.235752820968628, "logits_per_char": -0.617876410484314, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 51, "native_id": "MCAS_2003_5_33", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.413991093635559, "incorrect_loss_raw": 1.3834590117136638, "correct_loss_per_char": 0.7069955468177795, "incorrect_loss_per_char": 0.6917295058568319, "correct_loss_per_token": 1.413991093635559, "incorrect_loss_per_token": 1.3834590117136638, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3780256509780884, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.3780256509780884, "logits_per_char": -0.6890128254890442, "num_chars": 2}, {"sum_logits": -1.3851592540740967, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.3851592540740967, "logits_per_char": -0.6925796270370483, "num_chars": 2}, {"sum_logits": -1.413991093635559, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.413991093635559, "logits_per_char": -0.7069955468177795, "num_chars": 2}, {"sum_logits": -1.3871921300888062, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.3871921300888062, "logits_per_char": -0.6935960650444031, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 52, "native_id": "Mercury_7068513", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4101371765136719, "incorrect_loss_raw": 1.3855602741241455, "correct_loss_per_char": 0.7050685882568359, "incorrect_loss_per_char": 0.6927801370620728, "correct_loss_per_token": 1.4101371765136719, "incorrect_loss_per_token": 1.3855602741241455, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4101371765136719, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4101371765136719, "logits_per_char": -0.7050685882568359, "num_chars": 2}, {"sum_logits": -1.3352694511413574, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.3352694511413574, "logits_per_char": -0.6676347255706787, "num_chars": 2}, {"sum_logits": -1.4086788892745972, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4086788892745972, "logits_per_char": -0.7043394446372986, "num_chars": 2}, {"sum_logits": -1.412732481956482, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.412732481956482, "logits_per_char": -0.706366240978241, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 53, "native_id": "AKDE&ED_2008_4_26", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5497522354125977, "incorrect_loss_raw": 1.3497350613276164, "correct_loss_per_char": 0.7748761177062988, "incorrect_loss_per_char": 0.6748675306638082, "correct_loss_per_token": 1.5497522354125977, "incorrect_loss_per_token": 1.3497350613276164, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2144371271133423, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.2144371271133423, "logits_per_char": -0.6072185635566711, "num_chars": 2}, {"sum_logits": -1.3315861225128174, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.3315861225128174, "logits_per_char": -0.6657930612564087, "num_chars": 2}, {"sum_logits": -1.5497522354125977, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.5497522354125977, "logits_per_char": -0.7748761177062988, "num_chars": 2}, {"sum_logits": -1.5031819343566895, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.5031819343566895, "logits_per_char": -0.7515909671783447, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 54, "native_id": "Mercury_7235638", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4758590459823608, "incorrect_loss_raw": 1.3663634459177654, "correct_loss_per_char": 0.7379295229911804, "incorrect_loss_per_char": 0.6831817229588827, "correct_loss_per_token": 1.4758590459823608, "incorrect_loss_per_token": 1.3663634459177654, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.394613265991211, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.394613265991211, "logits_per_char": -0.6973066329956055, "num_chars": 2}, {"sum_logits": -1.280682921409607, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.280682921409607, "logits_per_char": -0.6403414607048035, "num_chars": 2}, {"sum_logits": -1.4758590459823608, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4758590459823608, "logits_per_char": -0.7379295229911804, "num_chars": 2}, {"sum_logits": -1.423794150352478, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.423794150352478, "logits_per_char": -0.711897075176239, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 55, "native_id": "MDSA_2009_5_20", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3552876710891724, "incorrect_loss_raw": 1.4042641321818035, "correct_loss_per_char": 0.6776438355445862, "incorrect_loss_per_char": 0.7021320660909017, "correct_loss_per_token": 1.3552876710891724, "incorrect_loss_per_token": 1.4042641321818035, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3383188247680664, "num_tokens": 1, "num_tokens_all": 438, "is_greedy": true, "logits_per_token": -1.3383188247680664, "logits_per_char": -0.6691594123840332, "num_chars": 2}, {"sum_logits": -1.3552876710891724, "num_tokens": 1, "num_tokens_all": 438, "is_greedy": false, "logits_per_token": -1.3552876710891724, "logits_per_char": -0.6776438355445862, "num_chars": 2}, {"sum_logits": -1.487191081047058, "num_tokens": 1, "num_tokens_all": 438, "is_greedy": false, "logits_per_token": -1.487191081047058, "logits_per_char": -0.743595540523529, "num_chars": 2}, {"sum_logits": -1.3872824907302856, "num_tokens": 1, "num_tokens_all": 438, "is_greedy": false, "logits_per_token": -1.3872824907302856, "logits_per_char": -0.6936412453651428, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 56, "native_id": "Mercury_178325", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4065685272216797, "incorrect_loss_raw": 1.388219912846883, "correct_loss_per_char": 0.7032842636108398, "incorrect_loss_per_char": 0.6941099564234415, "correct_loss_per_token": 1.4065685272216797, "incorrect_loss_per_token": 1.388219912846883, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4825870990753174, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4825870990753174, "logits_per_char": -0.7412935495376587, "num_chars": 2}, {"sum_logits": -1.2936718463897705, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.2936718463897705, "logits_per_char": -0.6468359231948853, "num_chars": 2}, {"sum_logits": -1.4065685272216797, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4065685272216797, "logits_per_char": -0.7032842636108398, "num_chars": 2}, {"sum_logits": -1.3884007930755615, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.3884007930755615, "logits_per_char": -0.6942003965377808, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 57, "native_id": "Mercury_7212678", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4123799800872803, "incorrect_loss_raw": 1.389673391977946, "correct_loss_per_char": 0.7061899900436401, "incorrect_loss_per_char": 0.694836695988973, "correct_loss_per_token": 1.4123799800872803, "incorrect_loss_per_token": 1.389673391977946, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4123799800872803, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.4123799800872803, "logits_per_char": -0.7061899900436401, "num_chars": 2}, {"sum_logits": -1.4313533306121826, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.4313533306121826, "logits_per_char": -0.7156766653060913, "num_chars": 2}, {"sum_logits": -1.4967378377914429, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.4967378377914429, "logits_per_char": -0.7483689188957214, "num_chars": 2}, {"sum_logits": -1.2409290075302124, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": true, "logits_per_token": -1.2409290075302124, "logits_per_char": -0.6204645037651062, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 58, "native_id": "TAKS_2009_8_32", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3332411050796509, "incorrect_loss_raw": 1.414170503616333, "correct_loss_per_char": 0.6666205525398254, "incorrect_loss_per_char": 0.7070852518081665, "correct_loss_per_token": 1.3332411050796509, "incorrect_loss_per_token": 1.414170503616333, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3332411050796509, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.3332411050796509, "logits_per_char": -0.6666205525398254, "num_chars": 2}, {"sum_logits": -1.3703694343566895, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3703694343566895, "logits_per_char": -0.6851847171783447, "num_chars": 2}, {"sum_logits": -1.5133593082427979, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.5133593082427979, "logits_per_char": -0.7566796541213989, "num_chars": 2}, {"sum_logits": -1.3587827682495117, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3587827682495117, "logits_per_char": -0.6793913841247559, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 59, "native_id": "Mercury_412681", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4604763984680176, "incorrect_loss_raw": 1.3733298778533936, "correct_loss_per_char": 0.7302381992340088, "incorrect_loss_per_char": 0.6866649389266968, "correct_loss_per_token": 1.4604763984680176, "incorrect_loss_per_token": 1.3733298778533936, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4249024391174316, "num_tokens": 1, "num_tokens_all": 446, "is_greedy": false, "logits_per_token": -1.4249024391174316, "logits_per_char": -0.7124512195587158, "num_chars": 2}, {"sum_logits": -1.2318321466445923, "num_tokens": 1, "num_tokens_all": 446, "is_greedy": true, "logits_per_token": -1.2318321466445923, "logits_per_char": -0.6159160733222961, "num_chars": 2}, {"sum_logits": -1.4604763984680176, "num_tokens": 1, "num_tokens_all": 446, "is_greedy": false, "logits_per_token": -1.4604763984680176, "logits_per_char": -0.7302381992340088, "num_chars": 2}, {"sum_logits": -1.4632550477981567, "num_tokens": 1, "num_tokens_all": 446, "is_greedy": false, "logits_per_token": -1.4632550477981567, "logits_per_char": -0.7316275238990784, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 60, "native_id": "Mercury_400440", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5708990097045898, "incorrect_loss_raw": 1.3476874430974324, "correct_loss_per_char": 0.7854495048522949, "incorrect_loss_per_char": 0.6738437215487162, "correct_loss_per_token": 1.5708990097045898, "incorrect_loss_per_token": 1.3476874430974324, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2809480428695679, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.2809480428695679, "logits_per_char": -0.6404740214347839, "num_chars": 2}, {"sum_logits": -1.2242724895477295, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.2242724895477295, "logits_per_char": -0.6121362447738647, "num_chars": 2}, {"sum_logits": -1.537841796875, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.537841796875, "logits_per_char": -0.7689208984375, "num_chars": 2}, {"sum_logits": -1.5708990097045898, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.5708990097045898, "logits_per_char": -0.7854495048522949, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 61, "native_id": "Mercury_SC_416529", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4298427104949951, "incorrect_loss_raw": 1.3784658511479695, "correct_loss_per_char": 0.7149213552474976, "incorrect_loss_per_char": 0.6892329255739847, "correct_loss_per_token": 1.4298427104949951, "incorrect_loss_per_token": 1.3784658511479695, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3360531330108643, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.3360531330108643, "logits_per_char": -0.6680265665054321, "num_chars": 2}, {"sum_logits": -1.4298427104949951, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4298427104949951, "logits_per_char": -0.7149213552474976, "num_chars": 2}, {"sum_logits": -1.3773508071899414, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3773508071899414, "logits_per_char": -0.6886754035949707, "num_chars": 2}, {"sum_logits": -1.421993613243103, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.421993613243103, "logits_per_char": -0.7109968066215515, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 62, "native_id": "MCAS_2006_8_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4012553691864014, "incorrect_loss_raw": 1.3933494885762532, "correct_loss_per_char": 0.7006276845932007, "incorrect_loss_per_char": 0.6966747442881266, "correct_loss_per_token": 1.4012553691864014, "incorrect_loss_per_token": 1.3933494885762532, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3173730373382568, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.3173730373382568, "logits_per_char": -0.6586865186691284, "num_chars": 2}, {"sum_logits": -1.3233025074005127, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.3233025074005127, "logits_per_char": -0.6616512537002563, "num_chars": 2}, {"sum_logits": -1.5393729209899902, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.5393729209899902, "logits_per_char": -0.7696864604949951, "num_chars": 2}, {"sum_logits": -1.4012553691864014, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4012553691864014, "logits_per_char": -0.7006276845932007, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 63, "native_id": "TIMSS_2003_8_pg80", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5407114028930664, "incorrect_loss_raw": 1.349445382754008, "correct_loss_per_char": 0.7703557014465332, "incorrect_loss_per_char": 0.674722691377004, "correct_loss_per_token": 1.5407114028930664, "incorrect_loss_per_token": 1.349445382754008, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3954373598098755, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3954373598098755, "logits_per_char": -0.6977186799049377, "num_chars": 2}, {"sum_logits": -1.351023554801941, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.351023554801941, "logits_per_char": -0.6755117774009705, "num_chars": 2}, {"sum_logits": -1.5407114028930664, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.5407114028930664, "logits_per_char": -0.7703557014465332, "num_chars": 2}, {"sum_logits": -1.3018752336502075, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.3018752336502075, "logits_per_char": -0.6509376168251038, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 64, "native_id": "Mercury_416645", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2982721328735352, "incorrect_loss_raw": 1.4228577613830566, "correct_loss_per_char": 0.6491360664367676, "incorrect_loss_per_char": 0.7114288806915283, "correct_loss_per_token": 1.2982721328735352, "incorrect_loss_per_token": 1.4228577613830566, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.385758876800537, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": false, "logits_per_token": -1.385758876800537, "logits_per_char": -0.6928794384002686, "num_chars": 2}, {"sum_logits": -1.4140914678573608, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": false, "logits_per_token": -1.4140914678573608, "logits_per_char": -0.7070457339286804, "num_chars": 2}, {"sum_logits": -1.468722939491272, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": false, "logits_per_token": -1.468722939491272, "logits_per_char": -0.734361469745636, "num_chars": 2}, {"sum_logits": -1.2982721328735352, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": true, "logits_per_token": -1.2982721328735352, "logits_per_char": -0.6491360664367676, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 65, "native_id": "Mercury_406777", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2328286170959473, "incorrect_loss_raw": 1.4503378868103027, "correct_loss_per_char": 0.6164143085479736, "incorrect_loss_per_char": 0.7251689434051514, "correct_loss_per_token": 1.2328286170959473, "incorrect_loss_per_token": 1.4503378868103027, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2328286170959473, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.2328286170959473, "logits_per_char": -0.6164143085479736, "num_chars": 2}, {"sum_logits": -1.4872727394104004, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4872727394104004, "logits_per_char": -0.7436363697052002, "num_chars": 2}, {"sum_logits": -1.4256486892700195, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4256486892700195, "logits_per_char": -0.7128243446350098, "num_chars": 2}, {"sum_logits": -1.4380922317504883, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4380922317504883, "logits_per_char": -0.7190461158752441, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 66, "native_id": "Mercury_LBS11018", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4407808780670166, "incorrect_loss_raw": 1.3765786091486614, "correct_loss_per_char": 0.7203904390335083, "incorrect_loss_per_char": 0.6882893045743307, "correct_loss_per_token": 1.4407808780670166, "incorrect_loss_per_token": 1.3765786091486614, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4407808780670166, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.4407808780670166, "logits_per_char": -0.7203904390335083, "num_chars": 2}, {"sum_logits": -1.4507125616073608, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.4507125616073608, "logits_per_char": -0.7253562808036804, "num_chars": 2}, {"sum_logits": -1.3253620862960815, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.3253620862960815, "logits_per_char": -0.6626810431480408, "num_chars": 2}, {"sum_logits": -1.3536611795425415, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.3536611795425415, "logits_per_char": -0.6768305897712708, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 67, "native_id": "Mercury_7139878", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4642465114593506, "incorrect_loss_raw": 1.370130221048991, "correct_loss_per_char": 0.7321232557296753, "incorrect_loss_per_char": 0.6850651105244955, "correct_loss_per_token": 1.4642465114593506, "incorrect_loss_per_token": 1.370130221048991, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4563069343566895, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4563069343566895, "logits_per_char": -0.7281534671783447, "num_chars": 2}, {"sum_logits": -1.4000349044799805, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4000349044799805, "logits_per_char": -0.7000174522399902, "num_chars": 2}, {"sum_logits": -1.4642465114593506, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4642465114593506, "logits_per_char": -0.7321232557296753, "num_chars": 2}, {"sum_logits": -1.2540488243103027, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.2540488243103027, "logits_per_char": -0.6270244121551514, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 68, "native_id": "Mercury_417147", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4094126224517822, "incorrect_loss_raw": 1.3932361602783203, "correct_loss_per_char": 0.7047063112258911, "incorrect_loss_per_char": 0.6966180801391602, "correct_loss_per_token": 1.4094126224517822, "incorrect_loss_per_token": 1.3932361602783203, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2282419204711914, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.2282419204711914, "logits_per_char": -0.6141209602355957, "num_chars": 2}, {"sum_logits": -1.4094126224517822, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4094126224517822, "logits_per_char": -0.7047063112258911, "num_chars": 2}, {"sum_logits": -1.4261448383331299, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4261448383331299, "logits_per_char": -0.7130724191665649, "num_chars": 2}, {"sum_logits": -1.5253217220306396, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.5253217220306396, "logits_per_char": -0.7626608610153198, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 69, "native_id": "Mercury_7016765", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2923403978347778, "incorrect_loss_raw": 1.4270854791005452, "correct_loss_per_char": 0.6461701989173889, "incorrect_loss_per_char": 0.7135427395502726, "correct_loss_per_token": 1.2923403978347778, "incorrect_loss_per_token": 1.4270854791005452, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2923403978347778, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.2923403978347778, "logits_per_char": -0.6461701989173889, "num_chars": 2}, {"sum_logits": -1.4470900297164917, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4470900297164917, "logits_per_char": -0.7235450148582458, "num_chars": 2}, {"sum_logits": -1.4872729778289795, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4872729778289795, "logits_per_char": -0.7436364889144897, "num_chars": 2}, {"sum_logits": -1.3468934297561646, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.3468934297561646, "logits_per_char": -0.6734467148780823, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 70, "native_id": "Mercury_415303", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2999411821365356, "incorrect_loss_raw": 1.4361280997594197, "correct_loss_per_char": 0.6499705910682678, "incorrect_loss_per_char": 0.7180640498797098, "correct_loss_per_token": 1.2999411821365356, "incorrect_loss_per_token": 1.4361280997594197, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2999411821365356, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.2999411821365356, "logits_per_char": -0.6499705910682678, "num_chars": 2}, {"sum_logits": -1.3483628034591675, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3483628034591675, "logits_per_char": -0.6741814017295837, "num_chars": 2}, {"sum_logits": -1.3962655067443848, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3962655067443848, "logits_per_char": -0.6981327533721924, "num_chars": 2}, {"sum_logits": -1.563755989074707, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.563755989074707, "logits_per_char": -0.7818779945373535, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 71, "native_id": "Mercury_7215845", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.391758918762207, "incorrect_loss_raw": 1.3905737400054932, "correct_loss_per_char": 0.6958794593811035, "incorrect_loss_per_char": 0.6952868700027466, "correct_loss_per_token": 1.391758918762207, "incorrect_loss_per_token": 1.3905737400054932, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4013946056365967, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4013946056365967, "logits_per_char": -0.7006973028182983, "num_chars": 2}, {"sum_logits": -1.3564338684082031, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.3564338684082031, "logits_per_char": -0.6782169342041016, "num_chars": 2}, {"sum_logits": -1.391758918762207, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.391758918762207, "logits_per_char": -0.6958794593811035, "num_chars": 2}, {"sum_logits": -1.4138927459716797, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4138927459716797, "logits_per_char": -0.7069463729858398, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 72, "native_id": "Mercury_7136885", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3578734397888184, "incorrect_loss_raw": 1.4036486148834229, "correct_loss_per_char": 0.6789367198944092, "incorrect_loss_per_char": 0.7018243074417114, "correct_loss_per_token": 1.3578734397888184, "incorrect_loss_per_token": 1.4036486148834229, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3241276741027832, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.3241276741027832, "logits_per_char": -0.6620638370513916, "num_chars": 2}, {"sum_logits": -1.3578734397888184, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.3578734397888184, "logits_per_char": -0.6789367198944092, "num_chars": 2}, {"sum_logits": -1.4535799026489258, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.4535799026489258, "logits_per_char": -0.7267899513244629, "num_chars": 2}, {"sum_logits": -1.4332382678985596, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.4332382678985596, "logits_per_char": -0.7166191339492798, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 73, "native_id": "Mercury_SC_400059", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3786530494689941, "incorrect_loss_raw": 1.3950483798980713, "correct_loss_per_char": 0.6893265247344971, "incorrect_loss_per_char": 0.6975241899490356, "correct_loss_per_token": 1.3786530494689941, "incorrect_loss_per_token": 1.3950483798980713, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.328975796699524, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.328975796699524, "logits_per_char": -0.664487898349762, "num_chars": 2}, {"sum_logits": -1.3958772420883179, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.3958772420883179, "logits_per_char": -0.6979386210441589, "num_chars": 2}, {"sum_logits": -1.460292100906372, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.460292100906372, "logits_per_char": -0.730146050453186, "num_chars": 2}, {"sum_logits": -1.3786530494689941, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.3786530494689941, "logits_per_char": -0.6893265247344971, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 74, "native_id": "Mercury_7044328", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3903518915176392, "incorrect_loss_raw": 1.3901880184809368, "correct_loss_per_char": 0.6951759457588196, "incorrect_loss_per_char": 0.6950940092404684, "correct_loss_per_token": 1.3903518915176392, "incorrect_loss_per_token": 1.3901880184809368, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4133042097091675, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4133042097091675, "logits_per_char": -0.7066521048545837, "num_chars": 2}, {"sum_logits": -1.3719372749328613, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.3719372749328613, "logits_per_char": -0.6859686374664307, "num_chars": 2}, {"sum_logits": -1.3853225708007812, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3853225708007812, "logits_per_char": -0.6926612854003906, "num_chars": 2}, {"sum_logits": -1.3903518915176392, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3903518915176392, "logits_per_char": -0.6951759457588196, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 75, "native_id": "MEA_2010_8_1", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.340164065361023, "incorrect_loss_raw": 1.40938933690389, "correct_loss_per_char": 0.6700820326805115, "incorrect_loss_per_char": 0.704694668451945, "correct_loss_per_token": 1.340164065361023, "incorrect_loss_per_token": 1.40938933690389, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.340164065361023, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.340164065361023, "logits_per_char": -0.6700820326805115, "num_chars": 2}, {"sum_logits": -1.4066203832626343, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4066203832626343, "logits_per_char": -0.7033101916313171, "num_chars": 2}, {"sum_logits": -1.3324514627456665, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.3324514627456665, "logits_per_char": -0.6662257313728333, "num_chars": 2}, {"sum_logits": -1.4890961647033691, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4890961647033691, "logits_per_char": -0.7445480823516846, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 76, "native_id": "Mercury_414099", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3849178552627563, "incorrect_loss_raw": 1.3953638474146526, "correct_loss_per_char": 0.6924589276313782, "incorrect_loss_per_char": 0.6976819237073263, "correct_loss_per_token": 1.3849178552627563, "incorrect_loss_per_token": 1.3953638474146526, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4721769094467163, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.4721769094467163, "logits_per_char": -0.7360884547233582, "num_chars": 2}, {"sum_logits": -1.3849178552627563, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.3849178552627563, "logits_per_char": -0.6924589276313782, "num_chars": 2}, {"sum_logits": -1.408974528312683, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.408974528312683, "logits_per_char": -0.7044872641563416, "num_chars": 2}, {"sum_logits": -1.304940104484558, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": true, "logits_per_token": -1.304940104484558, "logits_per_char": -0.652470052242279, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 77, "native_id": "Mercury_410807", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.440745234489441, "incorrect_loss_raw": 1.3866607348124187, "correct_loss_per_char": 0.7203726172447205, "incorrect_loss_per_char": 0.6933303674062093, "correct_loss_per_token": 1.440745234489441, "incorrect_loss_per_token": 1.3866607348124187, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5886545181274414, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.5886545181274414, "logits_per_char": -0.7943272590637207, "num_chars": 2}, {"sum_logits": -1.440745234489441, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.440745234489441, "logits_per_char": -0.7203726172447205, "num_chars": 2}, {"sum_logits": -1.3785005807876587, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.3785005807876587, "logits_per_char": -0.6892502903938293, "num_chars": 2}, {"sum_logits": -1.1928271055221558, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": true, "logits_per_token": -1.1928271055221558, "logits_per_char": -0.5964135527610779, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 78, "native_id": "Mercury_403234", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4371287822723389, "incorrect_loss_raw": 1.376583496729533, "correct_loss_per_char": 0.7185643911361694, "incorrect_loss_per_char": 0.6882917483647665, "correct_loss_per_token": 1.4371287822723389, "incorrect_loss_per_token": 1.376583496729533, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3677988052368164, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.3677988052368164, "logits_per_char": -0.6838994026184082, "num_chars": 2}, {"sum_logits": -1.4371287822723389, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.4371287822723389, "logits_per_char": -0.7185643911361694, "num_chars": 2}, {"sum_logits": -1.404880166053772, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.404880166053772, "logits_per_char": -0.702440083026886, "num_chars": 2}, {"sum_logits": -1.3570715188980103, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": true, "logits_per_token": -1.3570715188980103, "logits_per_char": -0.6785357594490051, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 79, "native_id": "Mercury_7011323", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4366817474365234, "incorrect_loss_raw": 1.3775147596995037, "correct_loss_per_char": 0.7183408737182617, "incorrect_loss_per_char": 0.6887573798497518, "correct_loss_per_token": 1.4366817474365234, "incorrect_loss_per_token": 1.3775147596995037, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4366817474365234, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4366817474365234, "logits_per_char": -0.7183408737182617, "num_chars": 2}, {"sum_logits": -1.4257972240447998, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4257972240447998, "logits_per_char": -0.7128986120223999, "num_chars": 2}, {"sum_logits": -1.3280154466629028, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.3280154466629028, "logits_per_char": -0.6640077233314514, "num_chars": 2}, {"sum_logits": -1.378731608390808, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.378731608390808, "logits_per_char": -0.689365804195404, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 80, "native_id": "Mercury_7109463", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2492502927780151, "incorrect_loss_raw": 1.4434632062911987, "correct_loss_per_char": 0.6246251463890076, "incorrect_loss_per_char": 0.7217316031455994, "correct_loss_per_token": 1.2492502927780151, "incorrect_loss_per_token": 1.4434632062911987, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4858577251434326, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.4858577251434326, "logits_per_char": -0.7429288625717163, "num_chars": 2}, {"sum_logits": -1.408957600593567, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.408957600593567, "logits_per_char": -0.7044788002967834, "num_chars": 2}, {"sum_logits": -1.4355742931365967, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.4355742931365967, "logits_per_char": -0.7177871465682983, "num_chars": 2}, {"sum_logits": -1.2492502927780151, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": true, "logits_per_token": -1.2492502927780151, "logits_per_char": -0.6246251463890076, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 81, "native_id": "Mercury_SC_401277", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5118509531021118, "incorrect_loss_raw": 1.3529771566390991, "correct_loss_per_char": 0.7559254765510559, "incorrect_loss_per_char": 0.6764885783195496, "correct_loss_per_token": 1.5118509531021118, "incorrect_loss_per_token": 1.3529771566390991, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.393306016921997, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.393306016921997, "logits_per_char": -0.6966530084609985, "num_chars": 2}, {"sum_logits": -1.3094912767410278, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.3094912767410278, "logits_per_char": -0.6547456383705139, "num_chars": 2}, {"sum_logits": -1.5118509531021118, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.5118509531021118, "logits_per_char": -0.7559254765510559, "num_chars": 2}, {"sum_logits": -1.3561341762542725, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.3561341762542725, "logits_per_char": -0.6780670881271362, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 82, "native_id": "MCAS_2005_5_25", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4183506965637207, "incorrect_loss_raw": 1.381646752357483, "correct_loss_per_char": 0.7091753482818604, "incorrect_loss_per_char": 0.6908233761787415, "correct_loss_per_token": 1.4183506965637207, "incorrect_loss_per_token": 1.381646752357483, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3090362548828125, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.3090362548828125, "logits_per_char": -0.6545181274414062, "num_chars": 2}, {"sum_logits": -1.4093945026397705, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4093945026397705, "logits_per_char": -0.7046972513198853, "num_chars": 2}, {"sum_logits": -1.4183506965637207, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4183506965637207, "logits_per_char": -0.7091753482818604, "num_chars": 2}, {"sum_logits": -1.4265094995498657, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4265094995498657, "logits_per_char": -0.7132547497749329, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 83, "native_id": "Mercury_SC_401272", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4585354328155518, "incorrect_loss_raw": 1.3934059540430705, "correct_loss_per_char": 0.7292677164077759, "incorrect_loss_per_char": 0.6967029770215353, "correct_loss_per_token": 1.4585354328155518, "incorrect_loss_per_token": 1.3934059540430705, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.109203815460205, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.109203815460205, "logits_per_char": -0.5546019077301025, "num_chars": 2}, {"sum_logits": -1.4409781694412231, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4409781694412231, "logits_per_char": -0.7204890847206116, "num_chars": 2}, {"sum_logits": -1.6300358772277832, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.6300358772277832, "logits_per_char": -0.8150179386138916, "num_chars": 2}, {"sum_logits": -1.4585354328155518, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4585354328155518, "logits_per_char": -0.7292677164077759, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 84, "native_id": "Mercury_7103600", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3537031412124634, "incorrect_loss_raw": 1.4061710039774578, "correct_loss_per_char": 0.6768515706062317, "incorrect_loss_per_char": 0.7030855019887289, "correct_loss_per_token": 1.3537031412124634, "incorrect_loss_per_token": 1.4061710039774578, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4391484260559082, "num_tokens": 1, "num_tokens_all": 420, "is_greedy": false, "logits_per_token": -1.4391484260559082, "logits_per_char": -0.7195742130279541, "num_chars": 2}, {"sum_logits": -1.3286643028259277, "num_tokens": 1, "num_tokens_all": 420, "is_greedy": true, "logits_per_token": -1.3286643028259277, "logits_per_char": -0.6643321514129639, "num_chars": 2}, {"sum_logits": -1.450700283050537, "num_tokens": 1, "num_tokens_all": 420, "is_greedy": false, "logits_per_token": -1.450700283050537, "logits_per_char": -0.7253501415252686, "num_chars": 2}, {"sum_logits": -1.3537031412124634, "num_tokens": 1, "num_tokens_all": 420, "is_greedy": false, "logits_per_token": -1.3537031412124634, "logits_per_char": -0.6768515706062317, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 85, "native_id": "MDSA_2009_8_2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.214440941810608, "incorrect_loss_raw": 1.4612552324930828, "correct_loss_per_char": 0.607220470905304, "incorrect_loss_per_char": 0.7306276162465414, "correct_loss_per_token": 1.214440941810608, "incorrect_loss_per_token": 1.4612552324930828, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.214440941810608, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.214440941810608, "logits_per_char": -0.607220470905304, "num_chars": 2}, {"sum_logits": -1.3487335443496704, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.3487335443496704, "logits_per_char": -0.6743667721748352, "num_chars": 2}, {"sum_logits": -1.4886568784713745, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4886568784713745, "logits_per_char": -0.7443284392356873, "num_chars": 2}, {"sum_logits": -1.5463752746582031, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.5463752746582031, "logits_per_char": -0.7731876373291016, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 86, "native_id": "Mercury_7127943", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.477421522140503, "incorrect_loss_raw": 1.365816553433736, "correct_loss_per_char": 0.7387107610702515, "incorrect_loss_per_char": 0.682908276716868, "correct_loss_per_token": 1.477421522140503, "incorrect_loss_per_token": 1.365816553433736, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.477421522140503, "num_tokens": 1, "num_tokens_all": 429, "is_greedy": false, "logits_per_token": -1.477421522140503, "logits_per_char": -0.7387107610702515, "num_chars": 2}, {"sum_logits": -1.3560398817062378, "num_tokens": 1, "num_tokens_all": 429, "is_greedy": false, "logits_per_token": -1.3560398817062378, "logits_per_char": -0.6780199408531189, "num_chars": 2}, {"sum_logits": -1.4576884508132935, "num_tokens": 1, "num_tokens_all": 429, "is_greedy": false, "logits_per_token": -1.4576884508132935, "logits_per_char": -0.7288442254066467, "num_chars": 2}, {"sum_logits": -1.2837213277816772, "num_tokens": 1, "num_tokens_all": 429, "is_greedy": true, "logits_per_token": -1.2837213277816772, "logits_per_char": -0.6418606638908386, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 87, "native_id": "ACTAAP_2009_7_8", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4365358352661133, "incorrect_loss_raw": 1.3822110493977864, "correct_loss_per_char": 0.7182679176330566, "incorrect_loss_per_char": 0.6911055246988932, "correct_loss_per_token": 1.4365358352661133, "incorrect_loss_per_token": 1.3822110493977864, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4052257537841797, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -1.4052257537841797, "logits_per_char": -0.7026128768920898, "num_chars": 2}, {"sum_logits": -1.2623521089553833, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": true, "logits_per_token": -1.2623521089553833, "logits_per_char": -0.6311760544776917, "num_chars": 2}, {"sum_logits": -1.4790552854537964, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -1.4790552854537964, "logits_per_char": -0.7395276427268982, "num_chars": 2}, {"sum_logits": -1.4365358352661133, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -1.4365358352661133, "logits_per_char": -0.7182679176330566, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 88, "native_id": "MCAS_2006_9_43", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4082180261611938, "incorrect_loss_raw": 1.38645605246226, "correct_loss_per_char": 0.7041090130805969, "incorrect_loss_per_char": 0.69322802623113, "correct_loss_per_token": 1.4082180261611938, "incorrect_loss_per_token": 1.38645605246226, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4272708892822266, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.4272708892822266, "logits_per_char": -0.7136354446411133, "num_chars": 2}, {"sum_logits": -1.434432864189148, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.434432864189148, "logits_per_char": -0.717216432094574, "num_chars": 2}, {"sum_logits": -1.4082180261611938, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.4082180261611938, "logits_per_char": -0.7041090130805969, "num_chars": 2}, {"sum_logits": -1.2976644039154053, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": true, "logits_per_token": -1.2976644039154053, "logits_per_char": -0.6488322019577026, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 89, "native_id": "Mercury_7252088", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4599491357803345, "incorrect_loss_raw": 1.368347446123759, "correct_loss_per_char": 0.7299745678901672, "incorrect_loss_per_char": 0.6841737230618795, "correct_loss_per_token": 1.4599491357803345, "incorrect_loss_per_token": 1.368347446123759, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.382866621017456, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.382866621017456, "logits_per_char": -0.691433310508728, "num_chars": 2}, {"sum_logits": -1.3409801721572876, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.3409801721572876, "logits_per_char": -0.6704900860786438, "num_chars": 2}, {"sum_logits": -1.4599491357803345, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4599491357803345, "logits_per_char": -0.7299745678901672, "num_chars": 2}, {"sum_logits": -1.3811955451965332, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3811955451965332, "logits_per_char": -0.6905977725982666, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 90, "native_id": "Mercury_7084665", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3539265394210815, "incorrect_loss_raw": 1.4067549308141072, "correct_loss_per_char": 0.6769632697105408, "incorrect_loss_per_char": 0.7033774654070536, "correct_loss_per_token": 1.3539265394210815, "incorrect_loss_per_token": 1.4067549308141072, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3539265394210815, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3539265394210815, "logits_per_char": -0.6769632697105408, "num_chars": 2}, {"sum_logits": -1.430119514465332, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.430119514465332, "logits_per_char": -0.715059757232666, "num_chars": 2}, {"sum_logits": -1.3129802942276, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.3129802942276, "logits_per_char": -0.6564901471138, "num_chars": 2}, {"sum_logits": -1.4771649837493896, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4771649837493896, "logits_per_char": -0.7385824918746948, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 91, "native_id": "FCAT_2008_5_2", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3714433908462524, "incorrect_loss_raw": 1.3992059628168743, "correct_loss_per_char": 0.6857216954231262, "incorrect_loss_per_char": 0.6996029814084371, "correct_loss_per_token": 1.3714433908462524, "incorrect_loss_per_token": 1.3992059628168743, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4960423707962036, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.4960423707962036, "logits_per_char": -0.7480211853981018, "num_chars": 2}, {"sum_logits": -1.3589674234390259, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.3589674234390259, "logits_per_char": -0.6794837117195129, "num_chars": 2}, {"sum_logits": -1.342608094215393, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": true, "logits_per_token": -1.342608094215393, "logits_per_char": -0.6713040471076965, "num_chars": 2}, {"sum_logits": -1.3714433908462524, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.3714433908462524, "logits_per_char": -0.6857216954231262, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 92, "native_id": "Mercury_SC_414041", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3618690967559814, "incorrect_loss_raw": 1.4001223246256511, "correct_loss_per_char": 0.6809345483779907, "incorrect_loss_per_char": 0.7000611623128256, "correct_loss_per_token": 1.3618690967559814, "incorrect_loss_per_token": 1.4001223246256511, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4322052001953125, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4322052001953125, "logits_per_char": -0.7161026000976562, "num_chars": 2}, {"sum_logits": -1.4110279083251953, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4110279083251953, "logits_per_char": -0.7055139541625977, "num_chars": 2}, {"sum_logits": -1.3571338653564453, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.3571338653564453, "logits_per_char": -0.6785669326782227, "num_chars": 2}, {"sum_logits": -1.3618690967559814, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.3618690967559814, "logits_per_char": -0.6809345483779907, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 93, "native_id": "MCAS_2014_8_20", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4578609466552734, "incorrect_loss_raw": 1.3709252675374348, "correct_loss_per_char": 0.7289304733276367, "incorrect_loss_per_char": 0.6854626337687174, "correct_loss_per_token": 1.4578609466552734, "incorrect_loss_per_token": 1.3709252675374348, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4578609466552734, "num_tokens": 1, "num_tokens_all": 427, "is_greedy": false, "logits_per_token": -1.4578609466552734, "logits_per_char": -0.7289304733276367, "num_chars": 2}, {"sum_logits": -1.406790852546692, "num_tokens": 1, "num_tokens_all": 427, "is_greedy": false, "logits_per_token": -1.406790852546692, "logits_per_char": -0.703395426273346, "num_chars": 2}, {"sum_logits": -1.4282346963882446, "num_tokens": 1, "num_tokens_all": 427, "is_greedy": false, "logits_per_token": -1.4282346963882446, "logits_per_char": -0.7141173481941223, "num_chars": 2}, {"sum_logits": -1.2777502536773682, "num_tokens": 1, "num_tokens_all": 427, "is_greedy": true, "logits_per_token": -1.2777502536773682, "logits_per_char": -0.6388751268386841, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 94, "native_id": "Mercury_SC_401116", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4136061668395996, "incorrect_loss_raw": 1.3849382797876995, "correct_loss_per_char": 0.7068030834197998, "incorrect_loss_per_char": 0.6924691398938497, "correct_loss_per_token": 1.4136061668395996, "incorrect_loss_per_token": 1.3849382797876995, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.357657790184021, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.357657790184021, "logits_per_char": -0.6788288950920105, "num_chars": 2}, {"sum_logits": -1.4136061668395996, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4136061668395996, "logits_per_char": -0.7068030834197998, "num_chars": 2}, {"sum_logits": -1.4679288864135742, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4679288864135742, "logits_per_char": -0.7339644432067871, "num_chars": 2}, {"sum_logits": -1.329228162765503, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.329228162765503, "logits_per_char": -0.6646140813827515, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 95, "native_id": "Mercury_7064680", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4456044435501099, "incorrect_loss_raw": 1.3734780550003052, "correct_loss_per_char": 0.7228022217750549, "incorrect_loss_per_char": 0.6867390275001526, "correct_loss_per_token": 1.4456044435501099, "incorrect_loss_per_token": 1.3734780550003052, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3128035068511963, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.3128035068511963, "logits_per_char": -0.6564017534255981, "num_chars": 2}, {"sum_logits": -1.4176734685897827, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4176734685897827, "logits_per_char": -0.7088367342948914, "num_chars": 2}, {"sum_logits": -1.4456044435501099, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4456044435501099, "logits_per_char": -0.7228022217750549, "num_chars": 2}, {"sum_logits": -1.3899571895599365, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3899571895599365, "logits_per_char": -0.6949785947799683, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 96, "native_id": "Mercury_7211680", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.487714171409607, "incorrect_loss_raw": 1.3596568902333577, "correct_loss_per_char": 0.7438570857048035, "incorrect_loss_per_char": 0.6798284451166788, "correct_loss_per_token": 1.487714171409607, "incorrect_loss_per_token": 1.3596568902333577, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3462920188903809, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.3462920188903809, "logits_per_char": -0.6731460094451904, "num_chars": 2}, {"sum_logits": -1.400224208831787, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.400224208831787, "logits_per_char": -0.7001121044158936, "num_chars": 2}, {"sum_logits": -1.487714171409607, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.487714171409607, "logits_per_char": -0.7438570857048035, "num_chars": 2}, {"sum_logits": -1.3324544429779053, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": true, "logits_per_token": -1.3324544429779053, "logits_per_char": -0.6662272214889526, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 97, "native_id": "Mercury_180373", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4525271654129028, "incorrect_loss_raw": 1.3817766904830933, "correct_loss_per_char": 0.7262635827064514, "incorrect_loss_per_char": 0.6908883452415466, "correct_loss_per_token": 1.4525271654129028, "incorrect_loss_per_token": 1.3817766904830933, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1792593002319336, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.1792593002319336, "logits_per_char": -0.5896296501159668, "num_chars": 2}, {"sum_logits": -1.4525271654129028, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4525271654129028, "logits_per_char": -0.7262635827064514, "num_chars": 2}, {"sum_logits": -1.5248311758041382, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.5248311758041382, "logits_per_char": -0.7624155879020691, "num_chars": 2}, {"sum_logits": -1.441239595413208, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.441239595413208, "logits_per_char": -0.720619797706604, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 98, "native_id": "Mercury_7216248", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3752870559692383, "incorrect_loss_raw": 1.4057555198669434, "correct_loss_per_char": 0.6876435279846191, "incorrect_loss_per_char": 0.7028777599334717, "correct_loss_per_token": 1.3752870559692383, "incorrect_loss_per_token": 1.4057555198669434, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5728341341018677, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.5728341341018677, "logits_per_char": -0.7864170670509338, "num_chars": 2}, {"sum_logits": -1.3752870559692383, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3752870559692383, "logits_per_char": -0.6876435279846191, "num_chars": 2}, {"sum_logits": -1.4204100370407104, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4204100370407104, "logits_per_char": -0.7102050185203552, "num_chars": 2}, {"sum_logits": -1.224022388458252, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.224022388458252, "logits_per_char": -0.612011194229126, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 99, "native_id": "Mercury_SC_417677", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4193708896636963, "incorrect_loss_raw": 1.381229559580485, "correct_loss_per_char": 0.7096854448318481, "incorrect_loss_per_char": 0.6906147797902426, "correct_loss_per_token": 1.4193708896636963, "incorrect_loss_per_token": 1.381229559580485, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3162336349487305, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.3162336349487305, "logits_per_char": -0.6581168174743652, "num_chars": 2}, {"sum_logits": -1.4193708896636963, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4193708896636963, "logits_per_char": -0.7096854448318481, "num_chars": 2}, {"sum_logits": -1.454024314880371, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.454024314880371, "logits_per_char": -0.7270121574401855, "num_chars": 2}, {"sum_logits": -1.3734307289123535, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.3734307289123535, "logits_per_char": -0.6867153644561768, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 100, "native_id": "Mercury_7221655", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4540857076644897, "incorrect_loss_raw": 1.3704817295074463, "correct_loss_per_char": 0.7270428538322449, "incorrect_loss_per_char": 0.6852408647537231, "correct_loss_per_token": 1.4540857076644897, "incorrect_loss_per_token": 1.3704817295074463, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3805454969406128, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.3805454969406128, "logits_per_char": -0.6902727484703064, "num_chars": 2}, {"sum_logits": -1.4088579416275024, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.4088579416275024, "logits_per_char": -0.7044289708137512, "num_chars": 2}, {"sum_logits": -1.4540857076644897, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.4540857076644897, "logits_per_char": -0.7270428538322449, "num_chars": 2}, {"sum_logits": -1.3220417499542236, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": true, "logits_per_token": -1.3220417499542236, "logits_per_char": -0.6610208749771118, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 101, "native_id": "MCAS_2006_9_12", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.43082857131958, "incorrect_loss_raw": 1.3772837320963542, "correct_loss_per_char": 0.71541428565979, "incorrect_loss_per_char": 0.6886418660481771, "correct_loss_per_token": 1.43082857131958, "incorrect_loss_per_token": 1.3772837320963542, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.386902093887329, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.386902093887329, "logits_per_char": -0.6934510469436646, "num_chars": 2}, {"sum_logits": -1.4064993858337402, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4064993858337402, "logits_per_char": -0.7032496929168701, "num_chars": 2}, {"sum_logits": -1.3384497165679932, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.3384497165679932, "logits_per_char": -0.6692248582839966, "num_chars": 2}, {"sum_logits": -1.43082857131958, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.43082857131958, "logits_per_char": -0.71541428565979, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 102, "native_id": "MCAS_2004_9_2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.289422869682312, "incorrect_loss_raw": 1.4272480010986328, "correct_loss_per_char": 0.644711434841156, "incorrect_loss_per_char": 0.7136240005493164, "correct_loss_per_token": 1.289422869682312, "incorrect_loss_per_token": 1.4272480010986328, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.289422869682312, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.289422869682312, "logits_per_char": -0.644711434841156, "num_chars": 2}, {"sum_logits": -1.395805835723877, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.395805835723877, "logits_per_char": -0.6979029178619385, "num_chars": 2}, {"sum_logits": -1.4742313623428345, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.4742313623428345, "logits_per_char": -0.7371156811714172, "num_chars": 2}, {"sum_logits": -1.411706805229187, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.411706805229187, "logits_per_char": -0.7058534026145935, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 103, "native_id": "Mercury_180005", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2936002016067505, "incorrect_loss_raw": 1.4269063472747803, "correct_loss_per_char": 0.6468001008033752, "incorrect_loss_per_char": 0.7134531736373901, "correct_loss_per_token": 1.2936002016067505, "incorrect_loss_per_token": 1.4269063472747803, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.329005479812622, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.329005479812622, "logits_per_char": -0.664502739906311, "num_chars": 2}, {"sum_logits": -1.2936002016067505, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.2936002016067505, "logits_per_char": -0.6468001008033752, "num_chars": 2}, {"sum_logits": -1.4677621126174927, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4677621126174927, "logits_per_char": -0.7338810563087463, "num_chars": 2}, {"sum_logits": -1.483951449394226, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.483951449394226, "logits_per_char": -0.741975724697113, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 104, "native_id": "Mercury_7071523", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.433212399482727, "incorrect_loss_raw": 1.3834383487701416, "correct_loss_per_char": 0.7166061997413635, "incorrect_loss_per_char": 0.6917191743850708, "correct_loss_per_token": 1.433212399482727, "incorrect_loss_per_token": 1.3834383487701416, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.54269278049469, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.54269278049469, "logits_per_char": -0.771346390247345, "num_chars": 2}, {"sum_logits": -1.2789075374603271, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.2789075374603271, "logits_per_char": -0.6394537687301636, "num_chars": 2}, {"sum_logits": -1.433212399482727, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.433212399482727, "logits_per_char": -0.7166061997413635, "num_chars": 2}, {"sum_logits": -1.3287147283554077, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.3287147283554077, "logits_per_char": -0.6643573641777039, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 105, "native_id": "Mercury_7263375", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.439397931098938, "incorrect_loss_raw": 1.3746631542841594, "correct_loss_per_char": 0.719698965549469, "incorrect_loss_per_char": 0.6873315771420797, "correct_loss_per_token": 1.439397931098938, "incorrect_loss_per_token": 1.3746631542841594, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3919811248779297, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.3919811248779297, "logits_per_char": -0.6959905624389648, "num_chars": 2}, {"sum_logits": -1.439397931098938, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.439397931098938, "logits_per_char": -0.719698965549469, "num_chars": 2}, {"sum_logits": -1.4210779666900635, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.4210779666900635, "logits_per_char": -0.7105389833450317, "num_chars": 2}, {"sum_logits": -1.3109303712844849, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": true, "logits_per_token": -1.3109303712844849, "logits_per_char": -0.6554651856422424, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 106, "native_id": "TIMSS_2011_8_pg102", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3325053453445435, "incorrect_loss_raw": 1.4150770505269368, "correct_loss_per_char": 0.6662526726722717, "incorrect_loss_per_char": 0.7075385252634684, "correct_loss_per_token": 1.3325053453445435, "incorrect_loss_per_token": 1.4150770505269368, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5047776699066162, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.5047776699066162, "logits_per_char": -0.7523888349533081, "num_chars": 2}, {"sum_logits": -1.2718262672424316, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.2718262672424316, "logits_per_char": -0.6359131336212158, "num_chars": 2}, {"sum_logits": -1.4686272144317627, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4686272144317627, "logits_per_char": -0.7343136072158813, "num_chars": 2}, {"sum_logits": -1.3325053453445435, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.3325053453445435, "logits_per_char": -0.6662526726722717, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 107, "native_id": "Mercury_406550", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4571894407272339, "incorrect_loss_raw": 1.3811311721801758, "correct_loss_per_char": 0.7285947203636169, "incorrect_loss_per_char": 0.6905655860900879, "correct_loss_per_token": 1.4571894407272339, "incorrect_loss_per_token": 1.3811311721801758, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5571216344833374, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.5571216344833374, "logits_per_char": -0.7785608172416687, "num_chars": 2}, {"sum_logits": -1.4571894407272339, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.4571894407272339, "logits_per_char": -0.7285947203636169, "num_chars": 2}, {"sum_logits": -1.4260693788528442, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.4260693788528442, "logits_per_char": -0.7130346894264221, "num_chars": 2}, {"sum_logits": -1.1602025032043457, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -1.1602025032043457, "logits_per_char": -0.5801012516021729, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 108, "native_id": "Mercury_SC_400057", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2435129880905151, "incorrect_loss_raw": 1.4480210145314534, "correct_loss_per_char": 0.6217564940452576, "incorrect_loss_per_char": 0.7240105072657267, "correct_loss_per_token": 1.2435129880905151, "incorrect_loss_per_token": 1.4480210145314534, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3381365537643433, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.3381365537643433, "logits_per_char": -0.6690682768821716, "num_chars": 2}, {"sum_logits": -1.4336328506469727, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.4336328506469727, "logits_per_char": -0.7168164253234863, "num_chars": 2}, {"sum_logits": -1.5722936391830444, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.5722936391830444, "logits_per_char": -0.7861468195915222, "num_chars": 2}, {"sum_logits": -1.2435129880905151, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.2435129880905151, "logits_per_char": -0.6217564940452576, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 109, "native_id": "TAKS_2009_5_26", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3507269620895386, "incorrect_loss_raw": 1.4077613353729248, "correct_loss_per_char": 0.6753634810447693, "incorrect_loss_per_char": 0.7038806676864624, "correct_loss_per_token": 1.3507269620895386, "incorrect_loss_per_token": 1.4077613353729248, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3507269620895386, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.3507269620895386, "logits_per_char": -0.6753634810447693, "num_chars": 2}, {"sum_logits": -1.3743966817855835, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3743966817855835, "logits_per_char": -0.6871983408927917, "num_chars": 2}, {"sum_logits": -1.4562007188796997, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4562007188796997, "logits_per_char": -0.7281003594398499, "num_chars": 2}, {"sum_logits": -1.3926866054534912, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3926866054534912, "logits_per_char": -0.6963433027267456, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 110, "native_id": "LEAP_2007_8_10417", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.516597867012024, "incorrect_loss_raw": 1.3546475172042847, "correct_loss_per_char": 0.758298933506012, "incorrect_loss_per_char": 0.6773237586021423, "correct_loss_per_token": 1.516597867012024, "incorrect_loss_per_token": 1.3546475172042847, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.516597867012024, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": false, "logits_per_token": -1.516597867012024, "logits_per_char": -0.758298933506012, "num_chars": 2}, {"sum_logits": -1.3556538820266724, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": false, "logits_per_token": -1.3556538820266724, "logits_per_char": -0.6778269410133362, "num_chars": 2}, {"sum_logits": -1.430957317352295, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": false, "logits_per_token": -1.430957317352295, "logits_per_char": -0.7154786586761475, "num_chars": 2}, {"sum_logits": -1.2773313522338867, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": true, "logits_per_token": -1.2773313522338867, "logits_per_char": -0.6386656761169434, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 111, "native_id": "Mercury_7027405", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5242009162902832, "incorrect_loss_raw": 1.3522796233495076, "correct_loss_per_char": 0.7621004581451416, "incorrect_loss_per_char": 0.6761398116747538, "correct_loss_per_token": 1.5242009162902832, "incorrect_loss_per_token": 1.3522796233495076, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5242009162902832, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.5242009162902832, "logits_per_char": -0.7621004581451416, "num_chars": 2}, {"sum_logits": -1.3012492656707764, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.3012492656707764, "logits_per_char": -0.6506246328353882, "num_chars": 2}, {"sum_logits": -1.4593944549560547, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4593944549560547, "logits_per_char": -0.7296972274780273, "num_chars": 2}, {"sum_logits": -1.296195149421692, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.296195149421692, "logits_per_char": -0.648097574710846, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 112, "native_id": "Mercury_7058415", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4105077981948853, "incorrect_loss_raw": 1.385355552037557, "correct_loss_per_char": 0.7052538990974426, "incorrect_loss_per_char": 0.6926777760187784, "correct_loss_per_token": 1.4105077981948853, "incorrect_loss_per_token": 1.385355552037557, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4105077981948853, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4105077981948853, "logits_per_char": -0.7052538990974426, "num_chars": 2}, {"sum_logits": -1.3304696083068848, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.3304696083068848, "logits_per_char": -0.6652348041534424, "num_chars": 2}, {"sum_logits": -1.4769439697265625, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4769439697265625, "logits_per_char": -0.7384719848632812, "num_chars": 2}, {"sum_logits": -1.3486530780792236, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.3486530780792236, "logits_per_char": -0.6743265390396118, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 113, "native_id": "Mercury_7215828", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4580448865890503, "incorrect_loss_raw": 1.3720715443293254, "correct_loss_per_char": 0.7290224432945251, "incorrect_loss_per_char": 0.6860357721646627, "correct_loss_per_token": 1.4580448865890503, "incorrect_loss_per_token": 1.3720715443293254, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4580448865890503, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.4580448865890503, "logits_per_char": -0.7290224432945251, "num_chars": 2}, {"sum_logits": -1.2743055820465088, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.2743055820465088, "logits_per_char": -0.6371527910232544, "num_chars": 2}, {"sum_logits": -1.4634007215499878, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.4634007215499878, "logits_per_char": -0.7317003607749939, "num_chars": 2}, {"sum_logits": -1.3785083293914795, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.3785083293914795, "logits_per_char": -0.6892541646957397, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 114, "native_id": "Mercury_7064575", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3316107988357544, "incorrect_loss_raw": 1.415716568628947, "correct_loss_per_char": 0.6658053994178772, "incorrect_loss_per_char": 0.7078582843144735, "correct_loss_per_token": 1.3316107988357544, "incorrect_loss_per_token": 1.415716568628947, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2814379930496216, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.2814379930496216, "logits_per_char": -0.6407189965248108, "num_chars": 2}, {"sum_logits": -1.3316107988357544, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.3316107988357544, "logits_per_char": -0.6658053994178772, "num_chars": 2}, {"sum_logits": -1.4847058057785034, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4847058057785034, "logits_per_char": -0.7423529028892517, "num_chars": 2}, {"sum_logits": -1.4810059070587158, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4810059070587158, "logits_per_char": -0.7405029535293579, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 115, "native_id": "Mercury_7097493", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4656777381896973, "incorrect_loss_raw": 1.368183175722758, "correct_loss_per_char": 0.7328388690948486, "incorrect_loss_per_char": 0.684091587861379, "correct_loss_per_token": 1.4656777381896973, "incorrect_loss_per_token": 1.368183175722758, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.275851845741272, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.275851845741272, "logits_per_char": -0.637925922870636, "num_chars": 2}, {"sum_logits": -1.3990963697433472, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3990963697433472, "logits_per_char": -0.6995481848716736, "num_chars": 2}, {"sum_logits": -1.4296013116836548, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4296013116836548, "logits_per_char": -0.7148006558418274, "num_chars": 2}, {"sum_logits": -1.4656777381896973, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4656777381896973, "logits_per_char": -0.7328388690948486, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 116, "native_id": "AKDE&ED_2008_8_47", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3923498392105103, "incorrect_loss_raw": 1.3902812401453655, "correct_loss_per_char": 0.6961749196052551, "incorrect_loss_per_char": 0.6951406200726827, "correct_loss_per_token": 1.3923498392105103, "incorrect_loss_per_token": 1.3902812401453655, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3893225193023682, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.3893225193023682, "logits_per_char": -0.6946612596511841, "num_chars": 2}, {"sum_logits": -1.3923498392105103, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.3923498392105103, "logits_per_char": -0.6961749196052551, "num_chars": 2}, {"sum_logits": -1.41304612159729, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.41304612159729, "logits_per_char": -0.706523060798645, "num_chars": 2}, {"sum_logits": -1.368475079536438, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.368475079536438, "logits_per_char": -0.684237539768219, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 117, "native_id": "Mercury_405136", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4296693801879883, "incorrect_loss_raw": 1.3825922807057698, "correct_loss_per_char": 0.7148346900939941, "incorrect_loss_per_char": 0.6912961403528849, "correct_loss_per_token": 1.4296693801879883, "incorrect_loss_per_token": 1.3825922807057698, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2927403450012207, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.2927403450012207, "logits_per_char": -0.6463701725006104, "num_chars": 2}, {"sum_logits": -1.469346284866333, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.469346284866333, "logits_per_char": -0.7346731424331665, "num_chars": 2}, {"sum_logits": -1.4296693801879883, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4296693801879883, "logits_per_char": -0.7148346900939941, "num_chars": 2}, {"sum_logits": -1.3856902122497559, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3856902122497559, "logits_per_char": -0.6928451061248779, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 118, "native_id": "Mercury_415086", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.307532548904419, "incorrect_loss_raw": 1.4201006889343262, "correct_loss_per_char": 0.6537662744522095, "incorrect_loss_per_char": 0.7100503444671631, "correct_loss_per_token": 1.307532548904419, "incorrect_loss_per_token": 1.4201006889343262, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4730747938156128, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4730747938156128, "logits_per_char": -0.7365373969078064, "num_chars": 2}, {"sum_logits": -1.3939950466156006, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.3939950466156006, "logits_per_char": -0.6969975233078003, "num_chars": 2}, {"sum_logits": -1.3932322263717651, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.3932322263717651, "logits_per_char": -0.6966161131858826, "num_chars": 2}, {"sum_logits": -1.307532548904419, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.307532548904419, "logits_per_char": -0.6537662744522095, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 119, "native_id": "Mercury_7228725", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4128751754760742, "incorrect_loss_raw": 1.383548895517985, "correct_loss_per_char": 0.7064375877380371, "incorrect_loss_per_char": 0.6917744477589926, "correct_loss_per_token": 1.4128751754760742, "incorrect_loss_per_token": 1.383548895517985, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.399922490119934, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.399922490119934, "logits_per_char": -0.699961245059967, "num_chars": 2}, {"sum_logits": -1.3836249113082886, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.3836249113082886, "logits_per_char": -0.6918124556541443, "num_chars": 2}, {"sum_logits": -1.4128751754760742, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4128751754760742, "logits_per_char": -0.7064375877380371, "num_chars": 2}, {"sum_logits": -1.3670992851257324, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.3670992851257324, "logits_per_char": -0.6835496425628662, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 120, "native_id": "Mercury_7201740", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4542558193206787, "incorrect_loss_raw": 1.3762805064519246, "correct_loss_per_char": 0.7271279096603394, "incorrect_loss_per_char": 0.6881402532259623, "correct_loss_per_token": 1.4542558193206787, "incorrect_loss_per_token": 1.3762805064519246, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4542558193206787, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.4542558193206787, "logits_per_char": -0.7271279096603394, "num_chars": 2}, {"sum_logits": -1.360144019126892, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.360144019126892, "logits_per_char": -0.680072009563446, "num_chars": 2}, {"sum_logits": -1.517587423324585, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.517587423324585, "logits_per_char": -0.7587937116622925, "num_chars": 2}, {"sum_logits": -1.2511100769042969, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": true, "logits_per_token": -1.2511100769042969, "logits_per_char": -0.6255550384521484, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 121, "native_id": "NYSEDREGENTS_2010_4_4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0370665788650513, "incorrect_loss_raw": 1.3708504438400269, "correct_loss_per_char": 0.5185332894325256, "incorrect_loss_per_char": 0.6854252219200134, "correct_loss_per_token": 1.0370665788650513, "incorrect_loss_per_token": 1.3708504438400269, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0370665788650513, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": true, "logits_per_token": -1.0370665788650513, "logits_per_char": -0.5185332894325256, "num_chars": 2}, {"sum_logits": -1.2368009090423584, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.2368009090423584, "logits_per_char": -0.6184004545211792, "num_chars": 2}, {"sum_logits": -1.5048999786376953, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.5048999786376953, "logits_per_char": -0.7524499893188477, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 122, "native_id": "MEAP_2005_8_21", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3078235387802124, "incorrect_loss_raw": 1.4199693997701008, "correct_loss_per_char": 0.6539117693901062, "incorrect_loss_per_char": 0.7099846998850504, "correct_loss_per_token": 1.3078235387802124, "incorrect_loss_per_token": 1.4199693997701008, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4000319242477417, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4000319242477417, "logits_per_char": -0.7000159621238708, "num_chars": 2}, {"sum_logits": -1.4055317640304565, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4055317640304565, "logits_per_char": -0.7027658820152283, "num_chars": 2}, {"sum_logits": -1.4543445110321045, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4543445110321045, "logits_per_char": -0.7271722555160522, "num_chars": 2}, {"sum_logits": -1.3078235387802124, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.3078235387802124, "logits_per_char": -0.6539117693901062, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 123, "native_id": "Mercury_7026355", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3252733945846558, "incorrect_loss_raw": 1.4156904617945354, "correct_loss_per_char": 0.6626366972923279, "incorrect_loss_per_char": 0.7078452308972677, "correct_loss_per_token": 1.3252733945846558, "incorrect_loss_per_token": 1.4156904617945354, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3252733945846558, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": true, "logits_per_token": -1.3252733945846558, "logits_per_char": -0.6626366972923279, "num_chars": 2}, {"sum_logits": -1.361169457435608, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.361169457435608, "logits_per_char": -0.680584728717804, "num_chars": 2}, {"sum_logits": -1.5041706562042236, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.5041706562042236, "logits_per_char": -0.7520853281021118, "num_chars": 2}, {"sum_logits": -1.3817312717437744, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.3817312717437744, "logits_per_char": -0.6908656358718872, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 124, "native_id": "Mercury_7249708", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3416990041732788, "incorrect_loss_raw": 1.406984766324361, "correct_loss_per_char": 0.6708495020866394, "incorrect_loss_per_char": 0.7034923831621805, "correct_loss_per_token": 1.3416990041732788, "incorrect_loss_per_token": 1.406984766324361, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3778738975524902, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.3778738975524902, "logits_per_char": -0.6889369487762451, "num_chars": 2}, {"sum_logits": -1.3416990041732788, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.3416990041732788, "logits_per_char": -0.6708495020866394, "num_chars": 2}, {"sum_logits": -1.3961923122406006, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.3961923122406006, "logits_per_char": -0.6980961561203003, "num_chars": 2}, {"sum_logits": -1.4468880891799927, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4468880891799927, "logits_per_char": -0.7234440445899963, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 125, "native_id": "Mercury_7107170", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3990671634674072, "incorrect_loss_raw": 1.390477220217387, "correct_loss_per_char": 0.6995335817337036, "incorrect_loss_per_char": 0.6952386101086935, "correct_loss_per_token": 1.3990671634674072, "incorrect_loss_per_token": 1.390477220217387, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2747608423233032, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.2747608423233032, "logits_per_char": -0.6373804211616516, "num_chars": 2}, {"sum_logits": -1.4576401710510254, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4576401710510254, "logits_per_char": -0.7288200855255127, "num_chars": 2}, {"sum_logits": -1.439030647277832, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.439030647277832, "logits_per_char": -0.719515323638916, "num_chars": 2}, {"sum_logits": -1.3990671634674072, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3990671634674072, "logits_per_char": -0.6995335817337036, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 126, "native_id": "Mercury_183820", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6507247686386108, "incorrect_loss_raw": 1.3186676502227783, "correct_loss_per_char": 0.8253623843193054, "incorrect_loss_per_char": 0.6593338251113892, "correct_loss_per_token": 1.6507247686386108, "incorrect_loss_per_token": 1.3186676502227783, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6507247686386108, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.6507247686386108, "logits_per_char": -0.8253623843193054, "num_chars": 2}, {"sum_logits": -1.3277684450149536, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.3277684450149536, "logits_per_char": -0.6638842225074768, "num_chars": 2}, {"sum_logits": -1.3170623779296875, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.3170623779296875, "logits_per_char": -0.6585311889648438, "num_chars": 2}, {"sum_logits": -1.3111721277236938, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": true, "logits_per_token": -1.3111721277236938, "logits_per_char": -0.6555860638618469, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 127, "native_id": "Mercury_SC_401357", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4157848358154297, "incorrect_loss_raw": 1.3823448419570923, "correct_loss_per_char": 0.7078924179077148, "incorrect_loss_per_char": 0.6911724209785461, "correct_loss_per_token": 1.4157848358154297, "incorrect_loss_per_token": 1.3823448419570923, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4118002653121948, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.4118002653121948, "logits_per_char": -0.7059001326560974, "num_chars": 2}, {"sum_logits": -1.4157848358154297, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.4157848358154297, "logits_per_char": -0.7078924179077148, "num_chars": 2}, {"sum_logits": -1.3813204765319824, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.3813204765319824, "logits_per_char": -0.6906602382659912, "num_chars": 2}, {"sum_logits": -1.3539137840270996, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.3539137840270996, "logits_per_char": -0.6769568920135498, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 128, "native_id": "NYSEDREGENTS_2008_8_11", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4641189575195312, "incorrect_loss_raw": 1.3706518809000652, "correct_loss_per_char": 0.7320594787597656, "incorrect_loss_per_char": 0.6853259404500326, "correct_loss_per_token": 1.4641189575195312, "incorrect_loss_per_token": 1.3706518809000652, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4326555728912354, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.4326555728912354, "logits_per_char": -0.7163277864456177, "num_chars": 2}, {"sum_logits": -1.2511162757873535, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": true, "logits_per_token": -1.2511162757873535, "logits_per_char": -0.6255581378936768, "num_chars": 2}, {"sum_logits": -1.4281837940216064, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.4281837940216064, "logits_per_char": -0.7140918970108032, "num_chars": 2}, {"sum_logits": -1.4641189575195312, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.4641189575195312, "logits_per_char": -0.7320594787597656, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 129, "native_id": "Mercury_416650", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4502158164978027, "incorrect_loss_raw": 1.3756980895996094, "correct_loss_per_char": 0.7251079082489014, "incorrect_loss_per_char": 0.6878490447998047, "correct_loss_per_token": 1.4502158164978027, "incorrect_loss_per_token": 1.3756980895996094, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4858448505401611, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4858448505401611, "logits_per_char": -0.7429224252700806, "num_chars": 2}, {"sum_logits": -1.2843983173370361, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.2843983173370361, "logits_per_char": -0.6421991586685181, "num_chars": 2}, {"sum_logits": -1.4502158164978027, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4502158164978027, "logits_per_char": -0.7251079082489014, "num_chars": 2}, {"sum_logits": -1.3568511009216309, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.3568511009216309, "logits_per_char": -0.6784255504608154, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 130, "native_id": "NCEOGA_2013_5_20", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4972501993179321, "incorrect_loss_raw": 1.361089547475179, "correct_loss_per_char": 0.7486250996589661, "incorrect_loss_per_char": 0.6805447737375895, "correct_loss_per_token": 1.4972501993179321, "incorrect_loss_per_token": 1.361089547475179, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4643827676773071, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4643827676773071, "logits_per_char": -0.7321913838386536, "num_chars": 2}, {"sum_logits": -1.4972501993179321, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4972501993179321, "logits_per_char": -0.7486250996589661, "num_chars": 2}, {"sum_logits": -1.3547697067260742, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3547697067260742, "logits_per_char": -0.6773848533630371, "num_chars": 2}, {"sum_logits": -1.2641161680221558, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.2641161680221558, "logits_per_char": -0.6320580840110779, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 131, "native_id": "Mercury_400500", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4972399473190308, "incorrect_loss_raw": 1.3659677902857463, "correct_loss_per_char": 0.7486199736595154, "incorrect_loss_per_char": 0.6829838951428732, "correct_loss_per_token": 1.4972399473190308, "incorrect_loss_per_token": 1.3659677902857463, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5602266788482666, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.5602266788482666, "logits_per_char": -0.7801133394241333, "num_chars": 2}, {"sum_logits": -1.2905915975570679, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.2905915975570679, "logits_per_char": -0.6452957987785339, "num_chars": 2}, {"sum_logits": -1.4972399473190308, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4972399473190308, "logits_per_char": -0.7486199736595154, "num_chars": 2}, {"sum_logits": -1.2470850944519043, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.2470850944519043, "logits_per_char": -0.6235425472259521, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 132, "native_id": "Mercury_SC_401366", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4394328594207764, "incorrect_loss_raw": 1.3770310878753662, "correct_loss_per_char": 0.7197164297103882, "incorrect_loss_per_char": 0.6885155439376831, "correct_loss_per_token": 1.4394328594207764, "incorrect_loss_per_token": 1.3770310878753662, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3645614385604858, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.3645614385604858, "logits_per_char": -0.6822807192802429, "num_chars": 2}, {"sum_logits": -1.2936382293701172, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.2936382293701172, "logits_per_char": -0.6468191146850586, "num_chars": 2}, {"sum_logits": -1.4728935956954956, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4728935956954956, "logits_per_char": -0.7364467978477478, "num_chars": 2}, {"sum_logits": -1.4394328594207764, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4394328594207764, "logits_per_char": -0.7197164297103882, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 133, "native_id": "Mercury_7141610", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.367479920387268, "incorrect_loss_raw": 1.404821753501892, "correct_loss_per_char": 0.683739960193634, "incorrect_loss_per_char": 0.702410876750946, "correct_loss_per_token": 1.367479920387268, "incorrect_loss_per_token": 1.404821753501892, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5554174184799194, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.5554174184799194, "logits_per_char": -0.7777087092399597, "num_chars": 2}, {"sum_logits": -1.367479920387268, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.367479920387268, "logits_per_char": -0.683739960193634, "num_chars": 2}, {"sum_logits": -1.3905903100967407, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3905903100967407, "logits_per_char": -0.6952951550483704, "num_chars": 2}, {"sum_logits": -1.2684575319290161, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.2684575319290161, "logits_per_char": -0.6342287659645081, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 134, "native_id": "Mercury_7247013", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3318830728530884, "incorrect_loss_raw": 1.4130228360493977, "correct_loss_per_char": 0.6659415364265442, "incorrect_loss_per_char": 0.7065114180246989, "correct_loss_per_token": 1.3318830728530884, "incorrect_loss_per_token": 1.4130228360493977, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3472232818603516, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.3472232818603516, "logits_per_char": -0.6736116409301758, "num_chars": 2}, {"sum_logits": -1.3318830728530884, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.3318830728530884, "logits_per_char": -0.6659415364265442, "num_chars": 2}, {"sum_logits": -1.4536398649215698, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4536398649215698, "logits_per_char": -0.7268199324607849, "num_chars": 2}, {"sum_logits": -1.438205361366272, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.438205361366272, "logits_per_char": -0.719102680683136, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 135, "native_id": "NYSEDREGENTS_2008_8_30", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4444324970245361, "incorrect_loss_raw": 1.3776332139968872, "correct_loss_per_char": 0.7222162485122681, "incorrect_loss_per_char": 0.6888166069984436, "correct_loss_per_token": 1.4444324970245361, "incorrect_loss_per_token": 1.3776332139968872, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.286516547203064, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.286516547203064, "logits_per_char": -0.643258273601532, "num_chars": 2}, {"sum_logits": -1.4444324970245361, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4444324970245361, "logits_per_char": -0.7222162485122681, "num_chars": 2}, {"sum_logits": -1.4911653995513916, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4911653995513916, "logits_per_char": -0.7455826997756958, "num_chars": 2}, {"sum_logits": -1.355217695236206, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.355217695236206, "logits_per_char": -0.677608847618103, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 136, "native_id": "ACTAAP_2011_5_16", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4121100902557373, "incorrect_loss_raw": 1.4041302998860676, "correct_loss_per_char": 0.7060550451278687, "incorrect_loss_per_char": 0.7020651499430338, "correct_loss_per_token": 1.4121100902557373, "incorrect_loss_per_token": 1.4041302998860676, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5922448635101318, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.5922448635101318, "logits_per_char": -0.7961224317550659, "num_chars": 2}, {"sum_logits": -1.4121100902557373, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.4121100902557373, "logits_per_char": -0.7060550451278687, "num_chars": 2}, {"sum_logits": -1.502976655960083, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.502976655960083, "logits_per_char": -0.7514883279800415, "num_chars": 2}, {"sum_logits": -1.1171693801879883, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -1.1171693801879883, "logits_per_char": -0.5585846900939941, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 137, "native_id": "Mercury_7093153", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.392372965812683, "incorrect_loss_raw": 1.3994484345118205, "correct_loss_per_char": 0.6961864829063416, "incorrect_loss_per_char": 0.6997242172559103, "correct_loss_per_token": 1.392372965812683, "incorrect_loss_per_token": 1.3994484345118205, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3312935829162598, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.3312935829162598, "logits_per_char": -0.6656467914581299, "num_chars": 2}, {"sum_logits": -1.392372965812683, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.392372965812683, "logits_per_char": -0.6961864829063416, "num_chars": 2}, {"sum_logits": -1.5423256158828735, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.5423256158828735, "logits_per_char": -0.7711628079414368, "num_chars": 2}, {"sum_logits": -1.3247261047363281, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.3247261047363281, "logits_per_char": -0.6623630523681641, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 138, "native_id": "Mercury_7013965", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4565120935440063, "incorrect_loss_raw": 1.3737234274546306, "correct_loss_per_char": 0.7282560467720032, "incorrect_loss_per_char": 0.6868617137273153, "correct_loss_per_token": 1.4565120935440063, "incorrect_loss_per_token": 1.3737234274546306, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4565120935440063, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4565120935440063, "logits_per_char": -0.7282560467720032, "num_chars": 2}, {"sum_logits": -1.401892900466919, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.401892900466919, "logits_per_char": -0.7009464502334595, "num_chars": 2}, {"sum_logits": -1.465827226638794, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.465827226638794, "logits_per_char": -0.732913613319397, "num_chars": 2}, {"sum_logits": -1.2534501552581787, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.2534501552581787, "logits_per_char": -0.6267250776290894, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 139, "native_id": "Mercury_7034843", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4295299053192139, "incorrect_loss_raw": 1.3825725317001343, "correct_loss_per_char": 0.7147649526596069, "incorrect_loss_per_char": 0.6912862658500671, "correct_loss_per_token": 1.4295299053192139, "incorrect_loss_per_token": 1.3825725317001343, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3003594875335693, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.3003594875335693, "logits_per_char": -0.6501797437667847, "num_chars": 2}, {"sum_logits": -1.4295299053192139, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4295299053192139, "logits_per_char": -0.7147649526596069, "num_chars": 2}, {"sum_logits": -1.3799258470535278, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.3799258470535278, "logits_per_char": -0.6899629235267639, "num_chars": 2}, {"sum_logits": -1.4674322605133057, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4674322605133057, "logits_per_char": -0.7337161302566528, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 140, "native_id": "Mercury_SC_407610", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2294621467590332, "incorrect_loss_raw": 1.450908859570821, "correct_loss_per_char": 0.6147310733795166, "incorrect_loss_per_char": 0.7254544297854105, "correct_loss_per_token": 1.2294621467590332, "incorrect_loss_per_token": 1.450908859570821, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5086625814437866, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.5086625814437866, "logits_per_char": -0.7543312907218933, "num_chars": 2}, {"sum_logits": -1.423828363418579, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.423828363418579, "logits_per_char": -0.7119141817092896, "num_chars": 2}, {"sum_logits": -1.4202356338500977, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4202356338500977, "logits_per_char": -0.7101178169250488, "num_chars": 2}, {"sum_logits": -1.2294621467590332, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.2294621467590332, "logits_per_char": -0.6147310733795166, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 141, "native_id": "Mercury_405947", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4301891326904297, "incorrect_loss_raw": 1.3787716229756672, "correct_loss_per_char": 0.7150945663452148, "incorrect_loss_per_char": 0.6893858114878336, "correct_loss_per_token": 1.4301891326904297, "incorrect_loss_per_token": 1.3787716229756672, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.432844877243042, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.432844877243042, "logits_per_char": -0.716422438621521, "num_chars": 2}, {"sum_logits": -1.3984805345535278, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.3984805345535278, "logits_per_char": -0.6992402672767639, "num_chars": 2}, {"sum_logits": -1.4301891326904297, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4301891326904297, "logits_per_char": -0.7150945663452148, "num_chars": 2}, {"sum_logits": -1.3049894571304321, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.3049894571304321, "logits_per_char": -0.6524947285652161, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 142, "native_id": "AKDE&ED_2012_8_6", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5382421016693115, "incorrect_loss_raw": 1.3477483987808228, "correct_loss_per_char": 0.7691210508346558, "incorrect_loss_per_char": 0.6738741993904114, "correct_loss_per_token": 1.5382421016693115, "incorrect_loss_per_token": 1.3477483987808228, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5382421016693115, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.5382421016693115, "logits_per_char": -0.7691210508346558, "num_chars": 2}, {"sum_logits": -1.3613325357437134, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3613325357437134, "logits_per_char": -0.6806662678718567, "num_chars": 2}, {"sum_logits": -1.4267241954803467, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4267241954803467, "logits_per_char": -0.7133620977401733, "num_chars": 2}, {"sum_logits": -1.2551884651184082, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.2551884651184082, "logits_per_char": -0.6275942325592041, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 143, "native_id": "Mercury_7011130", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4096916913986206, "incorrect_loss_raw": 1.3905071417490642, "correct_loss_per_char": 0.7048458456993103, "incorrect_loss_per_char": 0.6952535708745321, "correct_loss_per_token": 1.4096916913986206, "incorrect_loss_per_token": 1.3905071417490642, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4096916913986206, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.4096916913986206, "logits_per_char": -0.7048458456993103, "num_chars": 2}, {"sum_logits": -1.4240949153900146, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.4240949153900146, "logits_per_char": -0.7120474576950073, "num_chars": 2}, {"sum_logits": -1.5053093433380127, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.5053093433380127, "logits_per_char": -0.7526546716690063, "num_chars": 2}, {"sum_logits": -1.242117166519165, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": true, "logits_per_token": -1.242117166519165, "logits_per_char": -0.6210585832595825, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 144, "native_id": "Mercury_LBS11022", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4093763828277588, "incorrect_loss_raw": 1.3882675170898438, "correct_loss_per_char": 0.7046881914138794, "incorrect_loss_per_char": 0.6941337585449219, "correct_loss_per_token": 1.4093763828277588, "incorrect_loss_per_token": 1.3882675170898438, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4093763828277588, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4093763828277588, "logits_per_char": -0.7046881914138794, "num_chars": 2}, {"sum_logits": -1.3790851831436157, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3790851831436157, "logits_per_char": -0.6895425915718079, "num_chars": 2}, {"sum_logits": -1.4844226837158203, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4844226837158203, "logits_per_char": -0.7422113418579102, "num_chars": 2}, {"sum_logits": -1.3012946844100952, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.3012946844100952, "logits_per_char": -0.6506473422050476, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 145, "native_id": "TIMSS_1995_8_J1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4318323135375977, "incorrect_loss_raw": 1.3795565764109294, "correct_loss_per_char": 0.7159161567687988, "incorrect_loss_per_char": 0.6897782882054647, "correct_loss_per_token": 1.4318323135375977, "incorrect_loss_per_token": 1.3795565764109294, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4432218074798584, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.4432218074798584, "logits_per_char": -0.7216109037399292, "num_chars": 2}, {"sum_logits": -1.2790402173995972, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": true, "logits_per_token": -1.2790402173995972, "logits_per_char": -0.6395201086997986, "num_chars": 2}, {"sum_logits": -1.4318323135375977, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.4318323135375977, "logits_per_char": -0.7159161567687988, "num_chars": 2}, {"sum_logits": -1.4164077043533325, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.4164077043533325, "logits_per_char": -0.7082038521766663, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 146, "native_id": "Mercury_SC_408366", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3416335582733154, "incorrect_loss_raw": 1.40861980120341, "correct_loss_per_char": 0.6708167791366577, "incorrect_loss_per_char": 0.704309900601705, "correct_loss_per_token": 1.3416335582733154, "incorrect_loss_per_token": 1.40861980120341, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3604828119277954, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3604828119277954, "logits_per_char": -0.6802414059638977, "num_chars": 2}, {"sum_logits": -1.3416335582733154, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.3416335582733154, "logits_per_char": -0.6708167791366577, "num_chars": 2}, {"sum_logits": -1.4150898456573486, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4150898456573486, "logits_per_char": -0.7075449228286743, "num_chars": 2}, {"sum_logits": -1.4502867460250854, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4502867460250854, "logits_per_char": -0.7251433730125427, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 147, "native_id": "Mercury_7009993", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.364174485206604, "incorrect_loss_raw": 1.403128703435262, "correct_loss_per_char": 0.682087242603302, "incorrect_loss_per_char": 0.701564351717631, "correct_loss_per_token": 1.364174485206604, "incorrect_loss_per_token": 1.403128703435262, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.364174485206604, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.364174485206604, "logits_per_char": -0.682087242603302, "num_chars": 2}, {"sum_logits": -1.316830039024353, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.316830039024353, "logits_per_char": -0.6584150195121765, "num_chars": 2}, {"sum_logits": -1.497174620628357, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.497174620628357, "logits_per_char": -0.7485873103141785, "num_chars": 2}, {"sum_logits": -1.3953814506530762, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.3953814506530762, "logits_per_char": -0.6976907253265381, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 148, "native_id": "Mercury_401699", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.147487998008728, "incorrect_loss_raw": 1.4965437253316243, "correct_loss_per_char": 0.573743999004364, "incorrect_loss_per_char": 0.7482718626658121, "correct_loss_per_token": 1.147487998008728, "incorrect_loss_per_token": 1.4965437253316243, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5391072034835815, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.5391072034835815, "logits_per_char": -0.7695536017417908, "num_chars": 2}, {"sum_logits": -1.582161784172058, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.582161784172058, "logits_per_char": -0.791080892086029, "num_chars": 2}, {"sum_logits": -1.147487998008728, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.147487998008728, "logits_per_char": -0.573743999004364, "num_chars": 2}, {"sum_logits": -1.3683621883392334, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.3683621883392334, "logits_per_char": -0.6841810941696167, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 149, "native_id": "Mercury_7056858", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3914794921875, "incorrect_loss_raw": 1.3918997446695964, "correct_loss_per_char": 0.69573974609375, "incorrect_loss_per_char": 0.6959498723347982, "correct_loss_per_token": 1.3914794921875, "incorrect_loss_per_token": 1.3918997446695964, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3914794921875, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3914794921875, "logits_per_char": -0.69573974609375, "num_chars": 2}, {"sum_logits": -1.392409324645996, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.392409324645996, "logits_per_char": -0.696204662322998, "num_chars": 2}, {"sum_logits": -1.4416364431381226, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4416364431381226, "logits_per_char": -0.7208182215690613, "num_chars": 2}, {"sum_logits": -1.3416534662246704, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.3416534662246704, "logits_per_char": -0.6708267331123352, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 150, "native_id": "Mercury_7027160", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4513099193572998, "incorrect_loss_raw": 1.3722002903620403, "correct_loss_per_char": 0.7256549596786499, "incorrect_loss_per_char": 0.6861001451810201, "correct_loss_per_token": 1.4513099193572998, "incorrect_loss_per_token": 1.3722002903620403, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4338592290878296, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4338592290878296, "logits_per_char": -0.7169296145439148, "num_chars": 2}, {"sum_logits": -1.3137109279632568, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.3137109279632568, "logits_per_char": -0.6568554639816284, "num_chars": 2}, {"sum_logits": -1.4513099193572998, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4513099193572998, "logits_per_char": -0.7256549596786499, "num_chars": 2}, {"sum_logits": -1.3690307140350342, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.3690307140350342, "logits_per_char": -0.6845153570175171, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 151, "native_id": "Mercury_400811", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3886466026306152, "incorrect_loss_raw": 1.3958054780960083, "correct_loss_per_char": 0.6943233013153076, "incorrect_loss_per_char": 0.6979027390480042, "correct_loss_per_token": 1.3886466026306152, "incorrect_loss_per_token": 1.3958054780960083, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3468035459518433, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3468035459518433, "logits_per_char": -0.6734017729759216, "num_chars": 2}, {"sum_logits": -1.3278454542160034, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.3278454542160034, "logits_per_char": -0.6639227271080017, "num_chars": 2}, {"sum_logits": -1.5127674341201782, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.5127674341201782, "logits_per_char": -0.7563837170600891, "num_chars": 2}, {"sum_logits": -1.3886466026306152, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3886466026306152, "logits_per_char": -0.6943233013153076, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 152, "native_id": "Mercury_SC_400062", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4865753650665283, "incorrect_loss_raw": 1.3666819334030151, "correct_loss_per_char": 0.7432876825332642, "incorrect_loss_per_char": 0.6833409667015076, "correct_loss_per_token": 1.4865753650665283, "incorrect_loss_per_token": 1.3666819334030151, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2408429384231567, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.2408429384231567, "logits_per_char": -0.6204214692115784, "num_chars": 2}, {"sum_logits": -1.3417404890060425, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3417404890060425, "logits_per_char": -0.6708702445030212, "num_chars": 2}, {"sum_logits": -1.5174623727798462, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.5174623727798462, "logits_per_char": -0.7587311863899231, "num_chars": 2}, {"sum_logits": -1.4865753650665283, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4865753650665283, "logits_per_char": -0.7432876825332642, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 153, "native_id": "Mercury_400699", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.417904019355774, "incorrect_loss_raw": 1.381827433904012, "correct_loss_per_char": 0.708952009677887, "incorrect_loss_per_char": 0.690913716952006, "correct_loss_per_token": 1.417904019355774, "incorrect_loss_per_token": 1.381827433904012, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3771342039108276, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.3771342039108276, "logits_per_char": -0.6885671019554138, "num_chars": 2}, {"sum_logits": -1.417904019355774, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.417904019355774, "logits_per_char": -0.708952009677887, "num_chars": 2}, {"sum_logits": -1.446129322052002, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.446129322052002, "logits_per_char": -0.723064661026001, "num_chars": 2}, {"sum_logits": -1.3222187757492065, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": true, "logits_per_token": -1.3222187757492065, "logits_per_char": -0.6611093878746033, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 154, "native_id": "Mercury_7029803", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.415191888809204, "incorrect_loss_raw": 1.383363405863444, "correct_loss_per_char": 0.707595944404602, "incorrect_loss_per_char": 0.691681702931722, "correct_loss_per_token": 1.415191888809204, "incorrect_loss_per_token": 1.383363405863444, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3609451055526733, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.3609451055526733, "logits_per_char": -0.6804725527763367, "num_chars": 2}, {"sum_logits": -1.415191888809204, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.415191888809204, "logits_per_char": -0.707595944404602, "num_chars": 2}, {"sum_logits": -1.3403749465942383, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.3403749465942383, "logits_per_char": -0.6701874732971191, "num_chars": 2}, {"sum_logits": -1.4487701654434204, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4487701654434204, "logits_per_char": -0.7243850827217102, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 155, "native_id": "Mercury_SC_401372", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4951640367507935, "incorrect_loss_raw": 1.364721377690633, "correct_loss_per_char": 0.7475820183753967, "incorrect_loss_per_char": 0.6823606888453165, "correct_loss_per_token": 1.4951640367507935, "incorrect_loss_per_token": 1.364721377690633, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2345889806747437, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.2345889806747437, "logits_per_char": -0.6172944903373718, "num_chars": 2}, {"sum_logits": -1.3787683248519897, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.3787683248519897, "logits_per_char": -0.6893841624259949, "num_chars": 2}, {"sum_logits": -1.480806827545166, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.480806827545166, "logits_per_char": -0.740403413772583, "num_chars": 2}, {"sum_logits": -1.4951640367507935, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.4951640367507935, "logits_per_char": -0.7475820183753967, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 156, "native_id": "Mercury_7271128", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1118144989013672, "incorrect_loss_raw": 1.5149225393931072, "correct_loss_per_char": 0.5559072494506836, "incorrect_loss_per_char": 0.7574612696965536, "correct_loss_per_token": 1.1118144989013672, "incorrect_loss_per_token": 1.5149225393931072, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.418783187866211, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.418783187866211, "logits_per_char": -0.7093915939331055, "num_chars": 2}, {"sum_logits": -1.6792173385620117, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.6792173385620117, "logits_per_char": -0.8396086692810059, "num_chars": 2}, {"sum_logits": -1.1118144989013672, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.1118144989013672, "logits_per_char": -0.5559072494506836, "num_chars": 2}, {"sum_logits": -1.4467670917510986, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4467670917510986, "logits_per_char": -0.7233835458755493, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 157, "native_id": "Mercury_407260", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3826041221618652, "incorrect_loss_raw": 1.409980336825053, "correct_loss_per_char": 0.6913020610809326, "incorrect_loss_per_char": 0.7049901684125265, "correct_loss_per_token": 1.3826041221618652, "incorrect_loss_per_token": 1.409980336825053, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1572558879852295, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.1572558879852295, "logits_per_char": -0.5786279439926147, "num_chars": 2}, {"sum_logits": -1.3826041221618652, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3826041221618652, "logits_per_char": -0.6913020610809326, "num_chars": 2}, {"sum_logits": -1.4801414012908936, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4801414012908936, "logits_per_char": -0.7400707006454468, "num_chars": 2}, {"sum_logits": -1.5925437211990356, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.5925437211990356, "logits_per_char": -0.7962718605995178, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 158, "native_id": "Mercury_SC_416155", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4170725345611572, "incorrect_loss_raw": 1.382936676343282, "correct_loss_per_char": 0.7085362672805786, "incorrect_loss_per_char": 0.691468338171641, "correct_loss_per_token": 1.4170725345611572, "incorrect_loss_per_token": 1.382936676343282, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3672579526901245, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3672579526901245, "logits_per_char": -0.6836289763450623, "num_chars": 2}, {"sum_logits": -1.4383717775344849, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4383717775344849, "logits_per_char": -0.7191858887672424, "num_chars": 2}, {"sum_logits": -1.4170725345611572, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4170725345611572, "logits_per_char": -0.7085362672805786, "num_chars": 2}, {"sum_logits": -1.3431802988052368, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.3431802988052368, "logits_per_char": -0.6715901494026184, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 159, "native_id": "Mercury_402145", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.384367823600769, "incorrect_loss_raw": 1.3930003643035889, "correct_loss_per_char": 0.6921839118003845, "incorrect_loss_per_char": 0.6965001821517944, "correct_loss_per_token": 1.384367823600769, "incorrect_loss_per_token": 1.3930003643035889, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4412996768951416, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4412996768951416, "logits_per_char": -0.7206498384475708, "num_chars": 2}, {"sum_logits": -1.384367823600769, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.384367823600769, "logits_per_char": -0.6921839118003845, "num_chars": 2}, {"sum_logits": -1.3441884517669678, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.3441884517669678, "logits_per_char": -0.6720942258834839, "num_chars": 2}, {"sum_logits": -1.3935129642486572, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3935129642486572, "logits_per_char": -0.6967564821243286, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 160, "native_id": "AIMS_2009_4_5", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4198259115219116, "incorrect_loss_raw": 1.3821499745051067, "correct_loss_per_char": 0.7099129557609558, "incorrect_loss_per_char": 0.6910749872525533, "correct_loss_per_token": 1.4198259115219116, "incorrect_loss_per_token": 1.3821499745051067, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4198259115219116, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.4198259115219116, "logits_per_char": -0.7099129557609558, "num_chars": 2}, {"sum_logits": -1.3353559970855713, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.3353559970855713, "logits_per_char": -0.6676779985427856, "num_chars": 2}, {"sum_logits": -1.4342665672302246, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.4342665672302246, "logits_per_char": -0.7171332836151123, "num_chars": 2}, {"sum_logits": -1.376827359199524, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.376827359199524, "logits_per_char": -0.688413679599762, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 161, "native_id": "TIMSS_2003_4_pg7", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3950363397598267, "incorrect_loss_raw": 1.3933825890223186, "correct_loss_per_char": 0.6975181698799133, "incorrect_loss_per_char": 0.6966912945111593, "correct_loss_per_token": 1.3950363397598267, "incorrect_loss_per_token": 1.3933825890223186, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.398310899734497, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.398310899734497, "logits_per_char": -0.6991554498672485, "num_chars": 2}, {"sum_logits": -1.4407835006713867, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4407835006713867, "logits_per_char": -0.7203917503356934, "num_chars": 2}, {"sum_logits": -1.3410533666610718, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.3410533666610718, "logits_per_char": -0.6705266833305359, "num_chars": 2}, {"sum_logits": -1.3950363397598267, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3950363397598267, "logits_per_char": -0.6975181698799133, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 162, "native_id": "Mercury_7142415", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5434515476226807, "incorrect_loss_raw": 1.35804283618927, "correct_loss_per_char": 0.7717257738113403, "incorrect_loss_per_char": 0.679021418094635, "correct_loss_per_token": 1.5434515476226807, "incorrect_loss_per_token": 1.35804283618927, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5434515476226807, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": false, "logits_per_token": -1.5434515476226807, "logits_per_char": -0.7717257738113403, "num_chars": 2}, {"sum_logits": -1.4336109161376953, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": false, "logits_per_token": -1.4336109161376953, "logits_per_char": -0.7168054580688477, "num_chars": 2}, {"sum_logits": -1.5234707593917847, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": false, "logits_per_token": -1.5234707593917847, "logits_per_char": -0.7617353796958923, "num_chars": 2}, {"sum_logits": -1.11704683303833, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": true, "logits_per_token": -1.11704683303833, "logits_per_char": -0.558523416519165, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 163, "native_id": "Mercury_7212818", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4383748769760132, "incorrect_loss_raw": 1.3837439219156902, "correct_loss_per_char": 0.7191874384880066, "incorrect_loss_per_char": 0.6918719609578451, "correct_loss_per_token": 1.4383748769760132, "incorrect_loss_per_token": 1.3837439219156902, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.201751947402954, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.201751947402954, "logits_per_char": -0.600875973701477, "num_chars": 2}, {"sum_logits": -1.4383748769760132, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4383748769760132, "logits_per_char": -0.7191874384880066, "num_chars": 2}, {"sum_logits": -1.5267330408096313, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.5267330408096313, "logits_per_char": -0.7633665204048157, "num_chars": 2}, {"sum_logits": -1.4227467775344849, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4227467775344849, "logits_per_char": -0.7113733887672424, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 164, "native_id": "Mercury_SC_413299", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.54500150680542, "incorrect_loss_raw": 1.348035415013631, "correct_loss_per_char": 0.77250075340271, "incorrect_loss_per_char": 0.6740177075068156, "correct_loss_per_token": 1.54500150680542, "incorrect_loss_per_token": 1.348035415013631, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.54500150680542, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.54500150680542, "logits_per_char": -0.77250075340271, "num_chars": 2}, {"sum_logits": -1.4736883640289307, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.4736883640289307, "logits_per_char": -0.7368441820144653, "num_chars": 2}, {"sum_logits": -1.32298743724823, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.32298743724823, "logits_per_char": -0.661493718624115, "num_chars": 2}, {"sum_logits": -1.247430443763733, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": true, "logits_per_token": -1.247430443763733, "logits_per_char": -0.6237152218818665, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 165, "native_id": "Mercury_7132020", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.438672423362732, "incorrect_loss_raw": 1.4058027664820354, "correct_loss_per_char": 0.719336211681366, "incorrect_loss_per_char": 0.7029013832410177, "correct_loss_per_token": 1.438672423362732, "incorrect_loss_per_token": 1.4058027664820354, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.438672423362732, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.438672423362732, "logits_per_char": -0.719336211681366, "num_chars": 2}, {"sum_logits": -1.6041104793548584, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.6041104793548584, "logits_per_char": -0.8020552396774292, "num_chars": 2}, {"sum_logits": -1.0790477991104126, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.0790477991104126, "logits_per_char": -0.5395238995552063, "num_chars": 2}, {"sum_logits": -1.534250020980835, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.534250020980835, "logits_per_char": -0.7671250104904175, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 166, "native_id": "MEA_2014_8_10", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.9718114137649536, "incorrect_loss_raw": 1.4182405074437459, "correct_loss_per_char": 0.9859057068824768, "incorrect_loss_per_char": 0.7091202537218729, "correct_loss_per_token": 1.9718114137649536, "incorrect_loss_per_token": 1.4182405074437459, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7422369718551636, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -0.7422369718551636, "logits_per_char": -0.3711184859275818, "num_chars": 2}, {"sum_logits": -1.4523682594299316, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4523682594299316, "logits_per_char": -0.7261841297149658, "num_chars": 2}, {"sum_logits": -1.9718114137649536, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.9718114137649536, "logits_per_char": -0.9859057068824768, "num_chars": 2}, {"sum_logits": -2.0601162910461426, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -2.0601162910461426, "logits_per_char": -1.0300581455230713, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 167, "native_id": "TIMSS_1995_8_N2", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3716421127319336, "incorrect_loss_raw": 1.410338004430135, "correct_loss_per_char": 0.6858210563659668, "incorrect_loss_per_char": 0.7051690022150675, "correct_loss_per_token": 1.3716421127319336, "incorrect_loss_per_token": 1.410338004430135, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6061979532241821, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.6061979532241821, "logits_per_char": -0.8030989766120911, "num_chars": 2}, {"sum_logits": -1.3716421127319336, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.3716421127319336, "logits_per_char": -0.6858210563659668, "num_chars": 2}, {"sum_logits": -1.4253727197647095, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.4253727197647095, "logits_per_char": -0.7126863598823547, "num_chars": 2}, {"sum_logits": -1.1994433403015137, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.1994433403015137, "logits_per_char": -0.5997216701507568, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 168, "native_id": "Mercury_7024465", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.351754069328308, "incorrect_loss_raw": 1.407613714536031, "correct_loss_per_char": 0.675877034664154, "incorrect_loss_per_char": 0.7038068572680155, "correct_loss_per_token": 1.351754069328308, "incorrect_loss_per_token": 1.407613714536031, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.351754069328308, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.351754069328308, "logits_per_char": -0.675877034664154, "num_chars": 2}, {"sum_logits": -1.2984912395477295, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.2984912395477295, "logits_per_char": -0.6492456197738647, "num_chars": 2}, {"sum_logits": -1.5229836702346802, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.5229836702346802, "logits_per_char": -0.7614918351173401, "num_chars": 2}, {"sum_logits": -1.4013662338256836, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4013662338256836, "logits_per_char": -0.7006831169128418, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 169, "native_id": "Mercury_SC_415762", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3882426023483276, "incorrect_loss_raw": 1.3926202058792114, "correct_loss_per_char": 0.6941213011741638, "incorrect_loss_per_char": 0.6963101029396057, "correct_loss_per_token": 1.3882426023483276, "incorrect_loss_per_token": 1.3926202058792114, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3546453714370728, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.3546453714370728, "logits_per_char": -0.6773226857185364, "num_chars": 2}, {"sum_logits": -1.4664936065673828, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4664936065673828, "logits_per_char": -0.7332468032836914, "num_chars": 2}, {"sum_logits": -1.3567216396331787, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.3567216396331787, "logits_per_char": -0.6783608198165894, "num_chars": 2}, {"sum_logits": -1.3882426023483276, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.3882426023483276, "logits_per_char": -0.6941213011741638, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 170, "native_id": "Mercury_415093", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4503355026245117, "incorrect_loss_raw": 1.3804296652475994, "correct_loss_per_char": 0.7251677513122559, "incorrect_loss_per_char": 0.6902148326237997, "correct_loss_per_token": 1.4503355026245117, "incorrect_loss_per_token": 1.3804296652475994, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.541059970855713, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.541059970855713, "logits_per_char": -0.7705299854278564, "num_chars": 2}, {"sum_logits": -1.2074564695358276, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": true, "logits_per_token": -1.2074564695358276, "logits_per_char": -0.6037282347679138, "num_chars": 2}, {"sum_logits": -1.3927725553512573, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.3927725553512573, "logits_per_char": -0.6963862776756287, "num_chars": 2}, {"sum_logits": -1.4503355026245117, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.4503355026245117, "logits_per_char": -0.7251677513122559, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 171, "native_id": "LEAP_2005_8_10404", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2588566541671753, "incorrect_loss_raw": 1.4384831190109253, "correct_loss_per_char": 0.6294283270835876, "incorrect_loss_per_char": 0.7192415595054626, "correct_loss_per_token": 1.2588566541671753, "incorrect_loss_per_token": 1.4384831190109253, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4658317565917969, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4658317565917969, "logits_per_char": -0.7329158782958984, "num_chars": 2}, {"sum_logits": -1.3992873430252075, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3992873430252075, "logits_per_char": -0.6996436715126038, "num_chars": 2}, {"sum_logits": -1.4503302574157715, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4503302574157715, "logits_per_char": -0.7251651287078857, "num_chars": 2}, {"sum_logits": -1.2588566541671753, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.2588566541671753, "logits_per_char": -0.6294283270835876, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 172, "native_id": "AIMS_2008_8_6", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.395702838897705, "incorrect_loss_raw": 1.3960902690887451, "correct_loss_per_char": 0.6978514194488525, "incorrect_loss_per_char": 0.6980451345443726, "correct_loss_per_token": 1.395702838897705, "incorrect_loss_per_token": 1.3960902690887451, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5289758443832397, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.5289758443832397, "logits_per_char": -0.7644879221916199, "num_chars": 2}, {"sum_logits": -1.395702838897705, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.395702838897705, "logits_per_char": -0.6978514194488525, "num_chars": 2}, {"sum_logits": -1.4221159219741821, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.4221159219741821, "logits_per_char": -0.7110579609870911, "num_chars": 2}, {"sum_logits": -1.2371790409088135, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": true, "logits_per_token": -1.2371790409088135, "logits_per_char": -0.6185895204544067, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 173, "native_id": "Mercury_7057173", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.29816472530365, "incorrect_loss_raw": 1.4272875388463337, "correct_loss_per_char": 0.649082362651825, "incorrect_loss_per_char": 0.7136437694231669, "correct_loss_per_token": 1.29816472530365, "incorrect_loss_per_token": 1.4272875388463337, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4557386636734009, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4557386636734009, "logits_per_char": -0.7278693318367004, "num_chars": 2}, {"sum_logits": -1.2919009923934937, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.2919009923934937, "logits_per_char": -0.6459504961967468, "num_chars": 2}, {"sum_logits": -1.534222960472107, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.534222960472107, "logits_per_char": -0.7671114802360535, "num_chars": 2}, {"sum_logits": -1.29816472530365, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.29816472530365, "logits_per_char": -0.649082362651825, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 174, "native_id": "TIMSS_2007_8_pg60", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5022050142288208, "incorrect_loss_raw": 1.3599743445714314, "correct_loss_per_char": 0.7511025071144104, "incorrect_loss_per_char": 0.6799871722857157, "correct_loss_per_token": 1.5022050142288208, "incorrect_loss_per_token": 1.3599743445714314, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3467066287994385, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.3467066287994385, "logits_per_char": -0.6733533143997192, "num_chars": 2}, {"sum_logits": -1.3495862483978271, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3495862483978271, "logits_per_char": -0.6747931241989136, "num_chars": 2}, {"sum_logits": -1.5022050142288208, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.5022050142288208, "logits_per_char": -0.7511025071144104, "num_chars": 2}, {"sum_logits": -1.3836301565170288, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3836301565170288, "logits_per_char": -0.6918150782585144, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 175, "native_id": "AIMS_2009_8_14", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.329716682434082, "incorrect_loss_raw": 1.4140984217325847, "correct_loss_per_char": 0.664858341217041, "incorrect_loss_per_char": 0.7070492108662924, "correct_loss_per_token": 1.329716682434082, "incorrect_loss_per_token": 1.4140984217325847, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.329716682434082, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": true, "logits_per_token": -1.329716682434082, "logits_per_char": -0.664858341217041, "num_chars": 2}, {"sum_logits": -1.3719782829284668, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.3719782829284668, "logits_per_char": -0.6859891414642334, "num_chars": 2}, {"sum_logits": -1.4265122413635254, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.4265122413635254, "logits_per_char": -0.7132561206817627, "num_chars": 2}, {"sum_logits": -1.4438047409057617, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.4438047409057617, "logits_per_char": -0.7219023704528809, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 176, "native_id": "Mercury_185010", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4623816013336182, "incorrect_loss_raw": 1.3675914605458577, "correct_loss_per_char": 0.7311908006668091, "incorrect_loss_per_char": 0.6837957302729288, "correct_loss_per_token": 1.4623816013336182, "incorrect_loss_per_token": 1.3675914605458577, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4623816013336182, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4623816013336182, "logits_per_char": -0.7311908006668091, "num_chars": 2}, {"sum_logits": -1.346786379814148, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.346786379814148, "logits_per_char": -0.673393189907074, "num_chars": 2}, {"sum_logits": -1.3701237440109253, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.3701237440109253, "logits_per_char": -0.6850618720054626, "num_chars": 2}, {"sum_logits": -1.3858642578125, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.3858642578125, "logits_per_char": -0.69293212890625, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 177, "native_id": "Mercury_7206938", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1537798643112183, "incorrect_loss_raw": 1.4859191179275513, "correct_loss_per_char": 0.5768899321556091, "incorrect_loss_per_char": 0.7429595589637756, "correct_loss_per_token": 1.1537798643112183, "incorrect_loss_per_token": 1.4859191179275513, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1537798643112183, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.1537798643112183, "logits_per_char": -0.5768899321556091, "num_chars": 2}, {"sum_logits": -1.4188644886016846, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4188644886016846, "logits_per_char": -0.7094322443008423, "num_chars": 2}, {"sum_logits": -1.4568018913269043, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4568018913269043, "logits_per_char": -0.7284009456634521, "num_chars": 2}, {"sum_logits": -1.582090973854065, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.582090973854065, "logits_per_char": -0.7910454869270325, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 178, "native_id": "Mercury_402501", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3274518251419067, "incorrect_loss_raw": 1.4237722555796306, "correct_loss_per_char": 0.6637259125709534, "incorrect_loss_per_char": 0.7118861277898153, "correct_loss_per_token": 1.3274518251419067, "incorrect_loss_per_token": 1.4237722555796306, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5984183549880981, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.5984183549880981, "logits_per_char": -0.7992091774940491, "num_chars": 2}, {"sum_logits": -1.3274518251419067, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3274518251419067, "logits_per_char": -0.6637259125709534, "num_chars": 2}, {"sum_logits": -1.4525419473648071, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4525419473648071, "logits_per_char": -0.7262709736824036, "num_chars": 2}, {"sum_logits": -1.2203564643859863, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.2203564643859863, "logits_per_char": -0.6101782321929932, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 179, "native_id": "MCAS_2011_8_15365", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5051361322402954, "incorrect_loss_raw": 1.3569842179616292, "correct_loss_per_char": 0.7525680661201477, "incorrect_loss_per_char": 0.6784921089808146, "correct_loss_per_token": 1.5051361322402954, "incorrect_loss_per_token": 1.3569842179616292, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3122050762176514, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.3122050762176514, "logits_per_char": -0.6561025381088257, "num_chars": 2}, {"sum_logits": -1.3617175817489624, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.3617175817489624, "logits_per_char": -0.6808587908744812, "num_chars": 2}, {"sum_logits": -1.5051361322402954, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.5051361322402954, "logits_per_char": -0.7525680661201477, "num_chars": 2}, {"sum_logits": -1.397029995918274, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.397029995918274, "logits_per_char": -0.698514997959137, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 180, "native_id": "Mercury_SC_401766", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4821126461029053, "incorrect_loss_raw": 1.3633371988932292, "correct_loss_per_char": 0.7410563230514526, "incorrect_loss_per_char": 0.6816685994466146, "correct_loss_per_token": 1.4821126461029053, "incorrect_loss_per_token": 1.3633371988932292, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4821126461029053, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4821126461029053, "logits_per_char": -0.7410563230514526, "num_chars": 2}, {"sum_logits": -1.4368178844451904, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4368178844451904, "logits_per_char": -0.7184089422225952, "num_chars": 2}, {"sum_logits": -1.3375608921051025, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3375608921051025, "logits_per_char": -0.6687804460525513, "num_chars": 2}, {"sum_logits": -1.3156328201293945, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.3156328201293945, "logits_per_char": -0.6578164100646973, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 181, "native_id": "Mercury_7162400", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2569228410720825, "incorrect_loss_raw": 1.4439884424209595, "correct_loss_per_char": 0.6284614205360413, "incorrect_loss_per_char": 0.7219942212104797, "correct_loss_per_token": 1.2569228410720825, "incorrect_loss_per_token": 1.4439884424209595, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.51316237449646, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.51316237449646, "logits_per_char": -0.75658118724823, "num_chars": 2}, {"sum_logits": -1.2949525117874146, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.2949525117874146, "logits_per_char": -0.6474762558937073, "num_chars": 2}, {"sum_logits": -1.523850440979004, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.523850440979004, "logits_per_char": -0.761925220489502, "num_chars": 2}, {"sum_logits": -1.2569228410720825, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": true, "logits_per_token": -1.2569228410720825, "logits_per_char": -0.6284614205360413, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 182, "native_id": "Mercury_7086695", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.460150122642517, "incorrect_loss_raw": 1.3699838320414226, "correct_loss_per_char": 0.7300750613212585, "incorrect_loss_per_char": 0.6849919160207113, "correct_loss_per_token": 1.460150122642517, "incorrect_loss_per_token": 1.3699838320414226, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.329150915145874, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.329150915145874, "logits_per_char": -0.664575457572937, "num_chars": 2}, {"sum_logits": -1.3312238454818726, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3312238454818726, "logits_per_char": -0.6656119227409363, "num_chars": 2}, {"sum_logits": -1.460150122642517, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.460150122642517, "logits_per_char": -0.7300750613212585, "num_chars": 2}, {"sum_logits": -1.449576735496521, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.449576735496521, "logits_per_char": -0.7247883677482605, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 183, "native_id": "Mercury_SC_402994", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.485764980316162, "incorrect_loss_raw": 1.3628419240315754, "correct_loss_per_char": 0.742882490158081, "incorrect_loss_per_char": 0.6814209620157877, "correct_loss_per_token": 1.485764980316162, "incorrect_loss_per_token": 1.3628419240315754, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3693217039108276, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.3693217039108276, "logits_per_char": -0.6846608519554138, "num_chars": 2}, {"sum_logits": -1.4548670053482056, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.4548670053482056, "logits_per_char": -0.7274335026741028, "num_chars": 2}, {"sum_logits": -1.485764980316162, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.485764980316162, "logits_per_char": -0.742882490158081, "num_chars": 2}, {"sum_logits": -1.2643370628356934, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.2643370628356934, "logits_per_char": -0.6321685314178467, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 184, "native_id": "Mercury_7056298", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5212022066116333, "incorrect_loss_raw": 1.3515195449193318, "correct_loss_per_char": 0.7606011033058167, "incorrect_loss_per_char": 0.6757597724596659, "correct_loss_per_token": 1.5212022066116333, "incorrect_loss_per_token": 1.3515195449193318, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5212022066116333, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.5212022066116333, "logits_per_char": -0.7606011033058167, "num_chars": 2}, {"sum_logits": -1.448679804801941, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.448679804801941, "logits_per_char": -0.7243399024009705, "num_chars": 2}, {"sum_logits": -1.2934130430221558, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.2934130430221558, "logits_per_char": -0.6467065215110779, "num_chars": 2}, {"sum_logits": -1.312465786933899, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.312465786933899, "logits_per_char": -0.6562328934669495, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 185, "native_id": "Mercury_409115", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.443321943283081, "incorrect_loss_raw": 1.3866613308588664, "correct_loss_per_char": 0.7216609716415405, "incorrect_loss_per_char": 0.6933306654294332, "correct_loss_per_token": 1.443321943283081, "incorrect_loss_per_token": 1.3866613308588664, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.483452558517456, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": false, "logits_per_token": -1.483452558517456, "logits_per_char": -0.741726279258728, "num_chars": 2}, {"sum_logits": -1.5285072326660156, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": false, "logits_per_token": -1.5285072326660156, "logits_per_char": -0.7642536163330078, "num_chars": 2}, {"sum_logits": -1.443321943283081, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": false, "logits_per_token": -1.443321943283081, "logits_per_char": -0.7216609716415405, "num_chars": 2}, {"sum_logits": -1.1480242013931274, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": true, "logits_per_token": -1.1480242013931274, "logits_per_char": -0.5740121006965637, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 186, "native_id": "Mercury_409647", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2348320484161377, "incorrect_loss_raw": 1.448162833849589, "correct_loss_per_char": 0.6174160242080688, "incorrect_loss_per_char": 0.7240814169247946, "correct_loss_per_token": 1.2348320484161377, "incorrect_loss_per_token": 1.448162833849589, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4637705087661743, "num_tokens": 1, "num_tokens_all": 418, "is_greedy": false, "logits_per_token": -1.4637705087661743, "logits_per_char": -0.7318852543830872, "num_chars": 2}, {"sum_logits": -1.4029366970062256, "num_tokens": 1, "num_tokens_all": 418, "is_greedy": false, "logits_per_token": -1.4029366970062256, "logits_per_char": -0.7014683485031128, "num_chars": 2}, {"sum_logits": -1.4777812957763672, "num_tokens": 1, "num_tokens_all": 418, "is_greedy": false, "logits_per_token": -1.4777812957763672, "logits_per_char": -0.7388906478881836, "num_chars": 2}, {"sum_logits": -1.2348320484161377, "num_tokens": 1, "num_tokens_all": 418, "is_greedy": true, "logits_per_token": -1.2348320484161377, "logits_per_char": -0.6174160242080688, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 187, "native_id": "Mercury_414352", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3443185091018677, "incorrect_loss_raw": 1.4063104391098022, "correct_loss_per_char": 0.6721592545509338, "incorrect_loss_per_char": 0.7031552195549011, "correct_loss_per_token": 1.3443185091018677, "incorrect_loss_per_token": 1.4063104391098022, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3443185091018677, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.3443185091018677, "logits_per_char": -0.6721592545509338, "num_chars": 2}, {"sum_logits": -1.399395227432251, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.399395227432251, "logits_per_char": -0.6996976137161255, "num_chars": 2}, {"sum_logits": -1.4219894409179688, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.4219894409179688, "logits_per_char": -0.7109947204589844, "num_chars": 2}, {"sum_logits": -1.397546648979187, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.397546648979187, "logits_per_char": -0.6987733244895935, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 188, "native_id": "Mercury_185325", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4700019359588623, "incorrect_loss_raw": 1.3658448060353596, "correct_loss_per_char": 0.7350009679794312, "incorrect_loss_per_char": 0.6829224030176798, "correct_loss_per_token": 1.4700019359588623, "incorrect_loss_per_token": 1.3658448060353596, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3381946086883545, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.3381946086883545, "logits_per_char": -0.6690973043441772, "num_chars": 2}, {"sum_logits": -1.439047932624817, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.439047932624817, "logits_per_char": -0.7195239663124084, "num_chars": 2}, {"sum_logits": -1.4700019359588623, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4700019359588623, "logits_per_char": -0.7350009679794312, "num_chars": 2}, {"sum_logits": -1.3202918767929077, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.3202918767929077, "logits_per_char": -0.6601459383964539, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 189, "native_id": "Mercury_SC_412374", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4704052209854126, "incorrect_loss_raw": 1.367895523707072, "correct_loss_per_char": 0.7352026104927063, "incorrect_loss_per_char": 0.683947761853536, "correct_loss_per_token": 1.4704052209854126, "incorrect_loss_per_token": 1.367895523707072, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3999557495117188, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3999557495117188, "logits_per_char": -0.6999778747558594, "num_chars": 2}, {"sum_logits": -1.4450328350067139, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4450328350067139, "logits_per_char": -0.7225164175033569, "num_chars": 2}, {"sum_logits": -1.4704052209854126, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4704052209854126, "logits_per_char": -0.7352026104927063, "num_chars": 2}, {"sum_logits": -1.2586979866027832, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.2586979866027832, "logits_per_char": -0.6293489933013916, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 190, "native_id": "Mercury_SC_401818", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.427091121673584, "incorrect_loss_raw": 1.404823104540507, "correct_loss_per_char": 0.713545560836792, "incorrect_loss_per_char": 0.7024115522702535, "correct_loss_per_token": 1.427091121673584, "incorrect_loss_per_token": 1.404823104540507, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6283831596374512, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.6283831596374512, "logits_per_char": -0.8141915798187256, "num_chars": 2}, {"sum_logits": -1.4862881898880005, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4862881898880005, "logits_per_char": -0.7431440949440002, "num_chars": 2}, {"sum_logits": -1.427091121673584, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.427091121673584, "logits_per_char": -0.713545560836792, "num_chars": 2}, {"sum_logits": -1.0997979640960693, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.0997979640960693, "logits_per_char": -0.5498989820480347, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 191, "native_id": "Mercury_SC_413549", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6291450262069702, "incorrect_loss_raw": 1.3374435901641846, "correct_loss_per_char": 0.8145725131034851, "incorrect_loss_per_char": 0.6687217950820923, "correct_loss_per_token": 1.6291450262069702, "incorrect_loss_per_token": 1.3374435901641846, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4724316596984863, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.4724316596984863, "logits_per_char": -0.7362158298492432, "num_chars": 2}, {"sum_logits": -1.6291450262069702, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.6291450262069702, "logits_per_char": -0.8145725131034851, "num_chars": 2}, {"sum_logits": -1.4416133165359497, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.4416133165359497, "logits_per_char": -0.7208066582679749, "num_chars": 2}, {"sum_logits": -1.0982857942581177, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": true, "logits_per_token": -1.0982857942581177, "logits_per_char": -0.5491428971290588, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 192, "native_id": "Mercury_7093958", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.431937575340271, "incorrect_loss_raw": 1.3765720923741658, "correct_loss_per_char": 0.7159687876701355, "incorrect_loss_per_char": 0.6882860461870829, "correct_loss_per_token": 1.431937575340271, "incorrect_loss_per_token": 1.3765720923741658, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.431937575340271, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.431937575340271, "logits_per_char": -0.7159687876701355, "num_chars": 2}, {"sum_logits": -1.350359320640564, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.350359320640564, "logits_per_char": -0.675179660320282, "num_chars": 2}, {"sum_logits": -1.4097726345062256, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4097726345062256, "logits_per_char": -0.7048863172531128, "num_chars": 2}, {"sum_logits": -1.369584321975708, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.369584321975708, "logits_per_char": -0.684792160987854, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 193, "native_id": "Mercury_7102323", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3255178928375244, "incorrect_loss_raw": 1.4141714175542195, "correct_loss_per_char": 0.6627589464187622, "incorrect_loss_per_char": 0.7070857087771097, "correct_loss_per_token": 1.3255178928375244, "incorrect_loss_per_token": 1.4141714175542195, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4500048160552979, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4500048160552979, "logits_per_char": -0.7250024080276489, "num_chars": 2}, {"sum_logits": -1.3518011569976807, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.3518011569976807, "logits_per_char": -0.6759005784988403, "num_chars": 2}, {"sum_logits": -1.4407082796096802, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4407082796096802, "logits_per_char": -0.7203541398048401, "num_chars": 2}, {"sum_logits": -1.3255178928375244, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.3255178928375244, "logits_per_char": -0.6627589464187622, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 194, "native_id": "Mercury_7222793", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4644523859024048, "incorrect_loss_raw": 1.371522307395935, "correct_loss_per_char": 0.7322261929512024, "incorrect_loss_per_char": 0.6857611536979675, "correct_loss_per_token": 1.4644523859024048, "incorrect_loss_per_token": 1.371522307395935, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5108518600463867, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.5108518600463867, "logits_per_char": -0.7554259300231934, "num_chars": 2}, {"sum_logits": -1.4644523859024048, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.4644523859024048, "logits_per_char": -0.7322261929512024, "num_chars": 2}, {"sum_logits": -1.3123220205307007, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.3123220205307007, "logits_per_char": -0.6561610102653503, "num_chars": 2}, {"sum_logits": -1.2913930416107178, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": true, "logits_per_token": -1.2913930416107178, "logits_per_char": -0.6456965208053589, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 195, "native_id": "Mercury_SC_400701", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.477332592010498, "incorrect_loss_raw": 1.3659745852152507, "correct_loss_per_char": 0.738666296005249, "incorrect_loss_per_char": 0.6829872926076254, "correct_loss_per_token": 1.477332592010498, "incorrect_loss_per_token": 1.3659745852152507, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.477332592010498, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.477332592010498, "logits_per_char": -0.738666296005249, "num_chars": 2}, {"sum_logits": -1.477036714553833, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.477036714553833, "logits_per_char": -0.7385183572769165, "num_chars": 2}, {"sum_logits": -1.3532222509384155, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3532222509384155, "logits_per_char": -0.6766111254692078, "num_chars": 2}, {"sum_logits": -1.2676647901535034, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.2676647901535034, "logits_per_char": -0.6338323950767517, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 196, "native_id": "Mercury_409301", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4755733013153076, "incorrect_loss_raw": 1.3691572745641072, "correct_loss_per_char": 0.7377866506576538, "incorrect_loss_per_char": 0.6845786372820536, "correct_loss_per_token": 1.4755733013153076, "incorrect_loss_per_token": 1.3691572745641072, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4080826044082642, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -1.4080826044082642, "logits_per_char": -0.7040413022041321, "num_chars": 2}, {"sum_logits": -1.4755733013153076, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -1.4755733013153076, "logits_per_char": -0.7377866506576538, "num_chars": 2}, {"sum_logits": -1.476100206375122, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -1.476100206375122, "logits_per_char": -0.738050103187561, "num_chars": 2}, {"sum_logits": -1.2232890129089355, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": true, "logits_per_token": -1.2232890129089355, "logits_per_char": -0.6116445064544678, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 197, "native_id": "Mercury_SC_400383", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4090447425842285, "incorrect_loss_raw": 1.3855931361516316, "correct_loss_per_char": 0.7045223712921143, "incorrect_loss_per_char": 0.6927965680758158, "correct_loss_per_token": 1.4090447425842285, "incorrect_loss_per_token": 1.3855931361516316, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4654213190078735, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4654213190078735, "logits_per_char": -0.7327106595039368, "num_chars": 2}, {"sum_logits": -1.3381952047348022, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.3381952047348022, "logits_per_char": -0.6690976023674011, "num_chars": 2}, {"sum_logits": -1.4090447425842285, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4090447425842285, "logits_per_char": -0.7045223712921143, "num_chars": 2}, {"sum_logits": -1.3531628847122192, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.3531628847122192, "logits_per_char": -0.6765814423561096, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 198, "native_id": "CSZ_2005_5_CSZ10021", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4193788766860962, "incorrect_loss_raw": 1.3806796073913574, "correct_loss_per_char": 0.7096894383430481, "incorrect_loss_per_char": 0.6903398036956787, "correct_loss_per_token": 1.4193788766860962, "incorrect_loss_per_token": 1.3806796073913574, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3755900859832764, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.3755900859832764, "logits_per_char": -0.6877950429916382, "num_chars": 2}, {"sum_logits": -1.4193788766860962, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4193788766860962, "logits_per_char": -0.7096894383430481, "num_chars": 2}, {"sum_logits": -1.385334849357605, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.385334849357605, "logits_per_char": -0.6926674246788025, "num_chars": 2}, {"sum_logits": -1.381113886833191, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.381113886833191, "logits_per_char": -0.6905569434165955, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 199, "native_id": "Mercury_SC_407070", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3298484086990356, "incorrect_loss_raw": 1.412097970644633, "correct_loss_per_char": 0.6649242043495178, "incorrect_loss_per_char": 0.7060489853223165, "correct_loss_per_token": 1.3298484086990356, "incorrect_loss_per_token": 1.412097970644633, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3850704431533813, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3850704431533813, "logits_per_char": -0.6925352215766907, "num_chars": 2}, {"sum_logits": -1.3298484086990356, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.3298484086990356, "logits_per_char": -0.6649242043495178, "num_chars": 2}, {"sum_logits": -1.4243888854980469, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4243888854980469, "logits_per_char": -0.7121944427490234, "num_chars": 2}, {"sum_logits": -1.4268345832824707, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4268345832824707, "logits_per_char": -0.7134172916412354, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 200, "native_id": "Mercury_SC_400708", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4936606884002686, "incorrect_loss_raw": 1.3610847393671672, "correct_loss_per_char": 0.7468303442001343, "incorrect_loss_per_char": 0.6805423696835836, "correct_loss_per_token": 1.4936606884002686, "incorrect_loss_per_token": 1.3610847393671672, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2766908407211304, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.2766908407211304, "logits_per_char": -0.6383454203605652, "num_chars": 2}, {"sum_logits": -1.4676928520202637, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4676928520202637, "logits_per_char": -0.7338464260101318, "num_chars": 2}, {"sum_logits": -1.4936606884002686, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4936606884002686, "logits_per_char": -0.7468303442001343, "num_chars": 2}, {"sum_logits": -1.3388705253601074, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.3388705253601074, "logits_per_char": -0.6694352626800537, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 201, "native_id": "Mercury_7075040", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5279417037963867, "incorrect_loss_raw": 1.3698268334070842, "correct_loss_per_char": 0.7639708518981934, "incorrect_loss_per_char": 0.6849134167035421, "correct_loss_per_token": 1.5279417037963867, "incorrect_loss_per_token": 1.3698268334070842, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1157995462417603, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.1157995462417603, "logits_per_char": -0.5578997731208801, "num_chars": 2}, {"sum_logits": -1.3726556301116943, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.3726556301116943, "logits_per_char": -0.6863278150558472, "num_chars": 2}, {"sum_logits": -1.5279417037963867, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.5279417037963867, "logits_per_char": -0.7639708518981934, "num_chars": 2}, {"sum_logits": -1.6210253238677979, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.6210253238677979, "logits_per_char": -0.8105126619338989, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 202, "native_id": "Mercury_7137165", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4762272834777832, "incorrect_loss_raw": 1.3667312860488892, "correct_loss_per_char": 0.7381136417388916, "incorrect_loss_per_char": 0.6833656430244446, "correct_loss_per_token": 1.4762272834777832, "incorrect_loss_per_token": 1.3667312860488892, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4762272834777832, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.4762272834777832, "logits_per_char": -0.7381136417388916, "num_chars": 2}, {"sum_logits": -1.4916647672653198, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.4916647672653198, "logits_per_char": -0.7458323836326599, "num_chars": 2}, {"sum_logits": -1.3187193870544434, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.3187193870544434, "logits_per_char": -0.6593596935272217, "num_chars": 2}, {"sum_logits": -1.2898097038269043, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": true, "logits_per_token": -1.2898097038269043, "logits_per_char": -0.6449048519134521, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 203, "native_id": "Mercury_SC_400046", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.482159972190857, "incorrect_loss_raw": 1.3620057106018066, "correct_loss_per_char": 0.7410799860954285, "incorrect_loss_per_char": 0.6810028553009033, "correct_loss_per_token": 1.482159972190857, "incorrect_loss_per_token": 1.3620057106018066, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3438231945037842, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.3438231945037842, "logits_per_char": -0.6719115972518921, "num_chars": 2}, {"sum_logits": -1.482159972190857, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.482159972190857, "logits_per_char": -0.7410799860954285, "num_chars": 2}, {"sum_logits": -1.4287922382354736, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4287922382354736, "logits_per_char": -0.7143961191177368, "num_chars": 2}, {"sum_logits": -1.313401699066162, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.313401699066162, "logits_per_char": -0.656700849533081, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 204, "native_id": "Mercury_7099330", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.386110782623291, "incorrect_loss_raw": 1.3927077054977417, "correct_loss_per_char": 0.6930553913116455, "incorrect_loss_per_char": 0.6963538527488708, "correct_loss_per_token": 1.386110782623291, "incorrect_loss_per_token": 1.3927077054977417, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.385785460472107, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.385785460472107, "logits_per_char": -0.6928927302360535, "num_chars": 2}, {"sum_logits": -1.3519529104232788, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.3519529104232788, "logits_per_char": -0.6759764552116394, "num_chars": 2}, {"sum_logits": -1.4403847455978394, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.4403847455978394, "logits_per_char": -0.7201923727989197, "num_chars": 2}, {"sum_logits": -1.386110782623291, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.386110782623291, "logits_per_char": -0.6930553913116455, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 205, "native_id": "MDSA_2007_5_2", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3582549095153809, "incorrect_loss_raw": 1.4030537207921345, "correct_loss_per_char": 0.6791274547576904, "incorrect_loss_per_char": 0.7015268603960673, "correct_loss_per_token": 1.3582549095153809, "incorrect_loss_per_token": 1.4030537207921345, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3603850603103638, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.3603850603103638, "logits_per_char": -0.6801925301551819, "num_chars": 2}, {"sum_logits": -1.507561445236206, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.507561445236206, "logits_per_char": -0.753780722618103, "num_chars": 2}, {"sum_logits": -1.3582549095153809, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.3582549095153809, "logits_per_char": -0.6791274547576904, "num_chars": 2}, {"sum_logits": -1.341214656829834, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.341214656829834, "logits_per_char": -0.670607328414917, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 206, "native_id": "Mercury_7271758", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3547641038894653, "incorrect_loss_raw": 1.411669095357259, "correct_loss_per_char": 0.6773820519447327, "incorrect_loss_per_char": 0.7058345476786295, "correct_loss_per_token": 1.3547641038894653, "incorrect_loss_per_token": 1.411669095357259, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.565210223197937, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.565210223197937, "logits_per_char": -0.7826051115989685, "num_chars": 2}, {"sum_logits": -1.3547641038894653, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.3547641038894653, "logits_per_char": -0.6773820519447327, "num_chars": 2}, {"sum_logits": -1.4491714239120483, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4491714239120483, "logits_per_char": -0.7245857119560242, "num_chars": 2}, {"sum_logits": -1.220625638961792, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.220625638961792, "logits_per_char": -0.610312819480896, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 207, "native_id": "MCAS_2003_8_31", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3514260053634644, "incorrect_loss_raw": 1.4058920939763386, "correct_loss_per_char": 0.6757130026817322, "incorrect_loss_per_char": 0.7029460469881693, "correct_loss_per_token": 1.3514260053634644, "incorrect_loss_per_token": 1.4058920939763386, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.470428705215454, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.470428705215454, "logits_per_char": -0.735214352607727, "num_chars": 2}, {"sum_logits": -1.4474517107009888, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.4474517107009888, "logits_per_char": -0.7237258553504944, "num_chars": 2}, {"sum_logits": -1.3514260053634644, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.3514260053634644, "logits_per_char": -0.6757130026817322, "num_chars": 2}, {"sum_logits": -1.2997958660125732, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": true, "logits_per_token": -1.2997958660125732, "logits_per_char": -0.6498979330062866, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 208, "native_id": "AKDE&ED_2008_8_53", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4319924116134644, "incorrect_loss_raw": 1.3798781236012776, "correct_loss_per_char": 0.7159962058067322, "incorrect_loss_per_char": 0.6899390618006388, "correct_loss_per_token": 1.4319924116134644, "incorrect_loss_per_token": 1.3798781236012776, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.489302635192871, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.489302635192871, "logits_per_char": -0.7446513175964355, "num_chars": 2}, {"sum_logits": -1.3204166889190674, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.3204166889190674, "logits_per_char": -0.6602083444595337, "num_chars": 2}, {"sum_logits": -1.4319924116134644, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4319924116134644, "logits_per_char": -0.7159962058067322, "num_chars": 2}, {"sum_logits": -1.3299150466918945, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.3299150466918945, "logits_per_char": -0.6649575233459473, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 209, "native_id": "TIMSS_2007_8_pg109", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3358358144760132, "incorrect_loss_raw": 1.4102420806884766, "correct_loss_per_char": 0.6679179072380066, "incorrect_loss_per_char": 0.7051210403442383, "correct_loss_per_token": 1.3358358144760132, "incorrect_loss_per_token": 1.4102420806884766, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4551465511322021, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4551465511322021, "logits_per_char": -0.7275732755661011, "num_chars": 2}, {"sum_logits": -1.4252605438232422, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4252605438232422, "logits_per_char": -0.7126302719116211, "num_chars": 2}, {"sum_logits": -1.3358358144760132, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.3358358144760132, "logits_per_char": -0.6679179072380066, "num_chars": 2}, {"sum_logits": -1.3503191471099854, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.3503191471099854, "logits_per_char": -0.6751595735549927, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 210, "native_id": "Mercury_175385", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4149932861328125, "incorrect_loss_raw": 1.3822453022003174, "correct_loss_per_char": 0.7074966430664062, "incorrect_loss_per_char": 0.6911226511001587, "correct_loss_per_token": 1.4149932861328125, "incorrect_loss_per_token": 1.3822453022003174, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3472111225128174, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.3472111225128174, "logits_per_char": -0.6736055612564087, "num_chars": 2}, {"sum_logits": -1.4106452465057373, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4106452465057373, "logits_per_char": -0.7053226232528687, "num_chars": 2}, {"sum_logits": -1.4149932861328125, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4149932861328125, "logits_per_char": -0.7074966430664062, "num_chars": 2}, {"sum_logits": -1.3888795375823975, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3888795375823975, "logits_per_char": -0.6944397687911987, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 211, "native_id": "Mercury_410669", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3048690557479858, "incorrect_loss_raw": 1.424412210782369, "correct_loss_per_char": 0.6524345278739929, "incorrect_loss_per_char": 0.7122061053911845, "correct_loss_per_token": 1.3048690557479858, "incorrect_loss_per_token": 1.424412210782369, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.482964038848877, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.482964038848877, "logits_per_char": -0.7414820194244385, "num_chars": 2}, {"sum_logits": -1.3026918172836304, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": true, "logits_per_token": -1.3026918172836304, "logits_per_char": -0.6513459086418152, "num_chars": 2}, {"sum_logits": -1.4875807762145996, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.4875807762145996, "logits_per_char": -0.7437903881072998, "num_chars": 2}, {"sum_logits": -1.3048690557479858, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.3048690557479858, "logits_per_char": -0.6524345278739929, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 212, "native_id": "MEAP_2005_8_39", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4761848449707031, "incorrect_loss_raw": 1.363615353902181, "correct_loss_per_char": 0.7380924224853516, "incorrect_loss_per_char": 0.6818076769510905, "correct_loss_per_token": 1.4761848449707031, "incorrect_loss_per_token": 1.363615353902181, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.314957618713379, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.314957618713379, "logits_per_char": -0.6574788093566895, "num_chars": 2}, {"sum_logits": -1.3476097583770752, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.3476097583770752, "logits_per_char": -0.6738048791885376, "num_chars": 2}, {"sum_logits": -1.4761848449707031, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.4761848449707031, "logits_per_char": -0.7380924224853516, "num_chars": 2}, {"sum_logits": -1.4282786846160889, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.4282786846160889, "logits_per_char": -0.7141393423080444, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 213, "native_id": "Mercury_SC_408568", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3849869966506958, "incorrect_loss_raw": 1.4011398156483967, "correct_loss_per_char": 0.6924934983253479, "incorrect_loss_per_char": 0.7005699078241984, "correct_loss_per_token": 1.3849869966506958, "incorrect_loss_per_token": 1.4011398156483967, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4594779014587402, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.4594779014587402, "logits_per_char": -0.7297389507293701, "num_chars": 2}, {"sum_logits": -1.3849869966506958, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.3849869966506958, "logits_per_char": -0.6924934983253479, "num_chars": 2}, {"sum_logits": -1.5218696594238281, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.5218696594238281, "logits_per_char": -0.7609348297119141, "num_chars": 2}, {"sum_logits": -1.222071886062622, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": true, "logits_per_token": -1.222071886062622, "logits_per_char": -0.611035943031311, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 214, "native_id": "AKDE&ED_2008_8_7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3292417526245117, "incorrect_loss_raw": 1.4174368778864543, "correct_loss_per_char": 0.6646208763122559, "incorrect_loss_per_char": 0.7087184389432272, "correct_loss_per_token": 1.3292417526245117, "incorrect_loss_per_token": 1.4174368778864543, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2850714921951294, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": true, "logits_per_token": -1.2850714921951294, "logits_per_char": -0.6425357460975647, "num_chars": 2}, {"sum_logits": -1.3292417526245117, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.3292417526245117, "logits_per_char": -0.6646208763122559, "num_chars": 2}, {"sum_logits": -1.4919795989990234, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.4919795989990234, "logits_per_char": -0.7459897994995117, "num_chars": 2}, {"sum_logits": -1.47525954246521, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.47525954246521, "logits_per_char": -0.737629771232605, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 215, "native_id": "Mercury_7082845", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.33098566532135, "incorrect_loss_raw": 1.4121991793314617, "correct_loss_per_char": 0.665492832660675, "incorrect_loss_per_char": 0.7060995896657308, "correct_loss_per_token": 1.33098566532135, "incorrect_loss_per_token": 1.4121991793314617, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.341023325920105, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.341023325920105, "logits_per_char": -0.6705116629600525, "num_chars": 2}, {"sum_logits": -1.33098566532135, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.33098566532135, "logits_per_char": -0.665492832660675, "num_chars": 2}, {"sum_logits": -1.5088235139846802, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.5088235139846802, "logits_per_char": -0.7544117569923401, "num_chars": 2}, {"sum_logits": -1.3867506980895996, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.3867506980895996, "logits_per_char": -0.6933753490447998, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 216, "native_id": "Mercury_SC_405726", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2101346254348755, "incorrect_loss_raw": 1.460503617922465, "correct_loss_per_char": 0.6050673127174377, "incorrect_loss_per_char": 0.7302518089612325, "correct_loss_per_token": 1.2101346254348755, "incorrect_loss_per_token": 1.460503617922465, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5030865669250488, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.5030865669250488, "logits_per_char": -0.7515432834625244, "num_chars": 2}, {"sum_logits": -1.3688366413116455, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.3688366413116455, "logits_per_char": -0.6844183206558228, "num_chars": 2}, {"sum_logits": -1.5095876455307007, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.5095876455307007, "logits_per_char": -0.7547938227653503, "num_chars": 2}, {"sum_logits": -1.2101346254348755, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.2101346254348755, "logits_per_char": -0.6050673127174377, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 217, "native_id": "Mercury_SC_415407", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0998709201812744, "incorrect_loss_raw": 1.556800087292989, "correct_loss_per_char": 0.5499354600906372, "incorrect_loss_per_char": 0.7784000436464945, "correct_loss_per_token": 1.0998709201812744, "incorrect_loss_per_token": 1.556800087292989, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0998709201812744, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.0998709201812744, "logits_per_char": -0.5499354600906372, "num_chars": 2}, {"sum_logits": -1.2987784147262573, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.2987784147262573, "logits_per_char": -0.6493892073631287, "num_chars": 2}, {"sum_logits": -1.5051493644714355, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.5051493644714355, "logits_per_char": -0.7525746822357178, "num_chars": 2}, {"sum_logits": -1.8664724826812744, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.8664724826812744, "logits_per_char": -0.9332362413406372, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 218, "native_id": "Mercury_SC_401792", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3237942457199097, "incorrect_loss_raw": 1.4274191459019978, "correct_loss_per_char": 0.6618971228599548, "incorrect_loss_per_char": 0.7137095729509989, "correct_loss_per_token": 1.3237942457199097, "incorrect_loss_per_token": 1.4274191459019978, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5775911808013916, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.5775911808013916, "logits_per_char": -0.7887955904006958, "num_chars": 2}, {"sum_logits": -1.4892972707748413, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.4892972707748413, "logits_per_char": -0.7446486353874207, "num_chars": 2}, {"sum_logits": -1.3237942457199097, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.3237942457199097, "logits_per_char": -0.6618971228599548, "num_chars": 2}, {"sum_logits": -1.2153689861297607, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": true, "logits_per_token": -1.2153689861297607, "logits_per_char": -0.6076844930648804, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 219, "native_id": "LEAP_2000_8_4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4310578107833862, "incorrect_loss_raw": 1.3811055421829224, "correct_loss_per_char": 0.7155289053916931, "incorrect_loss_per_char": 0.6905527710914612, "correct_loss_per_token": 1.4310578107833862, "incorrect_loss_per_token": 1.3811055421829224, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.262792944908142, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -1.262792944908142, "logits_per_char": -0.631396472454071, "num_chars": 2}, {"sum_logits": -1.4310578107833862, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.4310578107833862, "logits_per_char": -0.7155289053916931, "num_chars": 2}, {"sum_logits": -1.5022114515304565, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.5022114515304565, "logits_per_char": -0.7511057257652283, "num_chars": 2}, {"sum_logits": -1.3783122301101685, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.3783122301101685, "logits_per_char": -0.6891561150550842, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 220, "native_id": "Mercury_SC_413439", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6384103298187256, "incorrect_loss_raw": 1.3301855325698853, "correct_loss_per_char": 0.8192051649093628, "incorrect_loss_per_char": 0.6650927662849426, "correct_loss_per_token": 1.6384103298187256, "incorrect_loss_per_token": 1.3301855325698853, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6384103298187256, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.6384103298187256, "logits_per_char": -0.8192051649093628, "num_chars": 2}, {"sum_logits": -1.5066053867340088, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.5066053867340088, "logits_per_char": -0.7533026933670044, "num_chars": 2}, {"sum_logits": -1.3470869064331055, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3470869064331055, "logits_per_char": -0.6735434532165527, "num_chars": 2}, {"sum_logits": -1.1368643045425415, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.1368643045425415, "logits_per_char": -0.5684321522712708, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 221, "native_id": "ACTAAP_2014_7_13", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4314218759536743, "incorrect_loss_raw": 1.3838471571604412, "correct_loss_per_char": 0.7157109379768372, "incorrect_loss_per_char": 0.6919235785802206, "correct_loss_per_token": 1.4314218759536743, "incorrect_loss_per_token": 1.3838471571604412, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.530875325202942, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.530875325202942, "logits_per_char": -0.765437662601471, "num_chars": 2}, {"sum_logits": -1.4314218759536743, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.4314218759536743, "logits_per_char": -0.7157109379768372, "num_chars": 2}, {"sum_logits": -1.396531105041504, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.396531105041504, "logits_per_char": -0.698265552520752, "num_chars": 2}, {"sum_logits": -1.2241350412368774, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": true, "logits_per_token": -1.2241350412368774, "logits_per_char": -0.6120675206184387, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 222, "native_id": "Mercury_SC_402638", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2158918380737305, "incorrect_loss_raw": 1.4642760753631592, "correct_loss_per_char": 0.6079459190368652, "incorrect_loss_per_char": 0.7321380376815796, "correct_loss_per_token": 1.2158918380737305, "incorrect_loss_per_token": 1.4642760753631592, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2158918380737305, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.2158918380737305, "logits_per_char": -0.6079459190368652, "num_chars": 2}, {"sum_logits": -1.3132731914520264, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3132731914520264, "logits_per_char": -0.6566365957260132, "num_chars": 2}, {"sum_logits": -1.602911353111267, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.602911353111267, "logits_per_char": -0.8014556765556335, "num_chars": 2}, {"sum_logits": -1.476643681526184, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.476643681526184, "logits_per_char": -0.738321840763092, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 223, "native_id": "Mercury_SC_406725", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4069730043411255, "incorrect_loss_raw": 1.3887179692586262, "correct_loss_per_char": 0.7034865021705627, "incorrect_loss_per_char": 0.6943589846293131, "correct_loss_per_token": 1.4069730043411255, "incorrect_loss_per_token": 1.3887179692586262, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4069730043411255, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4069730043411255, "logits_per_char": -0.7034865021705627, "num_chars": 2}, {"sum_logits": -1.3782674074172974, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.3782674074172974, "logits_per_char": -0.6891337037086487, "num_chars": 2}, {"sum_logits": -1.493849754333496, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.493849754333496, "logits_per_char": -0.746924877166748, "num_chars": 2}, {"sum_logits": -1.2940367460250854, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.2940367460250854, "logits_per_char": -0.6470183730125427, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 224, "native_id": "NYSEDREGENTS_2015_4_29", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3372535705566406, "incorrect_loss_raw": 1.4216443697611492, "correct_loss_per_char": 0.6686267852783203, "incorrect_loss_per_char": 0.7108221848805746, "correct_loss_per_token": 1.3372535705566406, "incorrect_loss_per_token": 1.4216443697611492, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3372535705566406, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3372535705566406, "logits_per_char": -0.6686267852783203, "num_chars": 2}, {"sum_logits": -1.2076503038406372, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.2076503038406372, "logits_per_char": -0.6038251519203186, "num_chars": 2}, {"sum_logits": -1.5558691024780273, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.5558691024780273, "logits_per_char": -0.7779345512390137, "num_chars": 2}, {"sum_logits": -1.5014137029647827, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.5014137029647827, "logits_per_char": -0.7507068514823914, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 225, "native_id": "Mercury_406136", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4862245321273804, "incorrect_loss_raw": 1.3652718861897786, "correct_loss_per_char": 0.7431122660636902, "incorrect_loss_per_char": 0.6826359430948893, "correct_loss_per_token": 1.4862245321273804, "incorrect_loss_per_token": 1.3652718861897786, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4862245321273804, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4862245321273804, "logits_per_char": -0.7431122660636902, "num_chars": 2}, {"sum_logits": -1.4443835020065308, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4443835020065308, "logits_per_char": -0.7221917510032654, "num_chars": 2}, {"sum_logits": -1.4348156452178955, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4348156452178955, "logits_per_char": -0.7174078226089478, "num_chars": 2}, {"sum_logits": -1.2166165113449097, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.2166165113449097, "logits_per_char": -0.6083082556724548, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 226, "native_id": "MSA_2012_5_23", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4070097208023071, "incorrect_loss_raw": 1.3904660542805989, "correct_loss_per_char": 0.7035048604011536, "incorrect_loss_per_char": 0.6952330271402994, "correct_loss_per_token": 1.4070097208023071, "incorrect_loss_per_token": 1.3904660542805989, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4346766471862793, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.4346766471862793, "logits_per_char": -0.7173383235931396, "num_chars": 2}, {"sum_logits": -1.4831809997558594, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.4831809997558594, "logits_per_char": -0.7415904998779297, "num_chars": 2}, {"sum_logits": -1.4070097208023071, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.4070097208023071, "logits_per_char": -0.7035048604011536, "num_chars": 2}, {"sum_logits": -1.2535405158996582, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": true, "logits_per_token": -1.2535405158996582, "logits_per_char": -0.6267702579498291, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 227, "native_id": "Mercury_405873", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3972294330596924, "incorrect_loss_raw": 1.3924152453740437, "correct_loss_per_char": 0.6986147165298462, "incorrect_loss_per_char": 0.6962076226870219, "correct_loss_per_token": 1.3972294330596924, "incorrect_loss_per_token": 1.3924152453740437, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2808403968811035, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.2808403968811035, "logits_per_char": -0.6404201984405518, "num_chars": 2}, {"sum_logits": -1.3972294330596924, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3972294330596924, "logits_per_char": -0.6986147165298462, "num_chars": 2}, {"sum_logits": -1.4646966457366943, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4646966457366943, "logits_per_char": -0.7323483228683472, "num_chars": 2}, {"sum_logits": -1.4317086935043335, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4317086935043335, "logits_per_char": -0.7158543467521667, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 228, "native_id": "Mercury_7043820", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5990890264511108, "incorrect_loss_raw": 1.3313076098759968, "correct_loss_per_char": 0.7995445132255554, "incorrect_loss_per_char": 0.6656538049379984, "correct_loss_per_token": 1.5990890264511108, "incorrect_loss_per_token": 1.3313076098759968, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3643076419830322, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3643076419830322, "logits_per_char": -0.6821538209915161, "num_chars": 2}, {"sum_logits": -1.5990890264511108, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.5990890264511108, "logits_per_char": -0.7995445132255554, "num_chars": 2}, {"sum_logits": -1.3986231088638306, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3986231088638306, "logits_per_char": -0.6993115544319153, "num_chars": 2}, {"sum_logits": -1.230992078781128, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.230992078781128, "logits_per_char": -0.615496039390564, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 229, "native_id": "MCAS_2005_5_34", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.468116283416748, "incorrect_loss_raw": 1.3701680501302083, "correct_loss_per_char": 0.734058141708374, "incorrect_loss_per_char": 0.6850840250651041, "correct_loss_per_token": 1.468116283416748, "incorrect_loss_per_token": 1.3701680501302083, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4543956518173218, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4543956518173218, "logits_per_char": -0.7271978259086609, "num_chars": 2}, {"sum_logits": -1.468116283416748, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.468116283416748, "logits_per_char": -0.734058141708374, "num_chars": 2}, {"sum_logits": -1.4301248788833618, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4301248788833618, "logits_per_char": -0.7150624394416809, "num_chars": 2}, {"sum_logits": -1.2259836196899414, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.2259836196899414, "logits_per_char": -0.6129918098449707, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 230, "native_id": "Mercury_7182245", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2073458433151245, "incorrect_loss_raw": 1.4597506125768025, "correct_loss_per_char": 0.6036729216575623, "incorrect_loss_per_char": 0.7298753062884012, "correct_loss_per_token": 1.2073458433151245, "incorrect_loss_per_token": 1.4597506125768025, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5043596029281616, "num_tokens": 1, "num_tokens_all": 451, "is_greedy": false, "logits_per_token": -1.5043596029281616, "logits_per_char": -0.7521798014640808, "num_chars": 2}, {"sum_logits": -1.415218710899353, "num_tokens": 1, "num_tokens_all": 451, "is_greedy": false, "logits_per_token": -1.415218710899353, "logits_per_char": -0.7076093554496765, "num_chars": 2}, {"sum_logits": -1.459673523902893, "num_tokens": 1, "num_tokens_all": 451, "is_greedy": false, "logits_per_token": -1.459673523902893, "logits_per_char": -0.7298367619514465, "num_chars": 2}, {"sum_logits": -1.2073458433151245, "num_tokens": 1, "num_tokens_all": 451, "is_greedy": true, "logits_per_token": -1.2073458433151245, "logits_per_char": -0.6036729216575623, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 231, "native_id": "MSA_2012_8_30", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7495651245117188, "incorrect_loss_raw": 1.3025457859039307, "correct_loss_per_char": 0.8747825622558594, "incorrect_loss_per_char": 0.6512728929519653, "correct_loss_per_token": 1.7495651245117188, "incorrect_loss_per_token": 1.3025457859039307, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7495651245117188, "num_tokens": 1, "num_tokens_all": 498, "is_greedy": false, "logits_per_token": -1.7495651245117188, "logits_per_char": -0.8747825622558594, "num_chars": 2}, {"sum_logits": -1.2015671730041504, "num_tokens": 1, "num_tokens_all": 498, "is_greedy": true, "logits_per_token": -1.2015671730041504, "logits_per_char": -0.6007835865020752, "num_chars": 2}, {"sum_logits": -1.4958345890045166, "num_tokens": 1, "num_tokens_all": 498, "is_greedy": false, "logits_per_token": -1.4958345890045166, "logits_per_char": -0.7479172945022583, "num_chars": 2}, {"sum_logits": -1.210235595703125, "num_tokens": 1, "num_tokens_all": 498, "is_greedy": false, "logits_per_token": -1.210235595703125, "logits_per_char": -0.6051177978515625, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 232, "native_id": "Mercury_7252753", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.430619239807129, "incorrect_loss_raw": 1.3807475169499714, "correct_loss_per_char": 0.7153096199035645, "incorrect_loss_per_char": 0.6903737584749857, "correct_loss_per_token": 1.430619239807129, "incorrect_loss_per_token": 1.3807475169499714, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.430619239807129, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.430619239807129, "logits_per_char": -0.7153096199035645, "num_chars": 2}, {"sum_logits": -1.2828136682510376, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.2828136682510376, "logits_per_char": -0.6414068341255188, "num_chars": 2}, {"sum_logits": -1.4928514957427979, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4928514957427979, "logits_per_char": -0.7464257478713989, "num_chars": 2}, {"sum_logits": -1.366577386856079, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.366577386856079, "logits_per_char": -0.6832886934280396, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 233, "native_id": "TAKS_2009_8_36", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5541714429855347, "incorrect_loss_raw": 1.3662924369176228, "correct_loss_per_char": 0.7770857214927673, "incorrect_loss_per_char": 0.6831462184588114, "correct_loss_per_token": 1.5541714429855347, "incorrect_loss_per_token": 1.3662924369176228, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5725847482681274, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.5725847482681274, "logits_per_char": -0.7862923741340637, "num_chars": 2}, {"sum_logits": -1.5541714429855347, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.5541714429855347, "logits_per_char": -0.7770857214927673, "num_chars": 2}, {"sum_logits": -1.1496895551681519, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.1496895551681519, "logits_per_char": -0.5748447775840759, "num_chars": 2}, {"sum_logits": -1.3766030073165894, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.3766030073165894, "logits_per_char": -0.6883015036582947, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 234, "native_id": "Mercury_SC_415473", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2755513191223145, "incorrect_loss_raw": 1.4373006423314412, "correct_loss_per_char": 0.6377756595611572, "incorrect_loss_per_char": 0.7186503211657206, "correct_loss_per_token": 1.2755513191223145, "incorrect_loss_per_token": 1.4373006423314412, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4802179336547852, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4802179336547852, "logits_per_char": -0.7401089668273926, "num_chars": 2}, {"sum_logits": -1.5296909809112549, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.5296909809112549, "logits_per_char": -0.7648454904556274, "num_chars": 2}, {"sum_logits": -1.2755513191223145, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.2755513191223145, "logits_per_char": -0.6377756595611572, "num_chars": 2}, {"sum_logits": -1.3019930124282837, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3019930124282837, "logits_per_char": -0.6509965062141418, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 235, "native_id": "Mercury_SC_413624", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4305896759033203, "incorrect_loss_raw": 1.3770255247751872, "correct_loss_per_char": 0.7152948379516602, "incorrect_loss_per_char": 0.6885127623875936, "correct_loss_per_token": 1.4305896759033203, "incorrect_loss_per_token": 1.3770255247751872, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.344022274017334, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.344022274017334, "logits_per_char": -0.672011137008667, "num_chars": 2}, {"sum_logits": -1.4305896759033203, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4305896759033203, "logits_per_char": -0.7152948379516602, "num_chars": 2}, {"sum_logits": -1.3717405796051025, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.3717405796051025, "logits_per_char": -0.6858702898025513, "num_chars": 2}, {"sum_logits": -1.415313720703125, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.415313720703125, "logits_per_char": -0.7076568603515625, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 236, "native_id": "Mercury_7016800", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4182071685791016, "incorrect_loss_raw": 1.386265794436137, "correct_loss_per_char": 0.7091035842895508, "incorrect_loss_per_char": 0.6931328972180685, "correct_loss_per_token": 1.4182071685791016, "incorrect_loss_per_token": 1.386265794436137, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2916271686553955, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.2916271686553955, "logits_per_char": -0.6458135843276978, "num_chars": 2}, {"sum_logits": -1.4161194562911987, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4161194562911987, "logits_per_char": -0.7080597281455994, "num_chars": 2}, {"sum_logits": -1.4510507583618164, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4510507583618164, "logits_per_char": -0.7255253791809082, "num_chars": 2}, {"sum_logits": -1.4182071685791016, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4182071685791016, "logits_per_char": -0.7091035842895508, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 237, "native_id": "Mercury_SC_407228", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2848347425460815, "incorrect_loss_raw": 1.4294259150822957, "correct_loss_per_char": 0.6424173712730408, "incorrect_loss_per_char": 0.7147129575411478, "correct_loss_per_token": 1.2848347425460815, "incorrect_loss_per_token": 1.4294259150822957, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4089622497558594, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.4089622497558594, "logits_per_char": -0.7044811248779297, "num_chars": 2}, {"sum_logits": -1.3979443311691284, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.3979443311691284, "logits_per_char": -0.6989721655845642, "num_chars": 2}, {"sum_logits": -1.4813711643218994, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.4813711643218994, "logits_per_char": -0.7406855821609497, "num_chars": 2}, {"sum_logits": -1.2848347425460815, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -1.2848347425460815, "logits_per_char": -0.6424173712730408, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 238, "native_id": "Mercury_414504", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.403738021850586, "incorrect_loss_raw": 1.3864620923995972, "correct_loss_per_char": 0.701869010925293, "incorrect_loss_per_char": 0.6932310461997986, "correct_loss_per_token": 1.403738021850586, "incorrect_loss_per_token": 1.3864620923995972, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.40598464012146, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.40598464012146, "logits_per_char": -0.70299232006073, "num_chars": 2}, {"sum_logits": -1.403738021850586, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.403738021850586, "logits_per_char": -0.701869010925293, "num_chars": 2}, {"sum_logits": -1.346225619316101, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": true, "logits_per_token": -1.346225619316101, "logits_per_char": -0.6731128096580505, "num_chars": 2}, {"sum_logits": -1.4071760177612305, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.4071760177612305, "logits_per_char": -0.7035880088806152, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 239, "native_id": "TIMSS_2011_4_pg27", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5105063915252686, "incorrect_loss_raw": 1.3554975191752117, "correct_loss_per_char": 0.7552531957626343, "incorrect_loss_per_char": 0.6777487595876058, "correct_loss_per_token": 1.5105063915252686, "incorrect_loss_per_token": 1.3554975191752117, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5105063915252686, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.5105063915252686, "logits_per_char": -0.7552531957626343, "num_chars": 2}, {"sum_logits": -1.4758167266845703, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4758167266845703, "logits_per_char": -0.7379083633422852, "num_chars": 2}, {"sum_logits": -1.3113402128219604, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.3113402128219604, "logits_per_char": -0.6556701064109802, "num_chars": 2}, {"sum_logits": -1.279335618019104, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.279335618019104, "logits_per_char": -0.639667809009552, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 240, "native_id": "Mercury_SC_402029", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2720434665679932, "incorrect_loss_raw": 1.4350967804590862, "correct_loss_per_char": 0.6360217332839966, "incorrect_loss_per_char": 0.7175483902295431, "correct_loss_per_token": 1.2720434665679932, "incorrect_loss_per_token": 1.4350967804590862, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3931041955947876, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.3931041955947876, "logits_per_char": -0.6965520977973938, "num_chars": 2}, {"sum_logits": -1.5065809488296509, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.5065809488296509, "logits_per_char": -0.7532904744148254, "num_chars": 2}, {"sum_logits": -1.4056051969528198, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4056051969528198, "logits_per_char": -0.7028025984764099, "num_chars": 2}, {"sum_logits": -1.2720434665679932, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.2720434665679932, "logits_per_char": -0.6360217332839966, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 241, "native_id": "Mercury_7131845", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5077425241470337, "incorrect_loss_raw": 1.362158219019572, "correct_loss_per_char": 0.7538712620735168, "incorrect_loss_per_char": 0.681079109509786, "correct_loss_per_token": 1.5077425241470337, "incorrect_loss_per_token": 1.362158219019572, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5061012506484985, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.5061012506484985, "logits_per_char": -0.7530506253242493, "num_chars": 2}, {"sum_logits": -1.3885236978530884, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3885236978530884, "logits_per_char": -0.6942618489265442, "num_chars": 2}, {"sum_logits": -1.5077425241470337, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.5077425241470337, "logits_per_char": -0.7538712620735168, "num_chars": 2}, {"sum_logits": -1.191849708557129, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.191849708557129, "logits_per_char": -0.5959248542785645, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 242, "native_id": "Mercury_SC_405533", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4073125123977661, "incorrect_loss_raw": 1.3879525264104207, "correct_loss_per_char": 0.7036562561988831, "incorrect_loss_per_char": 0.6939762632052103, "correct_loss_per_token": 1.4073125123977661, "incorrect_loss_per_token": 1.3879525264104207, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.334877848625183, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.334877848625183, "logits_per_char": -0.6674389243125916, "num_chars": 2}, {"sum_logits": -1.4073125123977661, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4073125123977661, "logits_per_char": -0.7036562561988831, "num_chars": 2}, {"sum_logits": -1.4785832166671753, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4785832166671753, "logits_per_char": -0.7392916083335876, "num_chars": 2}, {"sum_logits": -1.3503965139389038, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.3503965139389038, "logits_per_char": -0.6751982569694519, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 243, "native_id": "Mercury_7086748", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3310986757278442, "incorrect_loss_raw": 1.411215861638387, "correct_loss_per_char": 0.6655493378639221, "incorrect_loss_per_char": 0.7056079308191935, "correct_loss_per_token": 1.3310986757278442, "incorrect_loss_per_token": 1.411215861638387, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3310986757278442, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.3310986757278442, "logits_per_char": -0.6655493378639221, "num_chars": 2}, {"sum_logits": -1.4086545705795288, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4086545705795288, "logits_per_char": -0.7043272852897644, "num_chars": 2}, {"sum_logits": -1.4085609912872314, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4085609912872314, "logits_per_char": -0.7042804956436157, "num_chars": 2}, {"sum_logits": -1.4164320230484009, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4164320230484009, "logits_per_char": -0.7082160115242004, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 244, "native_id": "MDSA_2007_8_17", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3618170022964478, "incorrect_loss_raw": 1.39963694413503, "correct_loss_per_char": 0.6809085011482239, "incorrect_loss_per_char": 0.699818472067515, "correct_loss_per_token": 1.3618170022964478, "incorrect_loss_per_token": 1.39963694413503, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4019410610198975, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.4019410610198975, "logits_per_char": -0.7009705305099487, "num_chars": 2}, {"sum_logits": -1.3618170022964478, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": true, "logits_per_token": -1.3618170022964478, "logits_per_char": -0.6809085011482239, "num_chars": 2}, {"sum_logits": -1.4019746780395508, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.4019746780395508, "logits_per_char": -0.7009873390197754, "num_chars": 2}, {"sum_logits": -1.394995093345642, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.394995093345642, "logits_per_char": -0.697497546672821, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 245, "native_id": "Mercury_7210473", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3812220096588135, "incorrect_loss_raw": 1.3953516483306885, "correct_loss_per_char": 0.6906110048294067, "incorrect_loss_per_char": 0.6976758241653442, "correct_loss_per_token": 1.3812220096588135, "incorrect_loss_per_token": 1.3953516483306885, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3949027061462402, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.3949027061462402, "logits_per_char": -0.6974513530731201, "num_chars": 2}, {"sum_logits": -1.3221288919448853, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.3221288919448853, "logits_per_char": -0.6610644459724426, "num_chars": 2}, {"sum_logits": -1.3812220096588135, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.3812220096588135, "logits_per_char": -0.6906110048294067, "num_chars": 2}, {"sum_logits": -1.46902334690094, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.46902334690094, "logits_per_char": -0.73451167345047, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 246, "native_id": "Mercury_7214340", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3837802410125732, "incorrect_loss_raw": 1.3982412020365398, "correct_loss_per_char": 0.6918901205062866, "incorrect_loss_per_char": 0.6991206010182699, "correct_loss_per_token": 1.3837802410125732, "incorrect_loss_per_token": 1.3982412020365398, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3837802410125732, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3837802410125732, "logits_per_char": -0.6918901205062866, "num_chars": 2}, {"sum_logits": -1.4098548889160156, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4098548889160156, "logits_per_char": -0.7049274444580078, "num_chars": 2}, {"sum_logits": -1.5095890760421753, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.5095890760421753, "logits_per_char": -0.7547945380210876, "num_chars": 2}, {"sum_logits": -1.2752796411514282, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.2752796411514282, "logits_per_char": -0.6376398205757141, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 247, "native_id": "MCAS_2005_9_17", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6157673597335815, "incorrect_loss_raw": 1.3490224679311116, "correct_loss_per_char": 0.8078836798667908, "incorrect_loss_per_char": 0.6745112339655558, "correct_loss_per_token": 1.6157673597335815, "incorrect_loss_per_token": 1.3490224679311116, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.374201774597168, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.374201774597168, "logits_per_char": -0.687100887298584, "num_chars": 2}, {"sum_logits": -1.5825965404510498, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.5825965404510498, "logits_per_char": -0.7912982702255249, "num_chars": 2}, {"sum_logits": -1.6157673597335815, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.6157673597335815, "logits_per_char": -0.8078836798667908, "num_chars": 2}, {"sum_logits": -1.0902690887451172, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.0902690887451172, "logits_per_char": -0.5451345443725586, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 248, "native_id": "MEA_2016_8_12", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3667452335357666, "incorrect_loss_raw": 1.4000919262568157, "correct_loss_per_char": 0.6833726167678833, "incorrect_loss_per_char": 0.7000459631284078, "correct_loss_per_token": 1.3667452335357666, "incorrect_loss_per_token": 1.4000919262568157, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4393316507339478, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4393316507339478, "logits_per_char": -0.7196658253669739, "num_chars": 2}, {"sum_logits": -1.3667452335357666, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3667452335357666, "logits_per_char": -0.6833726167678833, "num_chars": 2}, {"sum_logits": -1.4263124465942383, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4263124465942383, "logits_per_char": -0.7131562232971191, "num_chars": 2}, {"sum_logits": -1.3346316814422607, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.3346316814422607, "logits_per_char": -0.6673158407211304, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 249, "native_id": "Mercury_SC_401278", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.359492301940918, "incorrect_loss_raw": 1.4026472568511963, "correct_loss_per_char": 0.679746150970459, "incorrect_loss_per_char": 0.7013236284255981, "correct_loss_per_token": 1.359492301940918, "incorrect_loss_per_token": 1.4026472568511963, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3356696367263794, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.3356696367263794, "logits_per_char": -0.6678348183631897, "num_chars": 2}, {"sum_logits": -1.359492301940918, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.359492301940918, "logits_per_char": -0.679746150970459, "num_chars": 2}, {"sum_logits": -1.4504883289337158, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4504883289337158, "logits_per_char": -0.7252441644668579, "num_chars": 2}, {"sum_logits": -1.4217838048934937, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4217838048934937, "logits_per_char": -0.7108919024467468, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 250, "native_id": "Mercury_SC_407689", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2245523929595947, "incorrect_loss_raw": 1.456497033437093, "correct_loss_per_char": 0.6122761964797974, "incorrect_loss_per_char": 0.7282485167185465, "correct_loss_per_token": 1.2245523929595947, "incorrect_loss_per_token": 1.456497033437093, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5953518152236938, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.5953518152236938, "logits_per_char": -0.7976759076118469, "num_chars": 2}, {"sum_logits": -1.3888516426086426, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3888516426086426, "logits_per_char": -0.6944258213043213, "num_chars": 2}, {"sum_logits": -1.3852876424789429, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3852876424789429, "logits_per_char": -0.6926438212394714, "num_chars": 2}, {"sum_logits": -1.2245523929595947, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.2245523929595947, "logits_per_char": -0.6122761964797974, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 251, "native_id": "Mercury_7230405", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3827741146087646, "incorrect_loss_raw": 1.397664229075114, "correct_loss_per_char": 0.6913870573043823, "incorrect_loss_per_char": 0.698832114537557, "correct_loss_per_token": 1.3827741146087646, "incorrect_loss_per_token": 1.397664229075114, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5320485830307007, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.5320485830307007, "logits_per_char": -0.7660242915153503, "num_chars": 2}, {"sum_logits": -1.3827741146087646, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.3827741146087646, "logits_per_char": -0.6913870573043823, "num_chars": 2}, {"sum_logits": -1.371809720993042, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.371809720993042, "logits_per_char": -0.685904860496521, "num_chars": 2}, {"sum_logits": -1.2891343832015991, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.2891343832015991, "logits_per_char": -0.6445671916007996, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 252, "native_id": "Mercury_SC_405640", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.501875877380371, "incorrect_loss_raw": 1.3759446144104004, "correct_loss_per_char": 0.7509379386901855, "incorrect_loss_per_char": 0.6879723072052002, "correct_loss_per_token": 1.501875877380371, "incorrect_loss_per_token": 1.3759446144104004, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.501875877380371, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.501875877380371, "logits_per_char": -0.7509379386901855, "num_chars": 2}, {"sum_logits": -1.5394600629806519, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.5394600629806519, "logits_per_char": -0.7697300314903259, "num_chars": 2}, {"sum_logits": -1.4908193349838257, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.4908193349838257, "logits_per_char": -0.7454096674919128, "num_chars": 2}, {"sum_logits": -1.0975544452667236, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": true, "logits_per_token": -1.0975544452667236, "logits_per_char": -0.5487772226333618, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 253, "native_id": "Mercury_7201775", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3631776571273804, "incorrect_loss_raw": 1.4015940427780151, "correct_loss_per_char": 0.6815888285636902, "incorrect_loss_per_char": 0.7007970213890076, "correct_loss_per_token": 1.3631776571273804, "incorrect_loss_per_token": 1.4015940427780151, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.366776704788208, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.366776704788208, "logits_per_char": -0.683388352394104, "num_chars": 2}, {"sum_logits": -1.4342821836471558, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4342821836471558, "logits_per_char": -0.7171410918235779, "num_chars": 2}, {"sum_logits": -1.4037232398986816, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4037232398986816, "logits_per_char": -0.7018616199493408, "num_chars": 2}, {"sum_logits": -1.3631776571273804, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.3631776571273804, "logits_per_char": -0.6815888285636902, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 254, "native_id": "Mercury_7177398", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3424923419952393, "incorrect_loss_raw": 1.413714090983073, "correct_loss_per_char": 0.6712461709976196, "incorrect_loss_per_char": 0.7068570454915365, "correct_loss_per_token": 1.3424923419952393, "incorrect_loss_per_token": 1.413714090983073, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5811008214950562, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.5811008214950562, "logits_per_char": -0.7905504107475281, "num_chars": 2}, {"sum_logits": -1.3625720739364624, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.3625720739364624, "logits_per_char": -0.6812860369682312, "num_chars": 2}, {"sum_logits": -1.2974693775177002, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.2974693775177002, "logits_per_char": -0.6487346887588501, "num_chars": 2}, {"sum_logits": -1.3424923419952393, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.3424923419952393, "logits_per_char": -0.6712461709976196, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 255, "native_id": "Mercury_7041423", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4573779106140137, "incorrect_loss_raw": 1.3721903165181477, "correct_loss_per_char": 0.7286889553070068, "incorrect_loss_per_char": 0.6860951582590739, "correct_loss_per_token": 1.4573779106140137, "incorrect_loss_per_token": 1.3721903165181477, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4573779106140137, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.4573779106140137, "logits_per_char": -0.7286889553070068, "num_chars": 2}, {"sum_logits": -1.2996898889541626, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.2996898889541626, "logits_per_char": -0.6498449444770813, "num_chars": 2}, {"sum_logits": -1.4693231582641602, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.4693231582641602, "logits_per_char": -0.7346615791320801, "num_chars": 2}, {"sum_logits": -1.3475579023361206, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.3475579023361206, "logits_per_char": -0.6737789511680603, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 256, "native_id": "Mercury_7004743", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5785796642303467, "incorrect_loss_raw": 1.336231033007304, "correct_loss_per_char": 0.7892898321151733, "incorrect_loss_per_char": 0.668115516503652, "correct_loss_per_token": 1.5785796642303467, "incorrect_loss_per_token": 1.336231033007304, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2815606594085693, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.2815606594085693, "logits_per_char": -0.6407803297042847, "num_chars": 2}, {"sum_logits": -1.3520824909210205, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3520824909210205, "logits_per_char": -0.6760412454605103, "num_chars": 2}, {"sum_logits": -1.3750499486923218, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3750499486923218, "logits_per_char": -0.6875249743461609, "num_chars": 2}, {"sum_logits": -1.5785796642303467, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.5785796642303467, "logits_per_char": -0.7892898321151733, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 257, "native_id": "Mercury_7198468", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.496008276939392, "incorrect_loss_raw": 1.3673386573791504, "correct_loss_per_char": 0.748004138469696, "incorrect_loss_per_char": 0.6836693286895752, "correct_loss_per_token": 1.496008276939392, "incorrect_loss_per_token": 1.3673386573791504, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.496008276939392, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.496008276939392, "logits_per_char": -0.748004138469696, "num_chars": 2}, {"sum_logits": -1.3586490154266357, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.3586490154266357, "logits_per_char": -0.6793245077133179, "num_chars": 2}, {"sum_logits": -1.5419018268585205, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.5419018268585205, "logits_per_char": -0.7709509134292603, "num_chars": 2}, {"sum_logits": -1.201465129852295, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": true, "logits_per_token": -1.201465129852295, "logits_per_char": -0.6007325649261475, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 258, "native_id": "MEA_2014_5_11", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.335005283355713, "incorrect_loss_raw": 1.4161393245061238, "correct_loss_per_char": 0.6675026416778564, "incorrect_loss_per_char": 0.7080696622530619, "correct_loss_per_token": 1.335005283355713, "incorrect_loss_per_token": 1.4161393245061238, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.534744381904602, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.534744381904602, "logits_per_char": -0.767372190952301, "num_chars": 2}, {"sum_logits": -1.4532870054244995, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4532870054244995, "logits_per_char": -0.7266435027122498, "num_chars": 2}, {"sum_logits": -1.335005283355713, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.335005283355713, "logits_per_char": -0.6675026416778564, "num_chars": 2}, {"sum_logits": -1.26038658618927, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.26038658618927, "logits_per_char": -0.630193293094635, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 259, "native_id": "Mercury_410602", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.500644326210022, "incorrect_loss_raw": 1.3594469626744587, "correct_loss_per_char": 0.750322163105011, "incorrect_loss_per_char": 0.6797234813372294, "correct_loss_per_token": 1.500644326210022, "incorrect_loss_per_token": 1.3594469626744587, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.500644326210022, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.500644326210022, "logits_per_char": -0.750322163105011, "num_chars": 2}, {"sum_logits": -1.2919976711273193, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.2919976711273193, "logits_per_char": -0.6459988355636597, "num_chars": 2}, {"sum_logits": -1.4572967290878296, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4572967290878296, "logits_per_char": -0.7286483645439148, "num_chars": 2}, {"sum_logits": -1.3290464878082275, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.3290464878082275, "logits_per_char": -0.6645232439041138, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 260, "native_id": "Mercury_7108868", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4492578506469727, "incorrect_loss_raw": 1.392702579498291, "correct_loss_per_char": 0.7246289253234863, "incorrect_loss_per_char": 0.6963512897491455, "correct_loss_per_token": 1.4492578506469727, "incorrect_loss_per_token": 1.392702579498291, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6102879047393799, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.6102879047393799, "logits_per_char": -0.8051439523696899, "num_chars": 2}, {"sum_logits": -1.4455465078353882, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4455465078353882, "logits_per_char": -0.7227732539176941, "num_chars": 2}, {"sum_logits": -1.4492578506469727, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4492578506469727, "logits_per_char": -0.7246289253234863, "num_chars": 2}, {"sum_logits": -1.122273325920105, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.122273325920105, "logits_per_char": -0.5611366629600525, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 261, "native_id": "Mercury_7033828", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1247305870056152, "incorrect_loss_raw": 1.5162335634231567, "correct_loss_per_char": 0.5623652935028076, "incorrect_loss_per_char": 0.7581167817115784, "correct_loss_per_token": 1.1247305870056152, "incorrect_loss_per_token": 1.5162335634231567, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7952393293380737, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.7952393293380737, "logits_per_char": -0.8976196646690369, "num_chars": 2}, {"sum_logits": -1.3992724418640137, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.3992724418640137, "logits_per_char": -0.6996362209320068, "num_chars": 2}, {"sum_logits": -1.3541889190673828, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.3541889190673828, "logits_per_char": -0.6770944595336914, "num_chars": 2}, {"sum_logits": -1.1247305870056152, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.1247305870056152, "logits_per_char": -0.5623652935028076, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 262, "native_id": "TIMSS_2007_4_pg19", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2618889808654785, "incorrect_loss_raw": 1.4411509831746419, "correct_loss_per_char": 0.6309444904327393, "incorrect_loss_per_char": 0.7205754915873209, "correct_loss_per_token": 1.2618889808654785, "incorrect_loss_per_token": 1.4411509831746419, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2618889808654785, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.2618889808654785, "logits_per_char": -0.6309444904327393, "num_chars": 2}, {"sum_logits": -1.5682220458984375, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.5682220458984375, "logits_per_char": -0.7841110229492188, "num_chars": 2}, {"sum_logits": -1.3748315572738647, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.3748315572738647, "logits_per_char": -0.6874157786369324, "num_chars": 2}, {"sum_logits": -1.3803993463516235, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.3803993463516235, "logits_per_char": -0.6901996731758118, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 263, "native_id": "Mercury_400828", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5771043300628662, "incorrect_loss_raw": 1.3417857090632122, "correct_loss_per_char": 0.7885521650314331, "incorrect_loss_per_char": 0.6708928545316061, "correct_loss_per_token": 1.5771043300628662, "incorrect_loss_per_token": 1.3417857090632122, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1666525602340698, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.1666525602340698, "logits_per_char": -0.5833262801170349, "num_chars": 2}, {"sum_logits": -1.4150904417037964, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4150904417037964, "logits_per_char": -0.7075452208518982, "num_chars": 2}, {"sum_logits": -1.44361412525177, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.44361412525177, "logits_per_char": -0.721807062625885, "num_chars": 2}, {"sum_logits": -1.5771043300628662, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.5771043300628662, "logits_per_char": -0.7885521650314331, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 264, "native_id": "VASoL_2008_3_16", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3783748149871826, "incorrect_loss_raw": 1.3992881774902344, "correct_loss_per_char": 0.6891874074935913, "incorrect_loss_per_char": 0.6996440887451172, "correct_loss_per_token": 1.3783748149871826, "incorrect_loss_per_token": 1.3992881774902344, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2946507930755615, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.2946507930755615, "logits_per_char": -0.6473253965377808, "num_chars": 2}, {"sum_logits": -1.3783748149871826, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.3783748149871826, "logits_per_char": -0.6891874074935913, "num_chars": 2}, {"sum_logits": -1.518977165222168, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.518977165222168, "logits_per_char": -0.759488582611084, "num_chars": 2}, {"sum_logits": -1.3842365741729736, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.3842365741729736, "logits_per_char": -0.6921182870864868, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 265, "native_id": "LEAP__5_10315", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4272584915161133, "incorrect_loss_raw": 1.3787902196248372, "correct_loss_per_char": 0.7136292457580566, "incorrect_loss_per_char": 0.6893951098124186, "correct_loss_per_token": 1.4272584915161133, "incorrect_loss_per_token": 1.3787902196248372, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3081716299057007, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.3081716299057007, "logits_per_char": -0.6540858149528503, "num_chars": 2}, {"sum_logits": -1.4397646188735962, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4397646188735962, "logits_per_char": -0.7198823094367981, "num_chars": 2}, {"sum_logits": -1.4272584915161133, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4272584915161133, "logits_per_char": -0.7136292457580566, "num_chars": 2}, {"sum_logits": -1.3884344100952148, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.3884344100952148, "logits_per_char": -0.6942172050476074, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 266, "native_id": "Mercury_SC_415471", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3666036128997803, "incorrect_loss_raw": 1.3987315098444622, "correct_loss_per_char": 0.6833018064498901, "incorrect_loss_per_char": 0.6993657549222311, "correct_loss_per_token": 1.3666036128997803, "incorrect_loss_per_token": 1.3987315098444622, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3666036128997803, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.3666036128997803, "logits_per_char": -0.6833018064498901, "num_chars": 2}, {"sum_logits": -1.382413387298584, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.382413387298584, "logits_per_char": -0.691206693649292, "num_chars": 2}, {"sum_logits": -1.3698935508728027, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.3698935508728027, "logits_per_char": -0.6849467754364014, "num_chars": 2}, {"sum_logits": -1.4438875913619995, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.4438875913619995, "logits_per_char": -0.7219437956809998, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 267, "native_id": "Mercury_7247065", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4219071865081787, "incorrect_loss_raw": 1.3835180600484211, "correct_loss_per_char": 0.7109535932540894, "incorrect_loss_per_char": 0.6917590300242106, "correct_loss_per_token": 1.4219071865081787, "incorrect_loss_per_token": 1.3835180600484211, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3454071283340454, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": true, "logits_per_token": -1.3454071283340454, "logits_per_char": -0.6727035641670227, "num_chars": 2}, {"sum_logits": -1.4587095975875854, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.4587095975875854, "logits_per_char": -0.7293547987937927, "num_chars": 2}, {"sum_logits": -1.4219071865081787, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.4219071865081787, "logits_per_char": -0.7109535932540894, "num_chars": 2}, {"sum_logits": -1.3464374542236328, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.3464374542236328, "logits_per_char": -0.6732187271118164, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 268, "native_id": "MDSA_2011_5_3", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.354581356048584, "incorrect_loss_raw": 1.4087856610616047, "correct_loss_per_char": 0.677290678024292, "incorrect_loss_per_char": 0.7043928305308024, "correct_loss_per_token": 1.354581356048584, "incorrect_loss_per_token": 1.4087856610616047, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5735362768173218, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.5735362768173218, "logits_per_char": -0.7867681384086609, "num_chars": 2}, {"sum_logits": -1.354581356048584, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.354581356048584, "logits_per_char": -0.677290678024292, "num_chars": 2}, {"sum_logits": -1.3671518564224243, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.3671518564224243, "logits_per_char": -0.6835759282112122, "num_chars": 2}, {"sum_logits": -1.2856688499450684, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": true, "logits_per_token": -1.2856688499450684, "logits_per_char": -0.6428344249725342, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 269, "native_id": "MDSA_2009_5_39", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4803264141082764, "incorrect_loss_raw": 1.3683154185612996, "correct_loss_per_char": 0.7401632070541382, "incorrect_loss_per_char": 0.6841577092806498, "correct_loss_per_token": 1.4803264141082764, "incorrect_loss_per_token": 1.3683154185612996, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5005810260772705, "num_tokens": 1, "num_tokens_all": 484, "is_greedy": false, "logits_per_token": -1.5005810260772705, "logits_per_char": -0.7502905130386353, "num_chars": 2}, {"sum_logits": -1.242146372795105, "num_tokens": 1, "num_tokens_all": 484, "is_greedy": true, "logits_per_token": -1.242146372795105, "logits_per_char": -0.6210731863975525, "num_chars": 2}, {"sum_logits": -1.4803264141082764, "num_tokens": 1, "num_tokens_all": 484, "is_greedy": false, "logits_per_token": -1.4803264141082764, "logits_per_char": -0.7401632070541382, "num_chars": 2}, {"sum_logits": -1.3622188568115234, "num_tokens": 1, "num_tokens_all": 484, "is_greedy": false, "logits_per_token": -1.3622188568115234, "logits_per_char": -0.6811094284057617, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 270, "native_id": "Mercury_187198", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5218939781188965, "incorrect_loss_raw": 1.365072210629781, "correct_loss_per_char": 0.7609469890594482, "incorrect_loss_per_char": 0.6825361053148905, "correct_loss_per_token": 1.5218939781188965, "incorrect_loss_per_token": 1.365072210629781, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1287567615509033, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.1287567615509033, "logits_per_char": -0.5643783807754517, "num_chars": 2}, {"sum_logits": -1.4254401922225952, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4254401922225952, "logits_per_char": -0.7127200961112976, "num_chars": 2}, {"sum_logits": -1.5218939781188965, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.5218939781188965, "logits_per_char": -0.7609469890594482, "num_chars": 2}, {"sum_logits": -1.5410196781158447, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.5410196781158447, "logits_per_char": -0.7705098390579224, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 271, "native_id": "MCAS_2000_4_36", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7100732326507568, "incorrect_loss_raw": 1.3070290088653564, "correct_loss_per_char": 0.8550366163253784, "incorrect_loss_per_char": 0.6535145044326782, "correct_loss_per_token": 1.7100732326507568, "incorrect_loss_per_token": 1.3070290088653564, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7100732326507568, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.7100732326507568, "logits_per_char": -0.8550366163253784, "num_chars": 2}, {"sum_logits": -1.2635926008224487, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.2635926008224487, "logits_per_char": -0.6317963004112244, "num_chars": 2}, {"sum_logits": -1.335553526878357, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.335553526878357, "logits_per_char": -0.6677767634391785, "num_chars": 2}, {"sum_logits": -1.3219408988952637, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.3219408988952637, "logits_per_char": -0.6609704494476318, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 272, "native_id": "Mercury_184100", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3641180992126465, "incorrect_loss_raw": 1.4007037083307903, "correct_loss_per_char": 0.6820590496063232, "incorrect_loss_per_char": 0.7003518541653951, "correct_loss_per_token": 1.3641180992126465, "incorrect_loss_per_token": 1.4007037083307903, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3726036548614502, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3726036548614502, "logits_per_char": -0.6863018274307251, "num_chars": 2}, {"sum_logits": -1.363970160484314, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.363970160484314, "logits_per_char": -0.681985080242157, "num_chars": 2}, {"sum_logits": -1.3641180992126465, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3641180992126465, "logits_per_char": -0.6820590496063232, "num_chars": 2}, {"sum_logits": -1.4655373096466064, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4655373096466064, "logits_per_char": -0.7327686548233032, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 273, "native_id": "Mercury_LBS10814", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3439356088638306, "incorrect_loss_raw": 1.406848390897115, "correct_loss_per_char": 0.6719678044319153, "incorrect_loss_per_char": 0.7034241954485575, "correct_loss_per_token": 1.3439356088638306, "incorrect_loss_per_token": 1.406848390897115, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4039915800094604, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4039915800094604, "logits_per_char": -0.7019957900047302, "num_chars": 2}, {"sum_logits": -1.3439356088638306, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.3439356088638306, "logits_per_char": -0.6719678044319153, "num_chars": 2}, {"sum_logits": -1.4436014890670776, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4436014890670776, "logits_per_char": -0.7218007445335388, "num_chars": 2}, {"sum_logits": -1.3729521036148071, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3729521036148071, "logits_per_char": -0.6864760518074036, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 274, "native_id": "Mercury_SC_408384", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3269457817077637, "incorrect_loss_raw": 1.4249674876530964, "correct_loss_per_char": 0.6634728908538818, "incorrect_loss_per_char": 0.7124837438265482, "correct_loss_per_token": 1.3269457817077637, "incorrect_loss_per_token": 1.4249674876530964, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2039666175842285, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.2039666175842285, "logits_per_char": -0.6019833087921143, "num_chars": 2}, {"sum_logits": -1.3269457817077637, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.3269457817077637, "logits_per_char": -0.6634728908538818, "num_chars": 2}, {"sum_logits": -1.4961832761764526, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4961832761764526, "logits_per_char": -0.7480916380882263, "num_chars": 2}, {"sum_logits": -1.5747525691986084, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.5747525691986084, "logits_per_char": -0.7873762845993042, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 275, "native_id": "Mercury_7043068", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4040511846542358, "incorrect_loss_raw": 1.3936056693394978, "correct_loss_per_char": 0.7020255923271179, "incorrect_loss_per_char": 0.6968028346697489, "correct_loss_per_token": 1.4040511846542358, "incorrect_loss_per_token": 1.3936056693394978, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2897425889968872, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": true, "logits_per_token": -1.2897425889968872, "logits_per_char": -0.6448712944984436, "num_chars": 2}, {"sum_logits": -1.32575523853302, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.32575523853302, "logits_per_char": -0.66287761926651, "num_chars": 2}, {"sum_logits": -1.5653191804885864, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.5653191804885864, "logits_per_char": -0.7826595902442932, "num_chars": 2}, {"sum_logits": -1.4040511846542358, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.4040511846542358, "logits_per_char": -0.7020255923271179, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 276, "native_id": "Mercury_411071", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3383034467697144, "incorrect_loss_raw": 1.4089682896931965, "correct_loss_per_char": 0.6691517233848572, "incorrect_loss_per_char": 0.7044841448465983, "correct_loss_per_token": 1.3383034467697144, "incorrect_loss_per_token": 1.4089682896931965, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3681180477142334, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3681180477142334, "logits_per_char": -0.6840590238571167, "num_chars": 2}, {"sum_logits": -1.3383034467697144, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.3383034467697144, "logits_per_char": -0.6691517233848572, "num_chars": 2}, {"sum_logits": -1.4211500883102417, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4211500883102417, "logits_per_char": -0.7105750441551208, "num_chars": 2}, {"sum_logits": -1.4376367330551147, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4376367330551147, "logits_per_char": -0.7188183665275574, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 277, "native_id": "NYSEDREGENTS_2010_4_24", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3933659791946411, "incorrect_loss_raw": 1.6623357931772869, "correct_loss_per_char": 0.6966829895973206, "incorrect_loss_per_char": 0.8311678965886434, "correct_loss_per_token": 1.3933659791946411, "incorrect_loss_per_token": 1.6623357931772869, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9329484701156616, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -0.9329484701156616, "logits_per_char": -0.4664742350578308, "num_chars": 2}, {"sum_logits": -1.3933659791946411, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3933659791946411, "logits_per_char": -0.6966829895973206, "num_chars": 2}, {"sum_logits": -1.8001216650009155, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.8001216650009155, "logits_per_char": -0.9000608325004578, "num_chars": 2}, {"sum_logits": -2.253937244415283, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -2.253937244415283, "logits_per_char": -1.1269686222076416, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 278, "native_id": "Mercury_SC_409673", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4210939407348633, "incorrect_loss_raw": 1.3806004126866658, "correct_loss_per_char": 0.7105469703674316, "incorrect_loss_per_char": 0.6903002063433329, "correct_loss_per_token": 1.4210939407348633, "incorrect_loss_per_token": 1.3806004126866658, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3448050022125244, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": true, "logits_per_token": -1.3448050022125244, "logits_per_char": -0.6724025011062622, "num_chars": 2}, {"sum_logits": -1.368565320968628, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.368565320968628, "logits_per_char": -0.684282660484314, "num_chars": 2}, {"sum_logits": -1.4284309148788452, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.4284309148788452, "logits_per_char": -0.7142154574394226, "num_chars": 2}, {"sum_logits": -1.4210939407348633, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.4210939407348633, "logits_per_char": -0.7105469703674316, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 279, "native_id": "Mercury_SC_400374", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.325103521347046, "incorrect_loss_raw": 1.4177251259485881, "correct_loss_per_char": 0.662551760673523, "incorrect_loss_per_char": 0.7088625629742941, "correct_loss_per_token": 1.325103521347046, "incorrect_loss_per_token": 1.4177251259485881, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5476009845733643, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.5476009845733643, "logits_per_char": -0.7738004922866821, "num_chars": 2}, {"sum_logits": -1.325103521347046, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.325103521347046, "logits_per_char": -0.662551760673523, "num_chars": 2}, {"sum_logits": -1.4009463787078857, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4009463787078857, "logits_per_char": -0.7004731893539429, "num_chars": 2}, {"sum_logits": -1.3046280145645142, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.3046280145645142, "logits_per_char": -0.6523140072822571, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 280, "native_id": "CSZ_2009_8_CSZ20740", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4541833400726318, "incorrect_loss_raw": 1.3713759978612263, "correct_loss_per_char": 0.7270916700363159, "incorrect_loss_per_char": 0.6856879989306132, "correct_loss_per_token": 1.4541833400726318, "incorrect_loss_per_token": 1.3713759978612263, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3941736221313477, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3941736221313477, "logits_per_char": -0.6970868110656738, "num_chars": 2}, {"sum_logits": -1.4541833400726318, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.4541833400726318, "logits_per_char": -0.7270916700363159, "num_chars": 2}, {"sum_logits": -1.326351523399353, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.326351523399353, "logits_per_char": -0.6631757616996765, "num_chars": 2}, {"sum_logits": -1.3936028480529785, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3936028480529785, "logits_per_char": -0.6968014240264893, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 281, "native_id": "Mercury_SC_406482", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2144801616668701, "incorrect_loss_raw": 1.4565807978312175, "correct_loss_per_char": 0.6072400808334351, "incorrect_loss_per_char": 0.7282903989156088, "correct_loss_per_token": 1.2144801616668701, "incorrect_loss_per_token": 1.4565807978312175, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4453574419021606, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4453574419021606, "logits_per_char": -0.7226787209510803, "num_chars": 2}, {"sum_logits": -1.444352626800537, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.444352626800537, "logits_per_char": -0.7221763134002686, "num_chars": 2}, {"sum_logits": -1.4800323247909546, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4800323247909546, "logits_per_char": -0.7400161623954773, "num_chars": 2}, {"sum_logits": -1.2144801616668701, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.2144801616668701, "logits_per_char": -0.6072400808334351, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 282, "native_id": "OHAT_2007_8_24", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3510513305664062, "incorrect_loss_raw": 1.4052395820617676, "correct_loss_per_char": 0.6755256652832031, "incorrect_loss_per_char": 0.7026197910308838, "correct_loss_per_token": 1.3510513305664062, "incorrect_loss_per_token": 1.4052395820617676, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3510513305664062, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3510513305664062, "logits_per_char": -0.6755256652832031, "num_chars": 2}, {"sum_logits": -1.3921877145767212, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3921877145767212, "logits_per_char": -0.6960938572883606, "num_chars": 2}, {"sum_logits": -1.4949390888214111, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4949390888214111, "logits_per_char": -0.7474695444107056, "num_chars": 2}, {"sum_logits": -1.3285919427871704, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.3285919427871704, "logits_per_char": -0.6642959713935852, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 283, "native_id": "Mercury_188335", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3270612955093384, "incorrect_loss_raw": 1.4122652212778728, "correct_loss_per_char": 0.6635306477546692, "incorrect_loss_per_char": 0.7061326106389364, "correct_loss_per_token": 1.3270612955093384, "incorrect_loss_per_token": 1.4122652212778728, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.39786696434021, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.39786696434021, "logits_per_char": -0.698933482170105, "num_chars": 2}, {"sum_logits": -1.3270612955093384, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.3270612955093384, "logits_per_char": -0.6635306477546692, "num_chars": 2}, {"sum_logits": -1.4366012811660767, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4366012811660767, "logits_per_char": -0.7183006405830383, "num_chars": 2}, {"sum_logits": -1.4023274183273315, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4023274183273315, "logits_per_char": -0.7011637091636658, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 284, "native_id": "Mercury_7128555", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4343738555908203, "incorrect_loss_raw": 1.3807628949483235, "correct_loss_per_char": 0.7171869277954102, "incorrect_loss_per_char": 0.6903814474741617, "correct_loss_per_token": 1.4343738555908203, "incorrect_loss_per_token": 1.3807628949483235, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4751659631729126, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -1.4751659631729126, "logits_per_char": -0.7375829815864563, "num_chars": 2}, {"sum_logits": -1.4343738555908203, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -1.4343738555908203, "logits_per_char": -0.7171869277954102, "num_chars": 2}, {"sum_logits": -1.4287792444229126, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -1.4287792444229126, "logits_per_char": -0.7143896222114563, "num_chars": 2}, {"sum_logits": -1.2383434772491455, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": true, "logits_per_token": -1.2383434772491455, "logits_per_char": -0.6191717386245728, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 285, "native_id": "Mercury_407517", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3137456178665161, "incorrect_loss_raw": 1.4231191078821819, "correct_loss_per_char": 0.6568728089332581, "incorrect_loss_per_char": 0.7115595539410909, "correct_loss_per_token": 1.3137456178665161, "incorrect_loss_per_token": 1.4231191078821819, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4574028253555298, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4574028253555298, "logits_per_char": -0.7287014126777649, "num_chars": 2}, {"sum_logits": -1.5242056846618652, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.5242056846618652, "logits_per_char": -0.7621028423309326, "num_chars": 2}, {"sum_logits": -1.3137456178665161, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3137456178665161, "logits_per_char": -0.6568728089332581, "num_chars": 2}, {"sum_logits": -1.2877488136291504, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.2877488136291504, "logits_per_char": -0.6438744068145752, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 286, "native_id": "Mercury_405950", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4802418947219849, "incorrect_loss_raw": 1.3654497464497883, "correct_loss_per_char": 0.7401209473609924, "incorrect_loss_per_char": 0.6827248732248942, "correct_loss_per_token": 1.4802418947219849, "incorrect_loss_per_token": 1.3654497464497883, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3756080865859985, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3756080865859985, "logits_per_char": -0.6878040432929993, "num_chars": 2}, {"sum_logits": -1.253299593925476, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.253299593925476, "logits_per_char": -0.626649796962738, "num_chars": 2}, {"sum_logits": -1.4802418947219849, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4802418947219849, "logits_per_char": -0.7401209473609924, "num_chars": 2}, {"sum_logits": -1.4674415588378906, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4674415588378906, "logits_per_char": -0.7337207794189453, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 287, "native_id": "MCAS_2004_9_5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.418925166130066, "incorrect_loss_raw": 1.3818780183792114, "correct_loss_per_char": 0.709462583065033, "incorrect_loss_per_char": 0.6909390091896057, "correct_loss_per_token": 1.418925166130066, "incorrect_loss_per_token": 1.3818780183792114, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3050183057785034, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.3050183057785034, "logits_per_char": -0.6525091528892517, "num_chars": 2}, {"sum_logits": -1.464987874031067, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.464987874031067, "logits_per_char": -0.7324939370155334, "num_chars": 2}, {"sum_logits": -1.418925166130066, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.418925166130066, "logits_per_char": -0.709462583065033, "num_chars": 2}, {"sum_logits": -1.375627875328064, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.375627875328064, "logits_per_char": -0.687813937664032, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 288, "native_id": "NCEOGA_2013_8_28", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4003918170928955, "incorrect_loss_raw": 1.3911067247390747, "correct_loss_per_char": 0.7001959085464478, "incorrect_loss_per_char": 0.6955533623695374, "correct_loss_per_token": 1.4003918170928955, "incorrect_loss_per_token": 1.3911067247390747, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2954615354537964, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.2954615354537964, "logits_per_char": -0.6477307677268982, "num_chars": 2}, {"sum_logits": -1.4003918170928955, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4003918170928955, "logits_per_char": -0.7001959085464478, "num_chars": 2}, {"sum_logits": -1.5289552211761475, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.5289552211761475, "logits_per_char": -0.7644776105880737, "num_chars": 2}, {"sum_logits": -1.3489034175872803, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3489034175872803, "logits_per_char": -0.6744517087936401, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 289, "native_id": "Mercury_SC_406451", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4359793663024902, "incorrect_loss_raw": 1.3777302900950115, "correct_loss_per_char": 0.7179896831512451, "incorrect_loss_per_char": 0.6888651450475057, "correct_loss_per_token": 1.4359793663024902, "incorrect_loss_per_token": 1.3777302900950115, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.450656533241272, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.450656533241272, "logits_per_char": -0.725328266620636, "num_chars": 2}, {"sum_logits": -1.3925176858901978, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.3925176858901978, "logits_per_char": -0.6962588429450989, "num_chars": 2}, {"sum_logits": -1.4359793663024902, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.4359793663024902, "logits_per_char": -0.7179896831512451, "num_chars": 2}, {"sum_logits": -1.2900166511535645, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": true, "logits_per_token": -1.2900166511535645, "logits_per_char": -0.6450083255767822, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 290, "native_id": "Mercury_7109323", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2104401588439941, "incorrect_loss_raw": 1.4582939545313518, "correct_loss_per_char": 0.6052200794219971, "incorrect_loss_per_char": 0.7291469772656759, "correct_loss_per_token": 1.2104401588439941, "incorrect_loss_per_token": 1.4582939545313518, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4485752582550049, "num_tokens": 1, "num_tokens_all": 424, "is_greedy": false, "logits_per_token": -1.4485752582550049, "logits_per_char": -0.7242876291275024, "num_chars": 2}, {"sum_logits": -1.4314610958099365, "num_tokens": 1, "num_tokens_all": 424, "is_greedy": false, "logits_per_token": -1.4314610958099365, "logits_per_char": -0.7157305479049683, "num_chars": 2}, {"sum_logits": -1.4948455095291138, "num_tokens": 1, "num_tokens_all": 424, "is_greedy": false, "logits_per_token": -1.4948455095291138, "logits_per_char": -0.7474227547645569, "num_chars": 2}, {"sum_logits": -1.2104401588439941, "num_tokens": 1, "num_tokens_all": 424, "is_greedy": true, "logits_per_token": -1.2104401588439941, "logits_per_char": -0.6052200794219971, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 291, "native_id": "Mercury_404132", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.170454502105713, "incorrect_loss_raw": 1.4785556395848591, "correct_loss_per_char": 0.5852272510528564, "incorrect_loss_per_char": 0.7392778197924296, "correct_loss_per_token": 1.170454502105713, "incorrect_loss_per_token": 1.4785556395848591, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.48358952999115, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.48358952999115, "logits_per_char": -0.741794764995575, "num_chars": 2}, {"sum_logits": -1.170454502105713, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": true, "logits_per_token": -1.170454502105713, "logits_per_char": -0.5852272510528564, "num_chars": 2}, {"sum_logits": -1.3936654329299927, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.3936654329299927, "logits_per_char": -0.6968327164649963, "num_chars": 2}, {"sum_logits": -1.558411955833435, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.558411955833435, "logits_per_char": -0.7792059779167175, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 292, "native_id": "Mercury_7210210", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5159140825271606, "incorrect_loss_raw": 1.3547573884328206, "correct_loss_per_char": 0.7579570412635803, "incorrect_loss_per_char": 0.6773786942164103, "correct_loss_per_token": 1.5159140825271606, "incorrect_loss_per_token": 1.3547573884328206, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4546265602111816, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4546265602111816, "logits_per_char": -0.7273132801055908, "num_chars": 2}, {"sum_logits": -1.3060314655303955, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3060314655303955, "logits_per_char": -0.6530157327651978, "num_chars": 2}, {"sum_logits": -1.5159140825271606, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.5159140825271606, "logits_per_char": -0.7579570412635803, "num_chars": 2}, {"sum_logits": -1.3036141395568848, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.3036141395568848, "logits_per_char": -0.6518070697784424, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 293, "native_id": "Mercury_SC_408042", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2925693988800049, "incorrect_loss_raw": 1.425478498140971, "correct_loss_per_char": 0.6462846994400024, "incorrect_loss_per_char": 0.7127392490704855, "correct_loss_per_token": 1.2925693988800049, "incorrect_loss_per_token": 1.425478498140971, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4531983137130737, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.4531983137130737, "logits_per_char": -0.7265991568565369, "num_chars": 2}, {"sum_logits": -1.2925693988800049, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.2925693988800049, "logits_per_char": -0.6462846994400024, "num_chars": 2}, {"sum_logits": -1.4169950485229492, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.4169950485229492, "logits_per_char": -0.7084975242614746, "num_chars": 2}, {"sum_logits": -1.4062421321868896, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.4062421321868896, "logits_per_char": -0.7031210660934448, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 294, "native_id": "MCAS_2004_8_14", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.398849606513977, "incorrect_loss_raw": 1.395344893137614, "correct_loss_per_char": 0.6994248032569885, "incorrect_loss_per_char": 0.697672446568807, "correct_loss_per_token": 1.398849606513977, "incorrect_loss_per_token": 1.395344893137614, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4704012870788574, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4704012870788574, "logits_per_char": -0.7352006435394287, "num_chars": 2}, {"sum_logits": -1.2209815979003906, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.2209815979003906, "logits_per_char": -0.6104907989501953, "num_chars": 2}, {"sum_logits": -1.398849606513977, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.398849606513977, "logits_per_char": -0.6994248032569885, "num_chars": 2}, {"sum_logits": -1.4946517944335938, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4946517944335938, "logits_per_char": -0.7473258972167969, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 295, "native_id": "TIMSS_2011_4_pg5", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3224554061889648, "incorrect_loss_raw": 1.4166535933812459, "correct_loss_per_char": 0.6612277030944824, "incorrect_loss_per_char": 0.7083267966906229, "correct_loss_per_token": 1.3224554061889648, "incorrect_loss_per_token": 1.4166535933812459, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.355844259262085, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.355844259262085, "logits_per_char": -0.6779221296310425, "num_chars": 2}, {"sum_logits": -1.3224554061889648, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.3224554061889648, "logits_per_char": -0.6612277030944824, "num_chars": 2}, {"sum_logits": -1.3912321329116821, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3912321329116821, "logits_per_char": -0.6956160664558411, "num_chars": 2}, {"sum_logits": -1.5028843879699707, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.5028843879699707, "logits_per_char": -0.7514421939849854, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 296, "native_id": "Mercury_SC_406833", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5665313005447388, "incorrect_loss_raw": 1.3461623986562092, "correct_loss_per_char": 0.7832656502723694, "incorrect_loss_per_char": 0.6730811993281046, "correct_loss_per_token": 1.5665313005447388, "incorrect_loss_per_token": 1.3461623986562092, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.211889624595642, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.211889624595642, "logits_per_char": -0.605944812297821, "num_chars": 2}, {"sum_logits": -1.3566575050354004, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.3566575050354004, "logits_per_char": -0.6783287525177002, "num_chars": 2}, {"sum_logits": -1.5665313005447388, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.5665313005447388, "logits_per_char": -0.7832656502723694, "num_chars": 2}, {"sum_logits": -1.4699400663375854, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4699400663375854, "logits_per_char": -0.7349700331687927, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 297, "native_id": "Mercury_7029558", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4743605852127075, "incorrect_loss_raw": 1.3672627607981365, "correct_loss_per_char": 0.7371802926063538, "incorrect_loss_per_char": 0.6836313803990682, "correct_loss_per_token": 1.4743605852127075, "incorrect_loss_per_token": 1.3672627607981365, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4918296337127686, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4918296337127686, "logits_per_char": -0.7459148168563843, "num_chars": 2}, {"sum_logits": -1.2834060192108154, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.2834060192108154, "logits_per_char": -0.6417030096054077, "num_chars": 2}, {"sum_logits": -1.4743605852127075, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4743605852127075, "logits_per_char": -0.7371802926063538, "num_chars": 2}, {"sum_logits": -1.3265526294708252, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3265526294708252, "logits_per_char": -0.6632763147354126, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 298, "native_id": "Mercury_7138390", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3070076704025269, "incorrect_loss_raw": 1.4242717027664185, "correct_loss_per_char": 0.6535038352012634, "incorrect_loss_per_char": 0.7121358513832092, "correct_loss_per_token": 1.3070076704025269, "incorrect_loss_per_token": 1.4242717027664185, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4518808126449585, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": false, "logits_per_token": -1.4518808126449585, "logits_per_char": -0.7259404063224792, "num_chars": 2}, {"sum_logits": -1.3070076704025269, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": false, "logits_per_token": -1.3070076704025269, "logits_per_char": -0.6535038352012634, "num_chars": 2}, {"sum_logits": -1.5350446701049805, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": false, "logits_per_token": -1.5350446701049805, "logits_per_char": -0.7675223350524902, "num_chars": 2}, {"sum_logits": -1.2858896255493164, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": true, "logits_per_token": -1.2858896255493164, "logits_per_char": -0.6429448127746582, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 299, "native_id": "MEAP_2005_5_12", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3791495561599731, "incorrect_loss_raw": 1.3939604759216309, "correct_loss_per_char": 0.6895747780799866, "incorrect_loss_per_char": 0.6969802379608154, "correct_loss_per_token": 1.3791495561599731, "incorrect_loss_per_token": 1.3939604759216309, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3791495561599731, "num_tokens": 1, "num_tokens_all": 428, "is_greedy": false, "logits_per_token": -1.3791495561599731, "logits_per_char": -0.6895747780799866, "num_chars": 2}, {"sum_logits": -1.352893352508545, "num_tokens": 1, "num_tokens_all": 428, "is_greedy": true, "logits_per_token": -1.352893352508545, "logits_per_char": -0.6764466762542725, "num_chars": 2}, {"sum_logits": -1.446816086769104, "num_tokens": 1, "num_tokens_all": 428, "is_greedy": false, "logits_per_token": -1.446816086769104, "logits_per_char": -0.723408043384552, "num_chars": 2}, {"sum_logits": -1.3821719884872437, "num_tokens": 1, "num_tokens_all": 428, "is_greedy": false, "logits_per_token": -1.3821719884872437, "logits_per_char": -0.6910859942436218, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 300, "native_id": "MCAS_2000_4_30", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2006583213806152, "incorrect_loss_raw": 1.470135490099589, "correct_loss_per_char": 0.6003291606903076, "incorrect_loss_per_char": 0.7350677450497946, "correct_loss_per_token": 1.2006583213806152, "incorrect_loss_per_token": 1.470135490099589, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5994161367416382, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.5994161367416382, "logits_per_char": -0.7997080683708191, "num_chars": 2}, {"sum_logits": -1.440426230430603, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.440426230430603, "logits_per_char": -0.7202131152153015, "num_chars": 2}, {"sum_logits": -1.3705641031265259, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3705641031265259, "logits_per_char": -0.6852820515632629, "num_chars": 2}, {"sum_logits": -1.2006583213806152, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.2006583213806152, "logits_per_char": -0.6003291606903076, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 301, "native_id": "MCAS_1998_4_12", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.501063585281372, "incorrect_loss_raw": 1.3557746410369873, "correct_loss_per_char": 0.750531792640686, "incorrect_loss_per_char": 0.6778873205184937, "correct_loss_per_token": 1.501063585281372, "incorrect_loss_per_token": 1.3557746410369873, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.378793478012085, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.378793478012085, "logits_per_char": -0.6893967390060425, "num_chars": 2}, {"sum_logits": -1.3323012590408325, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.3323012590408325, "logits_per_char": -0.6661506295204163, "num_chars": 2}, {"sum_logits": -1.501063585281372, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.501063585281372, "logits_per_char": -0.750531792640686, "num_chars": 2}, {"sum_logits": -1.3562291860580444, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.3562291860580444, "logits_per_char": -0.6781145930290222, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 302, "native_id": "Mercury_175840", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.297824740409851, "incorrect_loss_raw": 1.4235442479451497, "correct_loss_per_char": 0.6489123702049255, "incorrect_loss_per_char": 0.7117721239725748, "correct_loss_per_token": 1.297824740409851, "incorrect_loss_per_token": 1.4235442479451497, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4204378128051758, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4204378128051758, "logits_per_char": -0.7102189064025879, "num_chars": 2}, {"sum_logits": -1.297824740409851, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.297824740409851, "logits_per_char": -0.6489123702049255, "num_chars": 2}, {"sum_logits": -1.4625325202941895, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4625325202941895, "logits_per_char": -0.7312662601470947, "num_chars": 2}, {"sum_logits": -1.387662410736084, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.387662410736084, "logits_per_char": -0.693831205368042, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 303, "native_id": "Mercury_7099190", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4749424457550049, "incorrect_loss_raw": 1.363608678181966, "correct_loss_per_char": 0.7374712228775024, "incorrect_loss_per_char": 0.681804339090983, "correct_loss_per_token": 1.4749424457550049, "incorrect_loss_per_token": 1.363608678181966, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.353632926940918, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.353632926940918, "logits_per_char": -0.676816463470459, "num_chars": 2}, {"sum_logits": -1.3174957036972046, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -1.3174957036972046, "logits_per_char": -0.6587478518486023, "num_chars": 2}, {"sum_logits": -1.4749424457550049, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.4749424457550049, "logits_per_char": -0.7374712228775024, "num_chars": 2}, {"sum_logits": -1.4196974039077759, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.4196974039077759, "logits_per_char": -0.7098487019538879, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 304, "native_id": "Mercury_SC_401605", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5015369653701782, "incorrect_loss_raw": 1.3592145442962646, "correct_loss_per_char": 0.7507684826850891, "incorrect_loss_per_char": 0.6796072721481323, "correct_loss_per_token": 1.5015369653701782, "incorrect_loss_per_token": 1.3592145442962646, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5015369653701782, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.5015369653701782, "logits_per_char": -0.7507684826850891, "num_chars": 2}, {"sum_logits": -1.4203838109970093, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4203838109970093, "logits_per_char": -0.7101919054985046, "num_chars": 2}, {"sum_logits": -1.4127782583236694, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4127782583236694, "logits_per_char": -0.7063891291618347, "num_chars": 2}, {"sum_logits": -1.2444815635681152, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.2444815635681152, "logits_per_char": -0.6222407817840576, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 305, "native_id": "TAKS_2009_5_36", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2846604585647583, "incorrect_loss_raw": 1.4416931867599487, "correct_loss_per_char": 0.6423302292823792, "incorrect_loss_per_char": 0.7208465933799744, "correct_loss_per_token": 1.2846604585647583, "incorrect_loss_per_token": 1.4416931867599487, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2846604585647583, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.2846604585647583, "logits_per_char": -0.6423302292823792, "num_chars": 2}, {"sum_logits": -1.257560133934021, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.257560133934021, "logits_per_char": -0.6287800669670105, "num_chars": 2}, {"sum_logits": -1.4228615760803223, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4228615760803223, "logits_per_char": -0.7114307880401611, "num_chars": 2}, {"sum_logits": -1.644657850265503, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.644657850265503, "logits_per_char": -0.8223289251327515, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 306, "native_id": "Mercury_7171570", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3002630472183228, "incorrect_loss_raw": 1.4234225749969482, "correct_loss_per_char": 0.6501315236091614, "incorrect_loss_per_char": 0.7117112874984741, "correct_loss_per_token": 1.3002630472183228, "incorrect_loss_per_token": 1.4234225749969482, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.441080093383789, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.441080093383789, "logits_per_char": -0.7205400466918945, "num_chars": 2}, {"sum_logits": -1.3756438493728638, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3756438493728638, "logits_per_char": -0.6878219246864319, "num_chars": 2}, {"sum_logits": -1.453543782234192, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.453543782234192, "logits_per_char": -0.726771891117096, "num_chars": 2}, {"sum_logits": -1.3002630472183228, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.3002630472183228, "logits_per_char": -0.6501315236091614, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 307, "native_id": "Mercury_SC_402057", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5356236696243286, "incorrect_loss_raw": 1.3461960951487224, "correct_loss_per_char": 0.7678118348121643, "incorrect_loss_per_char": 0.6730980475743612, "correct_loss_per_token": 1.5356236696243286, "incorrect_loss_per_token": 1.3461960951487224, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3732792139053345, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3732792139053345, "logits_per_char": -0.6866396069526672, "num_chars": 2}, {"sum_logits": -1.337807297706604, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.337807297706604, "logits_per_char": -0.668903648853302, "num_chars": 2}, {"sum_logits": -1.5356236696243286, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.5356236696243286, "logits_per_char": -0.7678118348121643, "num_chars": 2}, {"sum_logits": -1.3275017738342285, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.3275017738342285, "logits_per_char": -0.6637508869171143, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 308, "native_id": "Mercury_SC_413628", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3735562562942505, "incorrect_loss_raw": 1.4018059968948364, "correct_loss_per_char": 0.6867781281471252, "incorrect_loss_per_char": 0.7009029984474182, "correct_loss_per_token": 1.3735562562942505, "incorrect_loss_per_token": 1.4018059968948364, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5149282217025757, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.5149282217025757, "logits_per_char": -0.7574641108512878, "num_chars": 2}, {"sum_logits": -1.4326552152633667, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.4326552152633667, "logits_per_char": -0.7163276076316833, "num_chars": 2}, {"sum_logits": -1.3735562562942505, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.3735562562942505, "logits_per_char": -0.6867781281471252, "num_chars": 2}, {"sum_logits": -1.257834553718567, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.257834553718567, "logits_per_char": -0.6289172768592834, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 309, "native_id": "Mercury_LBS10131", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4471443891525269, "incorrect_loss_raw": 1.3916441202163696, "correct_loss_per_char": 0.7235721945762634, "incorrect_loss_per_char": 0.6958220601081848, "correct_loss_per_token": 1.4471443891525269, "incorrect_loss_per_token": 1.3916441202163696, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5410842895507812, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.5410842895507812, "logits_per_char": -0.7705421447753906, "num_chars": 2}, {"sum_logits": -1.5170692205429077, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.5170692205429077, "logits_per_char": -0.7585346102714539, "num_chars": 2}, {"sum_logits": -1.4471443891525269, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4471443891525269, "logits_per_char": -0.7235721945762634, "num_chars": 2}, {"sum_logits": -1.11677885055542, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.11677885055542, "logits_per_char": -0.55838942527771, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 310, "native_id": "Mercury_7032428", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4463332891464233, "incorrect_loss_raw": 1.3790767192840576, "correct_loss_per_char": 0.7231666445732117, "incorrect_loss_per_char": 0.6895383596420288, "correct_loss_per_token": 1.4463332891464233, "incorrect_loss_per_token": 1.3790767192840576, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.403922438621521, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.403922438621521, "logits_per_char": -0.7019612193107605, "num_chars": 2}, {"sum_logits": -1.4463332891464233, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.4463332891464233, "logits_per_char": -0.7231666445732117, "num_chars": 2}, {"sum_logits": -1.5017389059066772, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.5017389059066772, "logits_per_char": -0.7508694529533386, "num_chars": 2}, {"sum_logits": -1.2315688133239746, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": true, "logits_per_token": -1.2315688133239746, "logits_per_char": -0.6157844066619873, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 311, "native_id": "Mercury_7025008", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.356605052947998, "incorrect_loss_raw": 1.4019068479537964, "correct_loss_per_char": 0.678302526473999, "incorrect_loss_per_char": 0.7009534239768982, "correct_loss_per_token": 1.356605052947998, "incorrect_loss_per_token": 1.4019068479537964, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.356605052947998, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -1.356605052947998, "logits_per_char": -0.678302526473999, "num_chars": 2}, {"sum_logits": -1.4018192291259766, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.4018192291259766, "logits_per_char": -0.7009096145629883, "num_chars": 2}, {"sum_logits": -1.4299664497375488, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.4299664497375488, "logits_per_char": -0.7149832248687744, "num_chars": 2}, {"sum_logits": -1.3739348649978638, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.3739348649978638, "logits_per_char": -0.6869674324989319, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 312, "native_id": "MEA_2011_8_19", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4134749174118042, "incorrect_loss_raw": 1.3848777612050374, "correct_loss_per_char": 0.7067374587059021, "incorrect_loss_per_char": 0.6924388806025187, "correct_loss_per_token": 1.4134749174118042, "incorrect_loss_per_token": 1.3848777612050374, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3576542139053345, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.3576542139053345, "logits_per_char": -0.6788271069526672, "num_chars": 2}, {"sum_logits": -1.431483507156372, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.431483507156372, "logits_per_char": -0.715741753578186, "num_chars": 2}, {"sum_logits": -1.3654955625534058, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.3654955625534058, "logits_per_char": -0.6827477812767029, "num_chars": 2}, {"sum_logits": -1.4134749174118042, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4134749174118042, "logits_per_char": -0.7067374587059021, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 313, "native_id": "NYSEDREGENTS_2008_8_27", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.362648606300354, "incorrect_loss_raw": 1.4019462664922078, "correct_loss_per_char": 0.681324303150177, "incorrect_loss_per_char": 0.7009731332461039, "correct_loss_per_token": 1.362648606300354, "incorrect_loss_per_token": 1.4019462664922078, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4515231847763062, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4515231847763062, "logits_per_char": -0.7257615923881531, "num_chars": 2}, {"sum_logits": -1.379954218864441, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.379954218864441, "logits_per_char": -0.6899771094322205, "num_chars": 2}, {"sum_logits": -1.362648606300354, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.362648606300354, "logits_per_char": -0.681324303150177, "num_chars": 2}, {"sum_logits": -1.3743613958358765, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3743613958358765, "logits_per_char": -0.6871806979179382, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 314, "native_id": "VASoL_2007_5_22", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3822178840637207, "incorrect_loss_raw": 1.3946423530578613, "correct_loss_per_char": 0.6911089420318604, "incorrect_loss_per_char": 0.6973211765289307, "correct_loss_per_token": 1.3822178840637207, "incorrect_loss_per_token": 1.3946423530578613, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3476756811141968, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.3476756811141968, "logits_per_char": -0.6738378405570984, "num_chars": 2}, {"sum_logits": -1.3822178840637207, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.3822178840637207, "logits_per_char": -0.6911089420318604, "num_chars": 2}, {"sum_logits": -1.4577956199645996, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.4577956199645996, "logits_per_char": -0.7288978099822998, "num_chars": 2}, {"sum_logits": -1.3784557580947876, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.3784557580947876, "logits_per_char": -0.6892278790473938, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 315, "native_id": "NCEOGA_2013_5_19", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4636110067367554, "incorrect_loss_raw": 1.3757407267888386, "correct_loss_per_char": 0.7318055033683777, "incorrect_loss_per_char": 0.6878703633944193, "correct_loss_per_token": 1.4636110067367554, "incorrect_loss_per_token": 1.3757407267888386, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4636110067367554, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4636110067367554, "logits_per_char": -0.7318055033683777, "num_chars": 2}, {"sum_logits": -1.4850075244903564, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4850075244903564, "logits_per_char": -0.7425037622451782, "num_chars": 2}, {"sum_logits": -1.4538342952728271, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4538342952728271, "logits_per_char": -0.7269171476364136, "num_chars": 2}, {"sum_logits": -1.1883803606033325, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.1883803606033325, "logits_per_char": -0.5941901803016663, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 316, "native_id": "Mercury_7037555", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.691770315170288, "incorrect_loss_raw": 1.3497637112935383, "correct_loss_per_char": 0.845885157585144, "incorrect_loss_per_char": 0.6748818556467692, "correct_loss_per_token": 1.691770315170288, "incorrect_loss_per_token": 1.3497637112935383, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0035135746002197, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.0035135746002197, "logits_per_char": -0.5017567873001099, "num_chars": 2}, {"sum_logits": -1.4081344604492188, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4081344604492188, "logits_per_char": -0.7040672302246094, "num_chars": 2}, {"sum_logits": -1.691770315170288, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.691770315170288, "logits_per_char": -0.845885157585144, "num_chars": 2}, {"sum_logits": -1.6376430988311768, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.6376430988311768, "logits_per_char": -0.8188215494155884, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 317, "native_id": "Mercury_402132", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3305960893630981, "incorrect_loss_raw": 1.414730151494344, "correct_loss_per_char": 0.6652980446815491, "incorrect_loss_per_char": 0.707365075747172, "correct_loss_per_token": 1.3305960893630981, "incorrect_loss_per_token": 1.414730151494344, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4410630464553833, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.4410630464553833, "logits_per_char": -0.7205315232276917, "num_chars": 2}, {"sum_logits": -1.3305960893630981, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": true, "logits_per_token": -1.3305960893630981, "logits_per_char": -0.6652980446815491, "num_chars": 2}, {"sum_logits": -1.4613970518112183, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.4613970518112183, "logits_per_char": -0.7306985259056091, "num_chars": 2}, {"sum_logits": -1.3417303562164307, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.3417303562164307, "logits_per_char": -0.6708651781082153, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 318, "native_id": "MCAS_2006_8_24", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4169639348983765, "incorrect_loss_raw": 1.3836836417516072, "correct_loss_per_char": 0.7084819674491882, "incorrect_loss_per_char": 0.6918418208758036, "correct_loss_per_token": 1.4169639348983765, "incorrect_loss_per_token": 1.3836836417516072, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4169639348983765, "num_tokens": 1, "num_tokens_all": 436, "is_greedy": false, "logits_per_token": -1.4169639348983765, "logits_per_char": -0.7084819674491882, "num_chars": 2}, {"sum_logits": -1.380142331123352, "num_tokens": 1, "num_tokens_all": 436, "is_greedy": false, "logits_per_token": -1.380142331123352, "logits_per_char": -0.690071165561676, "num_chars": 2}, {"sum_logits": -1.4727212190628052, "num_tokens": 1, "num_tokens_all": 436, "is_greedy": false, "logits_per_token": -1.4727212190628052, "logits_per_char": -0.7363606095314026, "num_chars": 2}, {"sum_logits": -1.2981873750686646, "num_tokens": 1, "num_tokens_all": 436, "is_greedy": true, "logits_per_token": -1.2981873750686646, "logits_per_char": -0.6490936875343323, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 319, "native_id": "Mercury_7128923", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3724944591522217, "incorrect_loss_raw": 1.399419864018758, "correct_loss_per_char": 0.6862472295761108, "incorrect_loss_per_char": 0.699709932009379, "correct_loss_per_token": 1.3724944591522217, "incorrect_loss_per_token": 1.399419864018758, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4377143383026123, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.4377143383026123, "logits_per_char": -0.7188571691513062, "num_chars": 2}, {"sum_logits": -1.3724944591522217, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.3724944591522217, "logits_per_char": -0.6862472295761108, "num_chars": 2}, {"sum_logits": -1.2971103191375732, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.2971103191375732, "logits_per_char": -0.6485551595687866, "num_chars": 2}, {"sum_logits": -1.4634349346160889, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.4634349346160889, "logits_per_char": -0.7317174673080444, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 320, "native_id": "Mercury_416379", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.662718653678894, "incorrect_loss_raw": 1.3203074932098389, "correct_loss_per_char": 0.831359326839447, "incorrect_loss_per_char": 0.6601537466049194, "correct_loss_per_token": 1.662718653678894, "incorrect_loss_per_token": 1.3203074932098389, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.662718653678894, "num_tokens": 1, "num_tokens_all": 432, "is_greedy": false, "logits_per_token": -1.662718653678894, "logits_per_char": -0.831359326839447, "num_chars": 2}, {"sum_logits": -1.4285873174667358, "num_tokens": 1, "num_tokens_all": 432, "is_greedy": false, "logits_per_token": -1.4285873174667358, "logits_per_char": -0.7142936587333679, "num_chars": 2}, {"sum_logits": -1.3801149129867554, "num_tokens": 1, "num_tokens_all": 432, "is_greedy": false, "logits_per_token": -1.3801149129867554, "logits_per_char": -0.6900574564933777, "num_chars": 2}, {"sum_logits": -1.1522202491760254, "num_tokens": 1, "num_tokens_all": 432, "is_greedy": true, "logits_per_token": -1.1522202491760254, "logits_per_char": -0.5761101245880127, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 321, "native_id": "Mercury_7168053", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.354127287864685, "incorrect_loss_raw": 1.4174127578735352, "correct_loss_per_char": 0.6770636439323425, "incorrect_loss_per_char": 0.7087063789367676, "correct_loss_per_token": 1.354127287864685, "incorrect_loss_per_token": 1.4174127578735352, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6443232297897339, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.6443232297897339, "logits_per_char": -0.8221616148948669, "num_chars": 2}, {"sum_logits": -1.3876265287399292, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.3876265287399292, "logits_per_char": -0.6938132643699646, "num_chars": 2}, {"sum_logits": -1.354127287864685, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.354127287864685, "logits_per_char": -0.6770636439323425, "num_chars": 2}, {"sum_logits": -1.2202885150909424, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.2202885150909424, "logits_per_char": -0.6101442575454712, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 322, "native_id": "AKDE&ED_2008_8_1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4584360122680664, "incorrect_loss_raw": 1.369257887204488, "correct_loss_per_char": 0.7292180061340332, "incorrect_loss_per_char": 0.684628943602244, "correct_loss_per_token": 1.4584360122680664, "incorrect_loss_per_token": 1.369257887204488, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4584360122680664, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4584360122680664, "logits_per_char": -0.7292180061340332, "num_chars": 2}, {"sum_logits": -1.4106793403625488, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4106793403625488, "logits_per_char": -0.7053396701812744, "num_chars": 2}, {"sum_logits": -1.369030475616455, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.369030475616455, "logits_per_char": -0.6845152378082275, "num_chars": 2}, {"sum_logits": -1.3280638456344604, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.3280638456344604, "logits_per_char": -0.6640319228172302, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 323, "native_id": "Mercury_SC_415476", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4535125494003296, "incorrect_loss_raw": 1.3782221873601277, "correct_loss_per_char": 0.7267562747001648, "incorrect_loss_per_char": 0.6891110936800638, "correct_loss_per_token": 1.4535125494003296, "incorrect_loss_per_token": 1.3782221873601277, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4956481456756592, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4956481456756592, "logits_per_char": -0.7478240728378296, "num_chars": 2}, {"sum_logits": -1.4535125494003296, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4535125494003296, "logits_per_char": -0.7267562747001648, "num_chars": 2}, {"sum_logits": -1.430526614189148, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.430526614189148, "logits_per_char": -0.715263307094574, "num_chars": 2}, {"sum_logits": -1.2084918022155762, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.2084918022155762, "logits_per_char": -0.6042459011077881, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 324, "native_id": "Mercury_7106960", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.489714503288269, "incorrect_loss_raw": 1.361682931582133, "correct_loss_per_char": 0.7448572516441345, "incorrect_loss_per_char": 0.6808414657910665, "correct_loss_per_token": 1.489714503288269, "incorrect_loss_per_token": 1.361682931582133, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.489714503288269, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.489714503288269, "logits_per_char": -0.7448572516441345, "num_chars": 2}, {"sum_logits": -1.3803880214691162, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3803880214691162, "logits_per_char": -0.6901940107345581, "num_chars": 2}, {"sum_logits": -1.2960444688796997, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.2960444688796997, "logits_per_char": -0.6480222344398499, "num_chars": 2}, {"sum_logits": -1.408616304397583, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.408616304397583, "logits_per_char": -0.7043081521987915, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 325, "native_id": "Mercury_7160563", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4570869207382202, "incorrect_loss_raw": 1.376042087872823, "correct_loss_per_char": 0.7285434603691101, "incorrect_loss_per_char": 0.6880210439364115, "correct_loss_per_token": 1.4570869207382202, "incorrect_loss_per_token": 1.376042087872823, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.530307412147522, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": false, "logits_per_token": -1.530307412147522, "logits_per_char": -0.765153706073761, "num_chars": 2}, {"sum_logits": -1.3423196077346802, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": false, "logits_per_token": -1.3423196077346802, "logits_per_char": -0.6711598038673401, "num_chars": 2}, {"sum_logits": -1.4570869207382202, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": false, "logits_per_token": -1.4570869207382202, "logits_per_char": -0.7285434603691101, "num_chars": 2}, {"sum_logits": -1.255499243736267, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": true, "logits_per_token": -1.255499243736267, "logits_per_char": -0.6277496218681335, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 326, "native_id": "Mercury_7068583", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3483797311782837, "incorrect_loss_raw": 1.4057770172754924, "correct_loss_per_char": 0.6741898655891418, "incorrect_loss_per_char": 0.7028885086377462, "correct_loss_per_token": 1.3483797311782837, "incorrect_loss_per_token": 1.4057770172754924, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3528162240982056, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.3528162240982056, "logits_per_char": -0.6764081120491028, "num_chars": 2}, {"sum_logits": -1.3483797311782837, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.3483797311782837, "logits_per_char": -0.6741898655891418, "num_chars": 2}, {"sum_logits": -1.4111864566802979, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.4111864566802979, "logits_per_char": -0.7055932283401489, "num_chars": 2}, {"sum_logits": -1.4533283710479736, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.4533283710479736, "logits_per_char": -0.7266641855239868, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 327, "native_id": "Mercury_404638", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.466570258140564, "incorrect_loss_raw": 1.3690215746561687, "correct_loss_per_char": 0.733285129070282, "incorrect_loss_per_char": 0.6845107873280843, "correct_loss_per_token": 1.466570258140564, "incorrect_loss_per_token": 1.3690215746561687, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2984482049942017, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.2984482049942017, "logits_per_char": -0.6492241024971008, "num_chars": 2}, {"sum_logits": -1.3977402448654175, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3977402448654175, "logits_per_char": -0.6988701224327087, "num_chars": 2}, {"sum_logits": -1.4108762741088867, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4108762741088867, "logits_per_char": -0.7054381370544434, "num_chars": 2}, {"sum_logits": -1.466570258140564, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.466570258140564, "logits_per_char": -0.733285129070282, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 328, "native_id": "Mercury_SC_407138", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3932703733444214, "incorrect_loss_raw": 1.3910451332728069, "correct_loss_per_char": 0.6966351866722107, "incorrect_loss_per_char": 0.6955225666364034, "correct_loss_per_token": 1.3932703733444214, "incorrect_loss_per_token": 1.3910451332728069, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3932703733444214, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.3932703733444214, "logits_per_char": -0.6966351866722107, "num_chars": 2}, {"sum_logits": -1.4656943082809448, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4656943082809448, "logits_per_char": -0.7328471541404724, "num_chars": 2}, {"sum_logits": -1.3503432273864746, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.3503432273864746, "logits_per_char": -0.6751716136932373, "num_chars": 2}, {"sum_logits": -1.357097864151001, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.357097864151001, "logits_per_char": -0.6785489320755005, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 329, "native_id": "MCAS_2000_4_10", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3824260234832764, "incorrect_loss_raw": 1.3967159191767375, "correct_loss_per_char": 0.6912130117416382, "incorrect_loss_per_char": 0.6983579595883688, "correct_loss_per_token": 1.3824260234832764, "incorrect_loss_per_token": 1.3967159191767375, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5144786834716797, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.5144786834716797, "logits_per_char": -0.7572393417358398, "num_chars": 2}, {"sum_logits": -1.3838239908218384, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3838239908218384, "logits_per_char": -0.6919119954109192, "num_chars": 2}, {"sum_logits": -1.2918450832366943, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.2918450832366943, "logits_per_char": -0.6459225416183472, "num_chars": 2}, {"sum_logits": -1.3824260234832764, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3824260234832764, "logits_per_char": -0.6912130117416382, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 330, "native_id": "Mercury_177748", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4987684488296509, "incorrect_loss_raw": 1.360957384109497, "correct_loss_per_char": 0.7493842244148254, "incorrect_loss_per_char": 0.6804786920547485, "correct_loss_per_token": 1.4987684488296509, "incorrect_loss_per_token": 1.360957384109497, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3777990341186523, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.3777990341186523, "logits_per_char": -0.6888995170593262, "num_chars": 2}, {"sum_logits": -1.264444351196289, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.264444351196289, "logits_per_char": -0.6322221755981445, "num_chars": 2}, {"sum_logits": -1.4406287670135498, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4406287670135498, "logits_per_char": -0.7203143835067749, "num_chars": 2}, {"sum_logits": -1.4987684488296509, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4987684488296509, "logits_per_char": -0.7493842244148254, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 331, "native_id": "MCAS_2004_9_21-v1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4361225366592407, "incorrect_loss_raw": 1.376543641090393, "correct_loss_per_char": 0.7180612683296204, "incorrect_loss_per_char": 0.6882718205451965, "correct_loss_per_token": 1.4361225366592407, "incorrect_loss_per_token": 1.376543641090393, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3925890922546387, "num_tokens": 1, "num_tokens_all": 432, "is_greedy": false, "logits_per_token": -1.3925890922546387, "logits_per_char": -0.6962945461273193, "num_chars": 2}, {"sum_logits": -1.4217126369476318, "num_tokens": 1, "num_tokens_all": 432, "is_greedy": false, "logits_per_token": -1.4217126369476318, "logits_per_char": -0.7108563184738159, "num_chars": 2}, {"sum_logits": -1.4361225366592407, "num_tokens": 1, "num_tokens_all": 432, "is_greedy": false, "logits_per_token": -1.4361225366592407, "logits_per_char": -0.7180612683296204, "num_chars": 2}, {"sum_logits": -1.3153291940689087, "num_tokens": 1, "num_tokens_all": 432, "is_greedy": true, "logits_per_token": -1.3153291940689087, "logits_per_char": -0.6576645970344543, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 332, "native_id": "MDSA_2007_5_16", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6170620918273926, "incorrect_loss_raw": 1.3301146427790325, "correct_loss_per_char": 0.8085310459136963, "incorrect_loss_per_char": 0.6650573213895162, "correct_loss_per_token": 1.6170620918273926, "incorrect_loss_per_token": 1.3301146427790325, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6170620918273926, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.6170620918273926, "logits_per_char": -0.8085310459136963, "num_chars": 2}, {"sum_logits": -1.3676615953445435, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.3676615953445435, "logits_per_char": -0.6838307976722717, "num_chars": 2}, {"sum_logits": -1.426582932472229, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.426582932472229, "logits_per_char": -0.7132914662361145, "num_chars": 2}, {"sum_logits": -1.1960994005203247, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": true, "logits_per_token": -1.1960994005203247, "logits_per_char": -0.5980497002601624, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 333, "native_id": "Mercury_401763", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2746505737304688, "incorrect_loss_raw": 1.4384490251541138, "correct_loss_per_char": 0.6373252868652344, "incorrect_loss_per_char": 0.7192245125770569, "correct_loss_per_token": 1.2746505737304688, "incorrect_loss_per_token": 1.4384490251541138, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4941997528076172, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.4941997528076172, "logits_per_char": -0.7470998764038086, "num_chars": 2}, {"sum_logits": -1.2746505737304688, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": true, "logits_per_token": -1.2746505737304688, "logits_per_char": -0.6373252868652344, "num_chars": 2}, {"sum_logits": -1.5395474433898926, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.5395474433898926, "logits_per_char": -0.7697737216949463, "num_chars": 2}, {"sum_logits": -1.2815998792648315, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.2815998792648315, "logits_per_char": -0.6407999396324158, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 334, "native_id": "Mercury_7268118", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4907481670379639, "incorrect_loss_raw": 1.363470991452535, "correct_loss_per_char": 0.7453740835189819, "incorrect_loss_per_char": 0.6817354957262675, "correct_loss_per_token": 1.4907481670379639, "incorrect_loss_per_token": 1.363470991452535, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2556313276290894, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.2556313276290894, "logits_per_char": -0.6278156638145447, "num_chars": 2}, {"sum_logits": -1.4749162197113037, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4749162197113037, "logits_per_char": -0.7374581098556519, "num_chars": 2}, {"sum_logits": -1.4907481670379639, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4907481670379639, "logits_per_char": -0.7453740835189819, "num_chars": 2}, {"sum_logits": -1.359865427017212, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.359865427017212, "logits_per_char": -0.679932713508606, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 335, "native_id": "Mercury_403232", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3352943658828735, "incorrect_loss_raw": 1.409029205640157, "correct_loss_per_char": 0.6676471829414368, "incorrect_loss_per_char": 0.7045146028200785, "correct_loss_per_token": 1.3352943658828735, "incorrect_loss_per_token": 1.409029205640157, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3670291900634766, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.3670291900634766, "logits_per_char": -0.6835145950317383, "num_chars": 2}, {"sum_logits": -1.4681739807128906, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4681739807128906, "logits_per_char": -0.7340869903564453, "num_chars": 2}, {"sum_logits": -1.391884446144104, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.391884446144104, "logits_per_char": -0.695942223072052, "num_chars": 2}, {"sum_logits": -1.3352943658828735, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.3352943658828735, "logits_per_char": -0.6676471829414368, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 336, "native_id": "Mercury_415081", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.435314416885376, "incorrect_loss_raw": 1.3773947556813557, "correct_loss_per_char": 0.717657208442688, "incorrect_loss_per_char": 0.6886973778406779, "correct_loss_per_token": 1.435314416885376, "incorrect_loss_per_token": 1.3773947556813557, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.435314416885376, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.435314416885376, "logits_per_char": -0.717657208442688, "num_chars": 2}, {"sum_logits": -1.4599406719207764, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4599406719207764, "logits_per_char": -0.7299703359603882, "num_chars": 2}, {"sum_logits": -1.3551157712936401, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.3551157712936401, "logits_per_char": -0.6775578856468201, "num_chars": 2}, {"sum_logits": -1.3171278238296509, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.3171278238296509, "logits_per_char": -0.6585639119148254, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 337, "native_id": "Mercury_7206378", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.478442668914795, "incorrect_loss_raw": 1.3670728206634521, "correct_loss_per_char": 0.7392213344573975, "incorrect_loss_per_char": 0.6835364103317261, "correct_loss_per_token": 1.478442668914795, "incorrect_loss_per_token": 1.3670728206634521, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5089000463485718, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.5089000463485718, "logits_per_char": -0.7544500231742859, "num_chars": 2}, {"sum_logits": -1.478442668914795, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.478442668914795, "logits_per_char": -0.7392213344573975, "num_chars": 2}, {"sum_logits": -1.3140089511871338, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.3140089511871338, "logits_per_char": -0.6570044755935669, "num_chars": 2}, {"sum_logits": -1.2783094644546509, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": true, "logits_per_token": -1.2783094644546509, "logits_per_char": -0.6391547322273254, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 338, "native_id": "CSZ30169", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6006050109863281, "incorrect_loss_raw": 1.3312562704086304, "correct_loss_per_char": 0.8003025054931641, "incorrect_loss_per_char": 0.6656281352043152, "correct_loss_per_token": 1.6006050109863281, "incorrect_loss_per_token": 1.3312562704086304, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6006050109863281, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.6006050109863281, "logits_per_char": -0.8003025054931641, "num_chars": 2}, {"sum_logits": -1.4511336088180542, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4511336088180542, "logits_per_char": -0.7255668044090271, "num_chars": 2}, {"sum_logits": -1.2625082731246948, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.2625082731246948, "logits_per_char": -0.6312541365623474, "num_chars": 2}, {"sum_logits": -1.280126929283142, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.280126929283142, "logits_per_char": -0.640063464641571, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 339, "native_id": "Mercury_7013948", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2471007108688354, "incorrect_loss_raw": 1.4423418839772542, "correct_loss_per_char": 0.6235503554344177, "incorrect_loss_per_char": 0.7211709419886271, "correct_loss_per_token": 1.2471007108688354, "incorrect_loss_per_token": 1.4423418839772542, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.464695692062378, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.464695692062378, "logits_per_char": -0.732347846031189, "num_chars": 2}, {"sum_logits": -1.4151966571807861, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.4151966571807861, "logits_per_char": -0.7075983285903931, "num_chars": 2}, {"sum_logits": -1.4471333026885986, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.4471333026885986, "logits_per_char": -0.7235666513442993, "num_chars": 2}, {"sum_logits": -1.2471007108688354, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": true, "logits_per_token": -1.2471007108688354, "logits_per_char": -0.6235503554344177, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 340, "native_id": "Mercury_SC_402164", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.364334225654602, "incorrect_loss_raw": 1.405395229657491, "correct_loss_per_char": 0.682167112827301, "incorrect_loss_per_char": 0.7026976148287455, "correct_loss_per_token": 1.364334225654602, "incorrect_loss_per_token": 1.405395229657491, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.507222056388855, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.507222056388855, "logits_per_char": -0.7536110281944275, "num_chars": 2}, {"sum_logits": -1.4484412670135498, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.4484412670135498, "logits_per_char": -0.7242206335067749, "num_chars": 2}, {"sum_logits": -1.364334225654602, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.364334225654602, "logits_per_char": -0.682167112827301, "num_chars": 2}, {"sum_logits": -1.2605223655700684, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.2605223655700684, "logits_per_char": -0.6302611827850342, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 341, "native_id": "Mercury_400880", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3793460130691528, "incorrect_loss_raw": 1.407673676808675, "correct_loss_per_char": 0.6896730065345764, "incorrect_loss_per_char": 0.7038368384043375, "correct_loss_per_token": 1.3793460130691528, "incorrect_loss_per_token": 1.407673676808675, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5503079891204834, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.5503079891204834, "logits_per_char": -0.7751539945602417, "num_chars": 2}, {"sum_logits": -1.213858723640442, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.213858723640442, "logits_per_char": -0.606929361820221, "num_chars": 2}, {"sum_logits": -1.3793460130691528, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.3793460130691528, "logits_per_char": -0.6896730065345764, "num_chars": 2}, {"sum_logits": -1.4588543176651, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4588543176651, "logits_per_char": -0.72942715883255, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 342, "native_id": "Mercury_7040793", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.28156316280365, "incorrect_loss_raw": 1.4289910793304443, "correct_loss_per_char": 0.640781581401825, "incorrect_loss_per_char": 0.7144955396652222, "correct_loss_per_token": 1.28156316280365, "incorrect_loss_per_token": 1.4289910793304443, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4526472091674805, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4526472091674805, "logits_per_char": -0.7263236045837402, "num_chars": 2}, {"sum_logits": -1.28156316280365, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.28156316280365, "logits_per_char": -0.640781581401825, "num_chars": 2}, {"sum_logits": -1.4153846502304077, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4153846502304077, "logits_per_char": -0.7076923251152039, "num_chars": 2}, {"sum_logits": -1.4189413785934448, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4189413785934448, "logits_per_char": -0.7094706892967224, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 343, "native_id": "MDSA_2010_5_29", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3934003114700317, "incorrect_loss_raw": 1.3900381724039714, "correct_loss_per_char": 0.6967001557350159, "incorrect_loss_per_char": 0.6950190862019857, "correct_loss_per_token": 1.3934003114700317, "incorrect_loss_per_token": 1.3900381724039714, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.457426905632019, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.457426905632019, "logits_per_char": -0.7287134528160095, "num_chars": 2}, {"sum_logits": -1.3446125984191895, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": true, "logits_per_token": -1.3446125984191895, "logits_per_char": -0.6723062992095947, "num_chars": 2}, {"sum_logits": -1.3680750131607056, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.3680750131607056, "logits_per_char": -0.6840375065803528, "num_chars": 2}, {"sum_logits": -1.3934003114700317, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.3934003114700317, "logits_per_char": -0.6967001557350159, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 344, "native_id": "LEAP__8_10365", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4153382778167725, "incorrect_loss_raw": 1.3868857224782307, "correct_loss_per_char": 0.7076691389083862, "incorrect_loss_per_char": 0.6934428612391154, "correct_loss_per_token": 1.4153382778167725, "incorrect_loss_per_token": 1.3868857224782307, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4153382778167725, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.4153382778167725, "logits_per_char": -0.7076691389083862, "num_chars": 2}, {"sum_logits": -1.3715200424194336, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.3715200424194336, "logits_per_char": -0.6857600212097168, "num_chars": 2}, {"sum_logits": -1.4239113330841064, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.4239113330841064, "logits_per_char": -0.7119556665420532, "num_chars": 2}, {"sum_logits": -1.3652257919311523, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": true, "logits_per_token": -1.3652257919311523, "logits_per_char": -0.6826128959655762, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 345, "native_id": "Mercury_SC_401295", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.415574312210083, "incorrect_loss_raw": 1.3862082560857136, "correct_loss_per_char": 0.7077871561050415, "incorrect_loss_per_char": 0.6931041280428568, "correct_loss_per_token": 1.415574312210083, "incorrect_loss_per_token": 1.3862082560857136, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5107394456863403, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.5107394456863403, "logits_per_char": -0.7553697228431702, "num_chars": 2}, {"sum_logits": -1.415574312210083, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.415574312210083, "logits_per_char": -0.7077871561050415, "num_chars": 2}, {"sum_logits": -1.3760603666305542, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.3760603666305542, "logits_per_char": -0.6880301833152771, "num_chars": 2}, {"sum_logits": -1.2718249559402466, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": true, "logits_per_token": -1.2718249559402466, "logits_per_char": -0.6359124779701233, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 346, "native_id": "MCAS_2012_5_23625", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4475133419036865, "incorrect_loss_raw": 1.373354395230611, "correct_loss_per_char": 0.7237566709518433, "incorrect_loss_per_char": 0.6866771976153055, "correct_loss_per_token": 1.4475133419036865, "incorrect_loss_per_token": 1.373354395230611, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3643146753311157, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.3643146753311157, "logits_per_char": -0.6821573376655579, "num_chars": 2}, {"sum_logits": -1.4297947883605957, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4297947883605957, "logits_per_char": -0.7148973941802979, "num_chars": 2}, {"sum_logits": -1.4475133419036865, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4475133419036865, "logits_per_char": -0.7237566709518433, "num_chars": 2}, {"sum_logits": -1.325953722000122, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.325953722000122, "logits_per_char": -0.662976861000061, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 347, "native_id": "Mercury_7268048", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.49446702003479, "incorrect_loss_raw": 1.3667775789896648, "correct_loss_per_char": 0.747233510017395, "incorrect_loss_per_char": 0.6833887894948324, "correct_loss_per_token": 1.49446702003479, "incorrect_loss_per_token": 1.3667775789896648, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2210156917572021, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.2210156917572021, "logits_per_char": -0.6105078458786011, "num_chars": 2}, {"sum_logits": -1.3402060270309448, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.3402060270309448, "logits_per_char": -0.6701030135154724, "num_chars": 2}, {"sum_logits": -1.49446702003479, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.49446702003479, "logits_per_char": -0.747233510017395, "num_chars": 2}, {"sum_logits": -1.5391110181808472, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.5391110181808472, "logits_per_char": -0.7695555090904236, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 348, "native_id": "Mercury_SC_402629", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5806384086608887, "incorrect_loss_raw": 1.3346993923187256, "correct_loss_per_char": 0.7903192043304443, "incorrect_loss_per_char": 0.6673496961593628, "correct_loss_per_token": 1.5806384086608887, "incorrect_loss_per_token": 1.3346993923187256, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5806384086608887, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.5806384086608887, "logits_per_char": -0.7903192043304443, "num_chars": 2}, {"sum_logits": -1.3065489530563354, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.3065489530563354, "logits_per_char": -0.6532744765281677, "num_chars": 2}, {"sum_logits": -1.3774749040603638, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3774749040603638, "logits_per_char": -0.6887374520301819, "num_chars": 2}, {"sum_logits": -1.3200743198394775, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3200743198394775, "logits_per_char": -0.6600371599197388, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 349, "native_id": "NCEOGA_2013_8_42", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5508246421813965, "incorrect_loss_raw": 1.3461824258168538, "correct_loss_per_char": 0.7754123210906982, "incorrect_loss_per_char": 0.6730912129084269, "correct_loss_per_token": 1.5508246421813965, "incorrect_loss_per_token": 1.3461824258168538, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3463410139083862, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.3463410139083862, "logits_per_char": -0.6731705069541931, "num_chars": 2}, {"sum_logits": -1.2293847799301147, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.2293847799301147, "logits_per_char": -0.6146923899650574, "num_chars": 2}, {"sum_logits": -1.4628214836120605, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4628214836120605, "logits_per_char": -0.7314107418060303, "num_chars": 2}, {"sum_logits": -1.5508246421813965, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.5508246421813965, "logits_per_char": -0.7754123210906982, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 350, "native_id": "Mercury_412463", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3242805004119873, "incorrect_loss_raw": 1.4527894655863445, "correct_loss_per_char": 0.6621402502059937, "incorrect_loss_per_char": 0.7263947327931722, "correct_loss_per_token": 1.3242805004119873, "incorrect_loss_per_token": 1.4527894655863445, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5737308263778687, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.5737308263778687, "logits_per_char": -0.7868654131889343, "num_chars": 2}, {"sum_logits": -1.6825977563858032, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.6825977563858032, "logits_per_char": -0.8412988781929016, "num_chars": 2}, {"sum_logits": -1.1020398139953613, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.1020398139953613, "logits_per_char": -0.5510199069976807, "num_chars": 2}, {"sum_logits": -1.3242805004119873, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3242805004119873, "logits_per_char": -0.6621402502059937, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 351, "native_id": "Mercury_409295", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3642680644989014, "incorrect_loss_raw": 1.4037978649139404, "correct_loss_per_char": 0.6821340322494507, "incorrect_loss_per_char": 0.7018989324569702, "correct_loss_per_token": 1.3642680644989014, "incorrect_loss_per_token": 1.4037978649139404, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4357177019119263, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.4357177019119263, "logits_per_char": -0.7178588509559631, "num_chars": 2}, {"sum_logits": -1.3642680644989014, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.3642680644989014, "logits_per_char": -0.6821340322494507, "num_chars": 2}, {"sum_logits": -1.5070072412490845, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.5070072412490845, "logits_per_char": -0.7535036206245422, "num_chars": 2}, {"sum_logits": -1.2686686515808105, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -1.2686686515808105, "logits_per_char": -0.6343343257904053, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 352, "native_id": "Mercury_404609", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2735692262649536, "incorrect_loss_raw": 1.432835300763448, "correct_loss_per_char": 0.6367846131324768, "incorrect_loss_per_char": 0.716417650381724, "correct_loss_per_token": 1.2735692262649536, "incorrect_loss_per_token": 1.432835300763448, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4661649465560913, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4661649465560913, "logits_per_char": -0.7330824732780457, "num_chars": 2}, {"sum_logits": -1.4085075855255127, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4085075855255127, "logits_per_char": -0.7042537927627563, "num_chars": 2}, {"sum_logits": -1.4238333702087402, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4238333702087402, "logits_per_char": -0.7119166851043701, "num_chars": 2}, {"sum_logits": -1.2735692262649536, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.2735692262649536, "logits_per_char": -0.6367846131324768, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 353, "native_id": "Mercury_7230090", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.423350214958191, "incorrect_loss_raw": 1.3831796248753865, "correct_loss_per_char": 0.7116751074790955, "incorrect_loss_per_char": 0.6915898124376932, "correct_loss_per_token": 1.423350214958191, "incorrect_loss_per_token": 1.3831796248753865, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.49775230884552, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.49775230884552, "logits_per_char": -0.74887615442276, "num_chars": 2}, {"sum_logits": -1.319123387336731, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.319123387336731, "logits_per_char": -0.6595616936683655, "num_chars": 2}, {"sum_logits": -1.423350214958191, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.423350214958191, "logits_per_char": -0.7116751074790955, "num_chars": 2}, {"sum_logits": -1.3326631784439087, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.3326631784439087, "logits_per_char": -0.6663315892219543, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 354, "native_id": "Mercury_7057488", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3187556266784668, "incorrect_loss_raw": 1.4161628882090251, "correct_loss_per_char": 0.6593778133392334, "incorrect_loss_per_char": 0.7080814441045126, "correct_loss_per_token": 1.3187556266784668, "incorrect_loss_per_token": 1.4161628882090251, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.379732608795166, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.379732608795166, "logits_per_char": -0.689866304397583, "num_chars": 2}, {"sum_logits": -1.4437673091888428, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4437673091888428, "logits_per_char": -0.7218836545944214, "num_chars": 2}, {"sum_logits": -1.4249887466430664, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4249887466430664, "logits_per_char": -0.7124943733215332, "num_chars": 2}, {"sum_logits": -1.3187556266784668, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.3187556266784668, "logits_per_char": -0.6593778133392334, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 355, "native_id": "MDSA_2009_4_1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.358313798904419, "incorrect_loss_raw": 1.4038731654485066, "correct_loss_per_char": 0.6791568994522095, "incorrect_loss_per_char": 0.7019365827242533, "correct_loss_per_token": 1.358313798904419, "incorrect_loss_per_token": 1.4038731654485066, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.403752088546753, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.403752088546753, "logits_per_char": -0.7018760442733765, "num_chars": 2}, {"sum_logits": -1.3407435417175293, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.3407435417175293, "logits_per_char": -0.6703717708587646, "num_chars": 2}, {"sum_logits": -1.4671238660812378, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4671238660812378, "logits_per_char": -0.7335619330406189, "num_chars": 2}, {"sum_logits": -1.358313798904419, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.358313798904419, "logits_per_char": -0.6791568994522095, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 356, "native_id": "Mercury_7150728", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4569162130355835, "incorrect_loss_raw": 1.3722227414449055, "correct_loss_per_char": 0.7284581065177917, "incorrect_loss_per_char": 0.6861113707224528, "correct_loss_per_token": 1.4569162130355835, "incorrect_loss_per_token": 1.3722227414449055, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4569162130355835, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.4569162130355835, "logits_per_char": -0.7284581065177917, "num_chars": 2}, {"sum_logits": -1.2922053337097168, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": true, "logits_per_token": -1.2922053337097168, "logits_per_char": -0.6461026668548584, "num_chars": 2}, {"sum_logits": -1.4712262153625488, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.4712262153625488, "logits_per_char": -0.7356131076812744, "num_chars": 2}, {"sum_logits": -1.3532366752624512, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.3532366752624512, "logits_per_char": -0.6766183376312256, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 357, "native_id": "Mercury_402207", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4693291187286377, "incorrect_loss_raw": 1.3722504774729412, "correct_loss_per_char": 0.7346645593643188, "incorrect_loss_per_char": 0.6861252387364706, "correct_loss_per_token": 1.4693291187286377, "incorrect_loss_per_token": 1.3722504774729412, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4961154460906982, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.4961154460906982, "logits_per_char": -0.7480577230453491, "num_chars": 2}, {"sum_logits": -1.4693291187286377, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.4693291187286377, "logits_per_char": -0.7346645593643188, "num_chars": 2}, {"sum_logits": -1.409226894378662, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.409226894378662, "logits_per_char": -0.704613447189331, "num_chars": 2}, {"sum_logits": -1.211409091949463, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": true, "logits_per_token": -1.211409091949463, "logits_per_char": -0.6057045459747314, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 358, "native_id": "Mercury_411732", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2665818929672241, "incorrect_loss_raw": 1.449637492497762, "correct_loss_per_char": 0.6332909464836121, "incorrect_loss_per_char": 0.724818746248881, "correct_loss_per_token": 1.2665818929672241, "incorrect_loss_per_token": 1.449637492497762, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3445634841918945, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.3445634841918945, "logits_per_char": -0.6722817420959473, "num_chars": 2}, {"sum_logits": -1.5392231941223145, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.5392231941223145, "logits_per_char": -0.7696115970611572, "num_chars": 2}, {"sum_logits": -1.4651257991790771, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4651257991790771, "logits_per_char": -0.7325628995895386, "num_chars": 2}, {"sum_logits": -1.2665818929672241, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.2665818929672241, "logits_per_char": -0.6332909464836121, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 359, "native_id": "Mercury_7270113", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4175840616226196, "incorrect_loss_raw": 1.3823047876358032, "correct_loss_per_char": 0.7087920308113098, "incorrect_loss_per_char": 0.6911523938179016, "correct_loss_per_token": 1.4175840616226196, "incorrect_loss_per_token": 1.3823047876358032, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4070557355880737, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4070557355880737, "logits_per_char": -0.7035278677940369, "num_chars": 2}, {"sum_logits": -1.420246958732605, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.420246958732605, "logits_per_char": -0.7101234793663025, "num_chars": 2}, {"sum_logits": -1.4175840616226196, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4175840616226196, "logits_per_char": -0.7087920308113098, "num_chars": 2}, {"sum_logits": -1.319611668586731, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.319611668586731, "logits_per_char": -0.6598058342933655, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 360, "native_id": "AKDE&ED_2008_8_3", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4558402299880981, "incorrect_loss_raw": 1.3701834281285603, "correct_loss_per_char": 0.7279201149940491, "incorrect_loss_per_char": 0.6850917140642802, "correct_loss_per_token": 1.4558402299880981, "incorrect_loss_per_token": 1.3701834281285603, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4508236646652222, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.4508236646652222, "logits_per_char": -0.7254118323326111, "num_chars": 2}, {"sum_logits": -1.4558402299880981, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.4558402299880981, "logits_per_char": -0.7279201149940491, "num_chars": 2}, {"sum_logits": -1.3529129028320312, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.3529129028320312, "logits_per_char": -0.6764564514160156, "num_chars": 2}, {"sum_logits": -1.3068137168884277, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": true, "logits_per_token": -1.3068137168884277, "logits_per_char": -0.6534068584442139, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 361, "native_id": "MCAS_1999_8_1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4132622480392456, "incorrect_loss_raw": 1.3833613793055217, "correct_loss_per_char": 0.7066311240196228, "incorrect_loss_per_char": 0.6916806896527609, "correct_loss_per_token": 1.4132622480392456, "incorrect_loss_per_token": 1.3833613793055217, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4132622480392456, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4132622480392456, "logits_per_char": -0.7066311240196228, "num_chars": 2}, {"sum_logits": -1.4099206924438477, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4099206924438477, "logits_per_char": -0.7049603462219238, "num_chars": 2}, {"sum_logits": -1.433404803276062, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.433404803276062, "logits_per_char": -0.716702401638031, "num_chars": 2}, {"sum_logits": -1.3067586421966553, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.3067586421966553, "logits_per_char": -0.6533793210983276, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 362, "native_id": "NYSEDREGENTS_2015_4_24", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5393829345703125, "incorrect_loss_raw": 1.3467131455739338, "correct_loss_per_char": 0.7696914672851562, "incorrect_loss_per_char": 0.6733565727869669, "correct_loss_per_token": 1.5393829345703125, "incorrect_loss_per_token": 1.3467131455739338, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3032982349395752, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.3032982349395752, "logits_per_char": -0.6516491174697876, "num_chars": 2}, {"sum_logits": -1.4283496141433716, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4283496141433716, "logits_per_char": -0.7141748070716858, "num_chars": 2}, {"sum_logits": -1.5393829345703125, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.5393829345703125, "logits_per_char": -0.7696914672851562, "num_chars": 2}, {"sum_logits": -1.308491587638855, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.308491587638855, "logits_per_char": -0.6542457938194275, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 363, "native_id": "Mercury_7122640", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5282503366470337, "incorrect_loss_raw": 1.3483237822850545, "correct_loss_per_char": 0.7641251683235168, "incorrect_loss_per_char": 0.6741618911425272, "correct_loss_per_token": 1.5282503366470337, "incorrect_loss_per_token": 1.3483237822850545, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5282503366470337, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.5282503366470337, "logits_per_char": -0.7641251683235168, "num_chars": 2}, {"sum_logits": -1.3736969232559204, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3736969232559204, "logits_per_char": -0.6868484616279602, "num_chars": 2}, {"sum_logits": -1.3447301387786865, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3447301387786865, "logits_per_char": -0.6723650693893433, "num_chars": 2}, {"sum_logits": -1.3265442848205566, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.3265442848205566, "logits_per_char": -0.6632721424102783, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 364, "native_id": "Mercury_402547", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6544387340545654, "incorrect_loss_raw": 1.3534642457962036, "correct_loss_per_char": 0.8272193670272827, "incorrect_loss_per_char": 0.6767321228981018, "correct_loss_per_token": 1.6544387340545654, "incorrect_loss_per_token": 1.3534642457962036, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6670262813568115, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.6670262813568115, "logits_per_char": -0.8335131406784058, "num_chars": 2}, {"sum_logits": -1.3640339374542236, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.3640339374542236, "logits_per_char": -0.6820169687271118, "num_chars": 2}, {"sum_logits": -1.6544387340545654, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.6544387340545654, "logits_per_char": -0.8272193670272827, "num_chars": 2}, {"sum_logits": -1.0293325185775757, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": true, "logits_per_token": -1.0293325185775757, "logits_per_char": -0.5146662592887878, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 365, "native_id": "Mercury_7133945", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3763624429702759, "incorrect_loss_raw": 1.402591625849406, "correct_loss_per_char": 0.6881812214851379, "incorrect_loss_per_char": 0.701295812924703, "correct_loss_per_token": 1.3763624429702759, "incorrect_loss_per_token": 1.402591625849406, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2380280494689941, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.2380280494689941, "logits_per_char": -0.6190140247344971, "num_chars": 2}, {"sum_logits": -1.3763624429702759, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.3763624429702759, "logits_per_char": -0.6881812214851379, "num_chars": 2}, {"sum_logits": -1.4629267454147339, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4629267454147339, "logits_per_char": -0.7314633727073669, "num_chars": 2}, {"sum_logits": -1.5068200826644897, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.5068200826644897, "logits_per_char": -0.7534100413322449, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 366, "native_id": "Mercury_7199028", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.532031536102295, "incorrect_loss_raw": 1.3574970563252766, "correct_loss_per_char": 0.7660157680511475, "incorrect_loss_per_char": 0.6787485281626383, "correct_loss_per_token": 1.532031536102295, "incorrect_loss_per_token": 1.3574970563252766, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5285624265670776, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.5285624265670776, "logits_per_char": -0.7642812132835388, "num_chars": 2}, {"sum_logits": -1.532031536102295, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.532031536102295, "logits_per_char": -0.7660157680511475, "num_chars": 2}, {"sum_logits": -1.3771727085113525, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3771727085113525, "logits_per_char": -0.6885863542556763, "num_chars": 2}, {"sum_logits": -1.1667560338974, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.1667560338974, "logits_per_char": -0.5833780169487, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 367, "native_id": "Mercury_7217298", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.238671064376831, "incorrect_loss_raw": 1.4582188129425049, "correct_loss_per_char": 0.6193355321884155, "incorrect_loss_per_char": 0.7291094064712524, "correct_loss_per_token": 1.238671064376831, "incorrect_loss_per_token": 1.4582188129425049, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.238671064376831, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.238671064376831, "logits_per_char": -0.6193355321884155, "num_chars": 2}, {"sum_logits": -1.2854231595993042, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.2854231595993042, "logits_per_char": -0.6427115797996521, "num_chars": 2}, {"sum_logits": -1.59799063205719, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.59799063205719, "logits_per_char": -0.798995316028595, "num_chars": 2}, {"sum_logits": -1.4912426471710205, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4912426471710205, "logits_per_char": -0.7456213235855103, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 368, "native_id": "Mercury_7057680", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6147128343582153, "incorrect_loss_raw": 1.333491325378418, "correct_loss_per_char": 0.8073564171791077, "incorrect_loss_per_char": 0.666745662689209, "correct_loss_per_token": 1.6147128343582153, "incorrect_loss_per_token": 1.333491325378418, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1710609197616577, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.1710609197616577, "logits_per_char": -0.5855304598808289, "num_chars": 2}, {"sum_logits": -1.3757834434509277, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3757834434509277, "logits_per_char": -0.6878917217254639, "num_chars": 2}, {"sum_logits": -1.6147128343582153, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.6147128343582153, "logits_per_char": -0.8073564171791077, "num_chars": 2}, {"sum_logits": -1.4536296129226685, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4536296129226685, "logits_per_char": -0.7268148064613342, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 369, "native_id": "Mercury_SC_400404", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3132234811782837, "incorrect_loss_raw": 1.4197429418563843, "correct_loss_per_char": 0.6566117405891418, "incorrect_loss_per_char": 0.7098714709281921, "correct_loss_per_token": 1.3132234811782837, "incorrect_loss_per_token": 1.4197429418563843, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4913592338562012, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4913592338562012, "logits_per_char": -0.7456796169281006, "num_chars": 2}, {"sum_logits": -1.4317107200622559, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4317107200622559, "logits_per_char": -0.7158553600311279, "num_chars": 2}, {"sum_logits": -1.3361588716506958, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.3361588716506958, "logits_per_char": -0.6680794358253479, "num_chars": 2}, {"sum_logits": -1.3132234811782837, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.3132234811782837, "logits_per_char": -0.6566117405891418, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 370, "native_id": "Mercury_SC_408030", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3429734706878662, "incorrect_loss_raw": 1.4096782207489014, "correct_loss_per_char": 0.6714867353439331, "incorrect_loss_per_char": 0.7048391103744507, "correct_loss_per_token": 1.3429734706878662, "incorrect_loss_per_token": 1.4096782207489014, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3429734706878662, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.3429734706878662, "logits_per_char": -0.6714867353439331, "num_chars": 2}, {"sum_logits": -1.454898476600647, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.454898476600647, "logits_per_char": -0.7274492383003235, "num_chars": 2}, {"sum_logits": -1.4657987356185913, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4657987356185913, "logits_per_char": -0.7328993678092957, "num_chars": 2}, {"sum_logits": -1.3083374500274658, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.3083374500274658, "logits_per_char": -0.6541687250137329, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 371, "native_id": "Mercury_415083", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4575482606887817, "incorrect_loss_raw": 1.3724518616994221, "correct_loss_per_char": 0.7287741303443909, "incorrect_loss_per_char": 0.6862259308497111, "correct_loss_per_token": 1.4575482606887817, "incorrect_loss_per_token": 1.3724518616994221, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4852502346038818, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4852502346038818, "logits_per_char": -0.7426251173019409, "num_chars": 2}, {"sum_logits": -1.4575482606887817, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4575482606887817, "logits_per_char": -0.7287741303443909, "num_chars": 2}, {"sum_logits": -1.3570648431777954, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3570648431777954, "logits_per_char": -0.6785324215888977, "num_chars": 2}, {"sum_logits": -1.2750405073165894, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.2750405073165894, "logits_per_char": -0.6375202536582947, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 372, "native_id": "Mercury_409114", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4054514169692993, "incorrect_loss_raw": 1.3847055037816365, "correct_loss_per_char": 0.7027257084846497, "incorrect_loss_per_char": 0.6923527518908182, "correct_loss_per_token": 1.4054514169692993, "incorrect_loss_per_token": 1.3847055037816365, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3628169298171997, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.3628169298171997, "logits_per_char": -0.6814084649085999, "num_chars": 2}, {"sum_logits": -1.4054514169692993, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4054514169692993, "logits_per_char": -0.7027257084846497, "num_chars": 2}, {"sum_logits": -1.3974392414093018, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3974392414093018, "logits_per_char": -0.6987196207046509, "num_chars": 2}, {"sum_logits": -1.3938603401184082, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3938603401184082, "logits_per_char": -0.6969301700592041, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 373, "native_id": "Mercury_SC_415006", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3637096881866455, "incorrect_loss_raw": 1.3997428019841511, "correct_loss_per_char": 0.6818548440933228, "incorrect_loss_per_char": 0.6998714009920756, "correct_loss_per_token": 1.3637096881866455, "incorrect_loss_per_token": 1.3997428019841511, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3637096881866455, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.3637096881866455, "logits_per_char": -0.6818548440933228, "num_chars": 2}, {"sum_logits": -1.4386118650436401, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4386118650436401, "logits_per_char": -0.7193059325218201, "num_chars": 2}, {"sum_logits": -1.3689998388290405, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.3689998388290405, "logits_per_char": -0.6844999194145203, "num_chars": 2}, {"sum_logits": -1.391616702079773, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.391616702079773, "logits_per_char": -0.6958083510398865, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 374, "native_id": "MSA_2012_5_15", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4498399496078491, "incorrect_loss_raw": 1.3728591203689575, "correct_loss_per_char": 0.7249199748039246, "incorrect_loss_per_char": 0.6864295601844788, "correct_loss_per_token": 1.4498399496078491, "incorrect_loss_per_token": 1.3728591203689575, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4393941164016724, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.4393941164016724, "logits_per_char": -0.7196970582008362, "num_chars": 2}, {"sum_logits": -1.3959290981292725, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.3959290981292725, "logits_per_char": -0.6979645490646362, "num_chars": 2}, {"sum_logits": -1.4498399496078491, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": false, "logits_per_token": -1.4498399496078491, "logits_per_char": -0.7249199748039246, "num_chars": 2}, {"sum_logits": -1.2832541465759277, "num_tokens": 1, "num_tokens_all": 467, "is_greedy": true, "logits_per_token": -1.2832541465759277, "logits_per_char": -0.6416270732879639, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 375, "native_id": "Mercury_SC_402612", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3426895141601562, "incorrect_loss_raw": 1.4089750448862712, "correct_loss_per_char": 0.6713447570800781, "incorrect_loss_per_char": 0.7044875224431356, "correct_loss_per_token": 1.3426895141601562, "incorrect_loss_per_token": 1.4089750448862712, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3369321823120117, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.3369321823120117, "logits_per_char": -0.6684660911560059, "num_chars": 2}, {"sum_logits": -1.4368562698364258, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4368562698364258, "logits_per_char": -0.7184281349182129, "num_chars": 2}, {"sum_logits": -1.453136682510376, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.453136682510376, "logits_per_char": -0.726568341255188, "num_chars": 2}, {"sum_logits": -1.3426895141601562, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.3426895141601562, "logits_per_char": -0.6713447570800781, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 376, "native_id": "Mercury_SC_405937", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3783953189849854, "incorrect_loss_raw": 1.403989553451538, "correct_loss_per_char": 0.6891976594924927, "incorrect_loss_per_char": 0.701994776725769, "correct_loss_per_token": 1.3783953189849854, "incorrect_loss_per_token": 1.403989553451538, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2140859365463257, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.2140859365463257, "logits_per_char": -0.6070429682731628, "num_chars": 2}, {"sum_logits": -1.3783953189849854, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3783953189849854, "logits_per_char": -0.6891976594924927, "num_chars": 2}, {"sum_logits": -1.5143601894378662, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.5143601894378662, "logits_per_char": -0.7571800947189331, "num_chars": 2}, {"sum_logits": -1.4835225343704224, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4835225343704224, "logits_per_char": -0.7417612671852112, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 377, "native_id": "Mercury_SC_416459", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4116272926330566, "incorrect_loss_raw": 1.3894087076187134, "correct_loss_per_char": 0.7058136463165283, "incorrect_loss_per_char": 0.6947043538093567, "correct_loss_per_token": 1.4116272926330566, "incorrect_loss_per_token": 1.3894087076187134, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3674355745315552, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3674355745315552, "logits_per_char": -0.6837177872657776, "num_chars": 2}, {"sum_logits": -1.2998323440551758, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.2998323440551758, "logits_per_char": -0.6499161720275879, "num_chars": 2}, {"sum_logits": -1.4116272926330566, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4116272926330566, "logits_per_char": -0.7058136463165283, "num_chars": 2}, {"sum_logits": -1.5009582042694092, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.5009582042694092, "logits_per_char": -0.7504791021347046, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 378, "native_id": "NAEP_2000_8_S21+4", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.463555097579956, "incorrect_loss_raw": 1.3708227475484211, "correct_loss_per_char": 0.731777548789978, "incorrect_loss_per_char": 0.6854113737742106, "correct_loss_per_token": 1.463555097579956, "incorrect_loss_per_token": 1.3708227475484211, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.413788914680481, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.413788914680481, "logits_per_char": -0.7068944573402405, "num_chars": 2}, {"sum_logits": -1.4104206562042236, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.4104206562042236, "logits_per_char": -0.7052103281021118, "num_chars": 2}, {"sum_logits": -1.463555097579956, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.463555097579956, "logits_per_char": -0.731777548789978, "num_chars": 2}, {"sum_logits": -1.288258671760559, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -1.288258671760559, "logits_per_char": -0.6441293358802795, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 379, "native_id": "Mercury_7072380", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3187646865844727, "incorrect_loss_raw": 1.4207587639490764, "correct_loss_per_char": 0.6593823432922363, "incorrect_loss_per_char": 0.7103793819745382, "correct_loss_per_token": 1.3187646865844727, "incorrect_loss_per_token": 1.4207587639490764, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.271968126296997, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.271968126296997, "logits_per_char": -0.6359840631484985, "num_chars": 2}, {"sum_logits": -1.3187646865844727, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3187646865844727, "logits_per_char": -0.6593823432922363, "num_chars": 2}, {"sum_logits": -1.4723349809646606, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4723349809646606, "logits_per_char": -0.7361674904823303, "num_chars": 2}, {"sum_logits": -1.5179731845855713, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.5179731845855713, "logits_per_char": -0.7589865922927856, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 380, "native_id": "Mercury_SC_401373", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.413540244102478, "incorrect_loss_raw": 1.3854044675827026, "correct_loss_per_char": 0.706770122051239, "incorrect_loss_per_char": 0.6927022337913513, "correct_loss_per_token": 1.413540244102478, "incorrect_loss_per_token": 1.3854044675827026, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.413540244102478, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.413540244102478, "logits_per_char": -0.706770122051239, "num_chars": 2}, {"sum_logits": -1.4350959062576294, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.4350959062576294, "logits_per_char": -0.7175479531288147, "num_chars": 2}, {"sum_logits": -1.421356439590454, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.421356439590454, "logits_per_char": -0.710678219795227, "num_chars": 2}, {"sum_logits": -1.2997610569000244, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -1.2997610569000244, "logits_per_char": -0.6498805284500122, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 381, "native_id": "Mercury_SC_400579", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5014604330062866, "incorrect_loss_raw": 1.3567941586176555, "correct_loss_per_char": 0.7507302165031433, "incorrect_loss_per_char": 0.6783970793088278, "correct_loss_per_token": 1.5014604330062866, "incorrect_loss_per_token": 1.3567941586176555, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3062621355056763, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.3062621355056763, "logits_per_char": -0.6531310677528381, "num_chars": 2}, {"sum_logits": -1.3873100280761719, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3873100280761719, "logits_per_char": -0.6936550140380859, "num_chars": 2}, {"sum_logits": -1.5014604330062866, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.5014604330062866, "logits_per_char": -0.7507302165031433, "num_chars": 2}, {"sum_logits": -1.3768103122711182, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3768103122711182, "logits_per_char": -0.6884051561355591, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 382, "native_id": "MCAS_2003_5_14", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4434291124343872, "incorrect_loss_raw": 1.3765615622202556, "correct_loss_per_char": 0.7217145562171936, "incorrect_loss_per_char": 0.6882807811101278, "correct_loss_per_token": 1.4434291124343872, "incorrect_loss_per_token": 1.3765615622202556, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.324385643005371, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.324385643005371, "logits_per_char": -0.6621928215026855, "num_chars": 2}, {"sum_logits": -1.4802634716033936, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4802634716033936, "logits_per_char": -0.7401317358016968, "num_chars": 2}, {"sum_logits": -1.4434291124343872, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4434291124343872, "logits_per_char": -0.7217145562171936, "num_chars": 2}, {"sum_logits": -1.325035572052002, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.325035572052002, "logits_per_char": -0.662517786026001, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 383, "native_id": "MSA_2015_8_30", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5195473432540894, "incorrect_loss_raw": 1.35353422164917, "correct_loss_per_char": 0.7597736716270447, "incorrect_loss_per_char": 0.676767110824585, "correct_loss_per_token": 1.5195473432540894, "incorrect_loss_per_token": 1.35353422164917, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5195473432540894, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.5195473432540894, "logits_per_char": -0.7597736716270447, "num_chars": 2}, {"sum_logits": -1.4325401782989502, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.4325401782989502, "logits_per_char": -0.7162700891494751, "num_chars": 2}, {"sum_logits": -1.3573070764541626, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.3573070764541626, "logits_per_char": -0.6786535382270813, "num_chars": 2}, {"sum_logits": -1.270755410194397, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": true, "logits_per_token": -1.270755410194397, "logits_per_char": -0.6353777050971985, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 384, "native_id": "Mercury_SC_415416", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4773128032684326, "incorrect_loss_raw": 1.3660314480463664, "correct_loss_per_char": 0.7386564016342163, "incorrect_loss_per_char": 0.6830157240231832, "correct_loss_per_token": 1.4773128032684326, "incorrect_loss_per_token": 1.3660314480463664, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4279941320419312, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4279941320419312, "logits_per_char": -0.7139970660209656, "num_chars": 2}, {"sum_logits": -1.3076400756835938, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.3076400756835938, "logits_per_char": -0.6538200378417969, "num_chars": 2}, {"sum_logits": -1.4773128032684326, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4773128032684326, "logits_per_char": -0.7386564016342163, "num_chars": 2}, {"sum_logits": -1.3624601364135742, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.3624601364135742, "logits_per_char": -0.6812300682067871, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 385, "native_id": "NYSEDREGENTS_2012_8_42", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2177298069000244, "incorrect_loss_raw": 1.266363501548767, "correct_loss_per_char": 0.6088649034500122, "incorrect_loss_per_char": 0.6331817507743835, "correct_loss_per_token": 1.2177298069000244, "incorrect_loss_per_token": 1.266363501548767, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1932809352874756, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": true, "logits_per_token": -1.1932809352874756, "logits_per_char": -0.5966404676437378, "num_chars": 2}, {"sum_logits": -1.2177298069000244, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.2177298069000244, "logits_per_char": -0.6088649034500122, "num_chars": 2}, {"sum_logits": -1.3394460678100586, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.3394460678100586, "logits_per_char": -0.6697230339050293, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 386, "native_id": "NCEOGA_2013_5_9", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4181509017944336, "incorrect_loss_raw": 1.3853804270426433, "correct_loss_per_char": 0.7090754508972168, "incorrect_loss_per_char": 0.6926902135213217, "correct_loss_per_token": 1.4181509017944336, "incorrect_loss_per_token": 1.3853804270426433, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.45106840133667, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.45106840133667, "logits_per_char": -0.725534200668335, "num_chars": 2}, {"sum_logits": -1.4181509017944336, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4181509017944336, "logits_per_char": -0.7090754508972168, "num_chars": 2}, {"sum_logits": -1.4388012886047363, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4388012886047363, "logits_per_char": -0.7194006443023682, "num_chars": 2}, {"sum_logits": -1.2662715911865234, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.2662715911865234, "logits_per_char": -0.6331357955932617, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 387, "native_id": "MEAP_2005_8_45", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.200282096862793, "incorrect_loss_raw": 1.464713215827942, "correct_loss_per_char": 0.6001410484313965, "incorrect_loss_per_char": 0.732356607913971, "correct_loss_per_token": 1.200282096862793, "incorrect_loss_per_token": 1.464713215827942, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5057963132858276, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.5057963132858276, "logits_per_char": -0.7528981566429138, "num_chars": 2}, {"sum_logits": -1.4789190292358398, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.4789190292358398, "logits_per_char": -0.7394595146179199, "num_chars": 2}, {"sum_logits": -1.4094243049621582, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.4094243049621582, "logits_per_char": -0.7047121524810791, "num_chars": 2}, {"sum_logits": -1.200282096862793, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": true, "logits_per_token": -1.200282096862793, "logits_per_char": -0.6001410484313965, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 388, "native_id": "Mercury_SC_400594", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.425966739654541, "incorrect_loss_raw": 1.3789668480555217, "correct_loss_per_char": 0.7129833698272705, "incorrect_loss_per_char": 0.6894834240277609, "correct_loss_per_token": 1.425966739654541, "incorrect_loss_per_token": 1.3789668480555217, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.364316701889038, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.364316701889038, "logits_per_char": -0.682158350944519, "num_chars": 2}, {"sum_logits": -1.368320107460022, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.368320107460022, "logits_per_char": -0.684160053730011, "num_chars": 2}, {"sum_logits": -1.4042637348175049, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.4042637348175049, "logits_per_char": -0.7021318674087524, "num_chars": 2}, {"sum_logits": -1.425966739654541, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.425966739654541, "logits_per_char": -0.7129833698272705, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 389, "native_id": "NCEOGA_2013_8_43", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4833922386169434, "incorrect_loss_raw": 1.3610233465830486, "correct_loss_per_char": 0.7416961193084717, "incorrect_loss_per_char": 0.6805116732915243, "correct_loss_per_token": 1.4833922386169434, "incorrect_loss_per_token": 1.3610233465830486, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4833922386169434, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4833922386169434, "logits_per_char": -0.7416961193084717, "num_chars": 2}, {"sum_logits": -1.392106294631958, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.392106294631958, "logits_per_char": -0.696053147315979, "num_chars": 2}, {"sum_logits": -1.3390674591064453, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.3390674591064453, "logits_per_char": -0.6695337295532227, "num_chars": 2}, {"sum_logits": -1.3518962860107422, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.3518962860107422, "logits_per_char": -0.6759481430053711, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 390, "native_id": "MCAS_2006_8_13", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4143128395080566, "incorrect_loss_raw": 1.3843658765157063, "correct_loss_per_char": 0.7071564197540283, "incorrect_loss_per_char": 0.6921829382578532, "correct_loss_per_token": 1.4143128395080566, "incorrect_loss_per_token": 1.3843658765157063, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4524728059768677, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.4524728059768677, "logits_per_char": -0.7262364029884338, "num_chars": 2}, {"sum_logits": -1.4030911922454834, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.4030911922454834, "logits_per_char": -0.7015455961227417, "num_chars": 2}, {"sum_logits": -1.4143128395080566, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.4143128395080566, "logits_per_char": -0.7071564197540283, "num_chars": 2}, {"sum_logits": -1.297533631324768, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": true, "logits_per_token": -1.297533631324768, "logits_per_char": -0.648766815662384, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 391, "native_id": "Mercury_7168823", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2460424900054932, "incorrect_loss_raw": 1.44353187084198, "correct_loss_per_char": 0.6230212450027466, "incorrect_loss_per_char": 0.72176593542099, "correct_loss_per_token": 1.2460424900054932, "incorrect_loss_per_token": 1.44353187084198, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4512990713119507, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.4512990713119507, "logits_per_char": -0.7256495356559753, "num_chars": 2}, {"sum_logits": -1.3918747901916504, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.3918747901916504, "logits_per_char": -0.6959373950958252, "num_chars": 2}, {"sum_logits": -1.4874217510223389, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.4874217510223389, "logits_per_char": -0.7437108755111694, "num_chars": 2}, {"sum_logits": -1.2460424900054932, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": true, "logits_per_token": -1.2460424900054932, "logits_per_char": -0.6230212450027466, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 392, "native_id": "Mercury_7158935", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4329478740692139, "incorrect_loss_raw": 1.3888837893803914, "correct_loss_per_char": 0.7164739370346069, "incorrect_loss_per_char": 0.6944418946901957, "correct_loss_per_token": 1.4329478740692139, "incorrect_loss_per_token": 1.3888837893803914, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.549721598625183, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.549721598625183, "logits_per_char": -0.7748607993125916, "num_chars": 2}, {"sum_logits": -1.4478285312652588, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.4478285312652588, "logits_per_char": -0.7239142656326294, "num_chars": 2}, {"sum_logits": -1.4329478740692139, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.4329478740692139, "logits_per_char": -0.7164739370346069, "num_chars": 2}, {"sum_logits": -1.1691012382507324, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": true, "logits_per_token": -1.1691012382507324, "logits_per_char": -0.5845506191253662, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 393, "native_id": "Mercury_7172708", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5113004446029663, "incorrect_loss_raw": 1.3545646667480469, "correct_loss_per_char": 0.7556502223014832, "incorrect_loss_per_char": 0.6772823333740234, "correct_loss_per_token": 1.5113004446029663, "incorrect_loss_per_token": 1.3545646667480469, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4072281122207642, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4072281122207642, "logits_per_char": -0.7036140561103821, "num_chars": 2}, {"sum_logits": -1.5113004446029663, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.5113004446029663, "logits_per_char": -0.7556502223014832, "num_chars": 2}, {"sum_logits": -1.3849953413009644, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3849953413009644, "logits_per_char": -0.6924976706504822, "num_chars": 2}, {"sum_logits": -1.271470546722412, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.271470546722412, "logits_per_char": -0.635735273361206, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 394, "native_id": "ACTAAP_2010_5_1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.440347671508789, "incorrect_loss_raw": 1.3807136217753093, "correct_loss_per_char": 0.7201738357543945, "incorrect_loss_per_char": 0.6903568108876547, "correct_loss_per_token": 1.440347671508789, "incorrect_loss_per_token": 1.3807136217753093, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.440347671508789, "num_tokens": 1, "num_tokens_all": 415, "is_greedy": false, "logits_per_token": -1.440347671508789, "logits_per_char": -0.7201738357543945, "num_chars": 2}, {"sum_logits": -1.4605618715286255, "num_tokens": 1, "num_tokens_all": 415, "is_greedy": false, "logits_per_token": -1.4605618715286255, "logits_per_char": -0.7302809357643127, "num_chars": 2}, {"sum_logits": -1.463124394416809, "num_tokens": 1, "num_tokens_all": 415, "is_greedy": false, "logits_per_token": -1.463124394416809, "logits_per_char": -0.7315621972084045, "num_chars": 2}, {"sum_logits": -1.2184545993804932, "num_tokens": 1, "num_tokens_all": 415, "is_greedy": true, "logits_per_token": -1.2184545993804932, "logits_per_char": -0.6092272996902466, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 395, "native_id": "Mercury_7093048", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4684903621673584, "incorrect_loss_raw": 1.365834395090739, "correct_loss_per_char": 0.7342451810836792, "incorrect_loss_per_char": 0.6829171975453695, "correct_loss_per_token": 1.4684903621673584, "incorrect_loss_per_token": 1.365834395090739, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3641325235366821, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.3641325235366821, "logits_per_char": -0.6820662617683411, "num_chars": 2}, {"sum_logits": -1.3521209955215454, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.3521209955215454, "logits_per_char": -0.6760604977607727, "num_chars": 2}, {"sum_logits": -1.4684903621673584, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4684903621673584, "logits_per_char": -0.7342451810836792, "num_chars": 2}, {"sum_logits": -1.3812496662139893, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.3812496662139893, "logits_per_char": -0.6906248331069946, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 396, "native_id": "Mercury_7081603", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.348356008529663, "incorrect_loss_raw": 1.413034439086914, "correct_loss_per_char": 0.6741780042648315, "incorrect_loss_per_char": 0.706517219543457, "correct_loss_per_token": 1.348356008529663, "incorrect_loss_per_token": 1.413034439086914, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5159107446670532, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.5159107446670532, "logits_per_char": -0.7579553723335266, "num_chars": 2}, {"sum_logits": -1.4891163110733032, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.4891163110733032, "logits_per_char": -0.7445581555366516, "num_chars": 2}, {"sum_logits": -1.348356008529663, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.348356008529663, "logits_per_char": -0.6741780042648315, "num_chars": 2}, {"sum_logits": -1.2340762615203857, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": true, "logits_per_token": -1.2340762615203857, "logits_per_char": -0.6170381307601929, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 397, "native_id": "Mercury_SC_LBS11003", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5941733121871948, "incorrect_loss_raw": 1.3594214121500652, "correct_loss_per_char": 0.7970866560935974, "incorrect_loss_per_char": 0.6797107060750326, "correct_loss_per_token": 1.5941733121871948, "incorrect_loss_per_token": 1.3594214121500652, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0689321756362915, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.0689321756362915, "logits_per_char": -0.5344660878181458, "num_chars": 2}, {"sum_logits": -1.4602363109588623, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4602363109588623, "logits_per_char": -0.7301181554794312, "num_chars": 2}, {"sum_logits": -1.5490957498550415, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.5490957498550415, "logits_per_char": -0.7745478749275208, "num_chars": 2}, {"sum_logits": -1.5941733121871948, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.5941733121871948, "logits_per_char": -0.7970866560935974, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 398, "native_id": "MCAS_2005_8_2", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.385261058807373, "incorrect_loss_raw": 1.3928317228953044, "correct_loss_per_char": 0.6926305294036865, "incorrect_loss_per_char": 0.6964158614476522, "correct_loss_per_token": 1.385261058807373, "incorrect_loss_per_token": 1.3928317228953044, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3701825141906738, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.3701825141906738, "logits_per_char": -0.6850912570953369, "num_chars": 2}, {"sum_logits": -1.3677281141281128, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": true, "logits_per_token": -1.3677281141281128, "logits_per_char": -0.6838640570640564, "num_chars": 2}, {"sum_logits": -1.385261058807373, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.385261058807373, "logits_per_char": -0.6926305294036865, "num_chars": 2}, {"sum_logits": -1.4405845403671265, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.4405845403671265, "logits_per_char": -0.7202922701835632, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 399, "native_id": "ACTAAP_2010_7_14", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4575836658477783, "incorrect_loss_raw": 1.3699281613032024, "correct_loss_per_char": 0.7287918329238892, "incorrect_loss_per_char": 0.6849640806516012, "correct_loss_per_token": 1.4575836658477783, "incorrect_loss_per_token": 1.3699281613032024, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.464443325996399, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.464443325996399, "logits_per_char": -0.7322216629981995, "num_chars": 2}, {"sum_logits": -1.3373281955718994, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.3373281955718994, "logits_per_char": -0.6686640977859497, "num_chars": 2}, {"sum_logits": -1.4575836658477783, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.4575836658477783, "logits_per_char": -0.7287918329238892, "num_chars": 2}, {"sum_logits": -1.3080129623413086, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -1.3080129623413086, "logits_per_char": -0.6540064811706543, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 400, "native_id": "NYSEDREGENTS_2008_4_15", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2901012897491455, "incorrect_loss_raw": 1.43495774269104, "correct_loss_per_char": 0.6450506448745728, "incorrect_loss_per_char": 0.71747887134552, "correct_loss_per_token": 1.2901012897491455, "incorrect_loss_per_token": 1.43495774269104, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3071157932281494, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.3071157932281494, "logits_per_char": -0.6535578966140747, "num_chars": 2}, {"sum_logits": -1.5627996921539307, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.5627996921539307, "logits_per_char": -0.7813998460769653, "num_chars": 2}, {"sum_logits": -1.2901012897491455, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.2901012897491455, "logits_per_char": -0.6450506448745728, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 401, "native_id": "Mercury_7107240", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3414552211761475, "incorrect_loss_raw": 1.4098703861236572, "correct_loss_per_char": 0.6707276105880737, "incorrect_loss_per_char": 0.7049351930618286, "correct_loss_per_token": 1.3414552211761475, "incorrect_loss_per_token": 1.4098703861236572, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3414552211761475, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.3414552211761475, "logits_per_char": -0.6707276105880737, "num_chars": 2}, {"sum_logits": -1.3233083486557007, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": true, "logits_per_token": -1.3233083486557007, "logits_per_char": -0.6616541743278503, "num_chars": 2}, {"sum_logits": -1.5000050067901611, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.5000050067901611, "logits_per_char": -0.7500025033950806, "num_chars": 2}, {"sum_logits": -1.4062978029251099, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.4062978029251099, "logits_per_char": -0.7031489014625549, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 402, "native_id": "Mercury_7218628", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3921887874603271, "incorrect_loss_raw": 1.3912667830785115, "correct_loss_per_char": 0.6960943937301636, "incorrect_loss_per_char": 0.6956333915392557, "correct_loss_per_token": 1.3921887874603271, "incorrect_loss_per_token": 1.3912667830785115, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4242472648620605, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.4242472648620605, "logits_per_char": -0.7121236324310303, "num_chars": 2}, {"sum_logits": -1.4295079708099365, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.4295079708099365, "logits_per_char": -0.7147539854049683, "num_chars": 2}, {"sum_logits": -1.3921887874603271, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.3921887874603271, "logits_per_char": -0.6960943937301636, "num_chars": 2}, {"sum_logits": -1.3200451135635376, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": true, "logits_per_token": -1.3200451135635376, "logits_per_char": -0.6600225567817688, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 403, "native_id": "MSA_2013_5_23", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1811645030975342, "incorrect_loss_raw": 1.4732156197230022, "correct_loss_per_char": 0.5905822515487671, "incorrect_loss_per_char": 0.7366078098615011, "correct_loss_per_token": 1.1811645030975342, "incorrect_loss_per_token": 1.4732156197230022, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.56989324092865, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.56989324092865, "logits_per_char": -0.784946620464325, "num_chars": 2}, {"sum_logits": -1.413304328918457, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.413304328918457, "logits_per_char": -0.7066521644592285, "num_chars": 2}, {"sum_logits": -1.4364492893218994, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4364492893218994, "logits_per_char": -0.7182246446609497, "num_chars": 2}, {"sum_logits": -1.1811645030975342, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.1811645030975342, "logits_per_char": -0.5905822515487671, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 404, "native_id": "Mercury_7081725", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4451804161071777, "incorrect_loss_raw": 1.3793108463287354, "correct_loss_per_char": 0.7225902080535889, "incorrect_loss_per_char": 0.6896554231643677, "correct_loss_per_token": 1.4451804161071777, "incorrect_loss_per_token": 1.3793108463287354, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4451804161071777, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.4451804161071777, "logits_per_char": -0.7225902080535889, "num_chars": 2}, {"sum_logits": -1.3848873376846313, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.3848873376846313, "logits_per_char": -0.6924436688423157, "num_chars": 2}, {"sum_logits": -1.5134332180023193, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.5134332180023193, "logits_per_char": -0.7567166090011597, "num_chars": 2}, {"sum_logits": -1.2396119832992554, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.2396119832992554, "logits_per_char": -0.6198059916496277, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 405, "native_id": "Mercury_SC_413542", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5719528198242188, "incorrect_loss_raw": 1.3471696774164836, "correct_loss_per_char": 0.7859764099121094, "incorrect_loss_per_char": 0.6735848387082418, "correct_loss_per_token": 1.5719528198242188, "incorrect_loss_per_token": 1.3471696774164836, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5719528198242188, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.5719528198242188, "logits_per_char": -0.7859764099121094, "num_chars": 2}, {"sum_logits": -1.4591333866119385, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.4591333866119385, "logits_per_char": -0.7295666933059692, "num_chars": 2}, {"sum_logits": -1.4421420097351074, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.4421420097351074, "logits_per_char": -0.7210710048675537, "num_chars": 2}, {"sum_logits": -1.1402336359024048, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": true, "logits_per_token": -1.1402336359024048, "logits_per_char": -0.5701168179512024, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 406, "native_id": "Mercury_SC_407302", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4084006547927856, "incorrect_loss_raw": 1.3848211765289307, "correct_loss_per_char": 0.7042003273963928, "incorrect_loss_per_char": 0.6924105882644653, "correct_loss_per_token": 1.4084006547927856, "incorrect_loss_per_token": 1.3848211765289307, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4118133783340454, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.4118133783340454, "logits_per_char": -0.7059066891670227, "num_chars": 2}, {"sum_logits": -1.3944512605667114, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.3944512605667114, "logits_per_char": -0.6972256302833557, "num_chars": 2}, {"sum_logits": -1.4084006547927856, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.4084006547927856, "logits_per_char": -0.7042003273963928, "num_chars": 2}, {"sum_logits": -1.3481988906860352, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.3481988906860352, "logits_per_char": -0.6740994453430176, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 407, "native_id": "Mercury_175053", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2947889566421509, "incorrect_loss_raw": 1.4240199724833171, "correct_loss_per_char": 0.6473944783210754, "incorrect_loss_per_char": 0.7120099862416586, "correct_loss_per_token": 1.2947889566421509, "incorrect_loss_per_token": 1.4240199724833171, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4663829803466797, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.4663829803466797, "logits_per_char": -0.7331914901733398, "num_chars": 2}, {"sum_logits": -1.38503098487854, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.38503098487854, "logits_per_char": -0.69251549243927, "num_chars": 2}, {"sum_logits": -1.4206459522247314, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.4206459522247314, "logits_per_char": -0.7103229761123657, "num_chars": 2}, {"sum_logits": -1.2947889566421509, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": true, "logits_per_token": -1.2947889566421509, "logits_per_char": -0.6473944783210754, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 408, "native_id": "Mercury_7161315", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4122190475463867, "incorrect_loss_raw": 1.3924153248469036, "correct_loss_per_char": 0.7061095237731934, "incorrect_loss_per_char": 0.6962076624234518, "correct_loss_per_token": 1.4122190475463867, "incorrect_loss_per_token": 1.3924153248469036, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4847545623779297, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.4847545623779297, "logits_per_char": -0.7423772811889648, "num_chars": 2}, {"sum_logits": -1.4122190475463867, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.4122190475463867, "logits_per_char": -0.7061095237731934, "num_chars": 2}, {"sum_logits": -1.4964829683303833, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.4964829683303833, "logits_per_char": -0.7482414841651917, "num_chars": 2}, {"sum_logits": -1.1960084438323975, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": true, "logits_per_token": -1.1960084438323975, "logits_per_char": -0.5980042219161987, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 409, "native_id": "Mercury_189070", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4980517625808716, "incorrect_loss_raw": 1.3615655899047852, "correct_loss_per_char": 0.7490258812904358, "incorrect_loss_per_char": 0.6807827949523926, "correct_loss_per_token": 1.4980517625808716, "incorrect_loss_per_token": 1.3615655899047852, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4980517625808716, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4980517625808716, "logits_per_char": -0.7490258812904358, "num_chars": 2}, {"sum_logits": -1.493269443511963, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.493269443511963, "logits_per_char": -0.7466347217559814, "num_chars": 2}, {"sum_logits": -1.3007962703704834, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.3007962703704834, "logits_per_char": -0.6503981351852417, "num_chars": 2}, {"sum_logits": -1.2906310558319092, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.2906310558319092, "logits_per_char": -0.6453155279159546, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 410, "native_id": "Mercury_7189123", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.464585781097412, "incorrect_loss_raw": 1.3769070704778035, "correct_loss_per_char": 0.732292890548706, "incorrect_loss_per_char": 0.6884535352389017, "correct_loss_per_token": 1.464585781097412, "incorrect_loss_per_token": 1.3769070704778035, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5532642602920532, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.5532642602920532, "logits_per_char": -0.7766321301460266, "num_chars": 2}, {"sum_logits": -1.464585781097412, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.464585781097412, "logits_per_char": -0.732292890548706, "num_chars": 2}, {"sum_logits": -1.3799082040786743, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.3799082040786743, "logits_per_char": -0.6899541020393372, "num_chars": 2}, {"sum_logits": -1.197548747062683, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": true, "logits_per_token": -1.197548747062683, "logits_per_char": -0.5987743735313416, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 411, "native_id": "Mercury_SC_402171", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3933182954788208, "incorrect_loss_raw": 1.3887183268864949, "correct_loss_per_char": 0.6966591477394104, "incorrect_loss_per_char": 0.6943591634432474, "correct_loss_per_token": 1.3933182954788208, "incorrect_loss_per_token": 1.3887183268864949, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.383438229560852, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.383438229560852, "logits_per_char": -0.691719114780426, "num_chars": 2}, {"sum_logits": -1.374526858329773, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.374526858329773, "logits_per_char": -0.6872634291648865, "num_chars": 2}, {"sum_logits": -1.4081898927688599, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4081898927688599, "logits_per_char": -0.7040949463844299, "num_chars": 2}, {"sum_logits": -1.3933182954788208, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3933182954788208, "logits_per_char": -0.6966591477394104, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 412, "native_id": "Mercury_7217368", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3180466890335083, "incorrect_loss_raw": 1.423090934753418, "correct_loss_per_char": 0.6590233445167542, "incorrect_loss_per_char": 0.711545467376709, "correct_loss_per_token": 1.3180466890335083, "incorrect_loss_per_token": 1.423090934753418, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5754462480545044, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.5754462480545044, "logits_per_char": -0.7877231240272522, "num_chars": 2}, {"sum_logits": -1.3180466890335083, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.3180466890335083, "logits_per_char": -0.6590233445167542, "num_chars": 2}, {"sum_logits": -1.2893470525741577, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.2893470525741577, "logits_per_char": -0.6446735262870789, "num_chars": 2}, {"sum_logits": -1.4044795036315918, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.4044795036315918, "logits_per_char": -0.7022397518157959, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 413, "native_id": "Mercury_LBS10933", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5277258157730103, "incorrect_loss_raw": 1.3508350451787312, "correct_loss_per_char": 0.7638629078865051, "incorrect_loss_per_char": 0.6754175225893656, "correct_loss_per_token": 1.5277258157730103, "incorrect_loss_per_token": 1.3508350451787312, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5277258157730103, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.5277258157730103, "logits_per_char": -0.7638629078865051, "num_chars": 2}, {"sum_logits": -1.369608998298645, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.369608998298645, "logits_per_char": -0.6848044991493225, "num_chars": 2}, {"sum_logits": -1.4127644300460815, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.4127644300460815, "logits_per_char": -0.7063822150230408, "num_chars": 2}, {"sum_logits": -1.2701317071914673, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": true, "logits_per_token": -1.2701317071914673, "logits_per_char": -0.6350658535957336, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 414, "native_id": "Mercury_7223160", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4144867658615112, "incorrect_loss_raw": 1.3849518696467082, "correct_loss_per_char": 0.7072433829307556, "incorrect_loss_per_char": 0.6924759348233541, "correct_loss_per_token": 1.4144867658615112, "incorrect_loss_per_token": 1.3849518696467082, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4191420078277588, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4191420078277588, "logits_per_char": -0.7095710039138794, "num_chars": 2}, {"sum_logits": -1.431329607963562, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.431329607963562, "logits_per_char": -0.715664803981781, "num_chars": 2}, {"sum_logits": -1.4144867658615112, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4144867658615112, "logits_per_char": -0.7072433829307556, "num_chars": 2}, {"sum_logits": -1.3043839931488037, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.3043839931488037, "logits_per_char": -0.6521919965744019, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 415, "native_id": "Mercury_SC_401324", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3013132810592651, "incorrect_loss_raw": 1.4242870410283406, "correct_loss_per_char": 0.6506566405296326, "incorrect_loss_per_char": 0.7121435205141703, "correct_loss_per_token": 1.3013132810592651, "incorrect_loss_per_token": 1.4242870410283406, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4196281433105469, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.4196281433105469, "logits_per_char": -0.7098140716552734, "num_chars": 2}, {"sum_logits": -1.3686771392822266, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.3686771392822266, "logits_per_char": -0.6843385696411133, "num_chars": 2}, {"sum_logits": -1.4845558404922485, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.4845558404922485, "logits_per_char": -0.7422779202461243, "num_chars": 2}, {"sum_logits": -1.3013132810592651, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -1.3013132810592651, "logits_per_char": -0.6506566405296326, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 416, "native_id": "LEAP_2001_8_10379", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4704513549804688, "incorrect_loss_raw": 1.3676602443059285, "correct_loss_per_char": 0.7352256774902344, "incorrect_loss_per_char": 0.6838301221529642, "correct_loss_per_token": 1.4704513549804688, "incorrect_loss_per_token": 1.3676602443059285, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4704513549804688, "num_tokens": 1, "num_tokens_all": 466, "is_greedy": false, "logits_per_token": -1.4704513549804688, "logits_per_char": -0.7352256774902344, "num_chars": 2}, {"sum_logits": -1.3869072198867798, "num_tokens": 1, "num_tokens_all": 466, "is_greedy": false, "logits_per_token": -1.3869072198867798, "logits_per_char": -0.6934536099433899, "num_chars": 2}, {"sum_logits": -1.4577083587646484, "num_tokens": 1, "num_tokens_all": 466, "is_greedy": false, "logits_per_token": -1.4577083587646484, "logits_per_char": -0.7288541793823242, "num_chars": 2}, {"sum_logits": -1.2583651542663574, "num_tokens": 1, "num_tokens_all": 466, "is_greedy": true, "logits_per_token": -1.2583651542663574, "logits_per_char": -0.6291825771331787, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 417, "native_id": "VASoL_2009_5_30", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3242204189300537, "incorrect_loss_raw": 1.4155562321345012, "correct_loss_per_char": 0.6621102094650269, "incorrect_loss_per_char": 0.7077781160672506, "correct_loss_per_token": 1.3242204189300537, "incorrect_loss_per_token": 1.4155562321345012, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3947902917861938, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.3947902917861938, "logits_per_char": -0.6973951458930969, "num_chars": 2}, {"sum_logits": -1.451628565788269, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.451628565788269, "logits_per_char": -0.7258142828941345, "num_chars": 2}, {"sum_logits": -1.4002498388290405, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4002498388290405, "logits_per_char": -0.7001249194145203, "num_chars": 2}, {"sum_logits": -1.3242204189300537, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.3242204189300537, "logits_per_char": -0.6621102094650269, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 418, "native_id": "Mercury_416404", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2577296495437622, "incorrect_loss_raw": 1.4408705234527588, "correct_loss_per_char": 0.6288648247718811, "incorrect_loss_per_char": 0.7204352617263794, "correct_loss_per_token": 1.2577296495437622, "incorrect_loss_per_token": 1.4408705234527588, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4677050113677979, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.4677050113677979, "logits_per_char": -0.7338525056838989, "num_chars": 2}, {"sum_logits": -1.5006381273269653, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.5006381273269653, "logits_per_char": -0.7503190636634827, "num_chars": 2}, {"sum_logits": -1.3542684316635132, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.3542684316635132, "logits_per_char": -0.6771342158317566, "num_chars": 2}, {"sum_logits": -1.2577296495437622, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.2577296495437622, "logits_per_char": -0.6288648247718811, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 419, "native_id": "Mercury_7103530", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3600999116897583, "incorrect_loss_raw": 1.4049859046936035, "correct_loss_per_char": 0.6800499558448792, "incorrect_loss_per_char": 0.7024929523468018, "correct_loss_per_token": 1.3600999116897583, "incorrect_loss_per_token": 1.4049859046936035, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4406079053878784, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.4406079053878784, "logits_per_char": -0.7203039526939392, "num_chars": 2}, {"sum_logits": -1.3600999116897583, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.3600999116897583, "logits_per_char": -0.6800499558448792, "num_chars": 2}, {"sum_logits": -1.493023157119751, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.493023157119751, "logits_per_char": -0.7465115785598755, "num_chars": 2}, {"sum_logits": -1.2813266515731812, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.2813266515731812, "logits_per_char": -0.6406633257865906, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 420, "native_id": "Mercury_7030870", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5281192064285278, "incorrect_loss_raw": 1.3563952048619587, "correct_loss_per_char": 0.7640596032142639, "incorrect_loss_per_char": 0.6781976024309794, "correct_loss_per_token": 1.5281192064285278, "incorrect_loss_per_token": 1.3563952048619587, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5281192064285278, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.5281192064285278, "logits_per_char": -0.7640596032142639, "num_chars": 2}, {"sum_logits": -1.3970569372177124, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.3970569372177124, "logits_per_char": -0.6985284686088562, "num_chars": 2}, {"sum_logits": -1.4705506563186646, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.4705506563186646, "logits_per_char": -0.7352753281593323, "num_chars": 2}, {"sum_logits": -1.2015780210494995, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": true, "logits_per_token": -1.2015780210494995, "logits_per_char": -0.6007890105247498, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 421, "native_id": "LEAP__7_10348", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.469111442565918, "incorrect_loss_raw": 1.3715004126230876, "correct_loss_per_char": 0.734555721282959, "incorrect_loss_per_char": 0.6857502063115438, "correct_loss_per_token": 1.469111442565918, "incorrect_loss_per_token": 1.3715004126230876, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5121201276779175, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.5121201276779175, "logits_per_char": -0.7560600638389587, "num_chars": 2}, {"sum_logits": -1.3708198070526123, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.3708198070526123, "logits_per_char": -0.6854099035263062, "num_chars": 2}, {"sum_logits": -1.469111442565918, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.469111442565918, "logits_per_char": -0.734555721282959, "num_chars": 2}, {"sum_logits": -1.231561303138733, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": true, "logits_per_token": -1.231561303138733, "logits_per_char": -0.6157806515693665, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 422, "native_id": "Mercury_SC_406835", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4153409004211426, "incorrect_loss_raw": 1.3885878721872966, "correct_loss_per_char": 0.7076704502105713, "incorrect_loss_per_char": 0.6942939360936483, "correct_loss_per_token": 1.4153409004211426, "incorrect_loss_per_token": 1.3885878721872966, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4401662349700928, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.4401662349700928, "logits_per_char": -0.7200831174850464, "num_chars": 2}, {"sum_logits": -1.4153409004211426, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.4153409004211426, "logits_per_char": -0.7076704502105713, "num_chars": 2}, {"sum_logits": -1.4942352771759033, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.4942352771759033, "logits_per_char": -0.7471176385879517, "num_chars": 2}, {"sum_logits": -1.2313621044158936, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": true, "logits_per_token": -1.2313621044158936, "logits_per_char": -0.6156810522079468, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 423, "native_id": "Mercury_178255", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3972196578979492, "incorrect_loss_raw": 1.3928224245707195, "correct_loss_per_char": 0.6986098289489746, "incorrect_loss_per_char": 0.6964112122853597, "correct_loss_per_token": 1.3972196578979492, "incorrect_loss_per_token": 1.3928224245707195, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4976962804794312, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.4976962804794312, "logits_per_char": -0.7488481402397156, "num_chars": 2}, {"sum_logits": -1.3972196578979492, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.3972196578979492, "logits_per_char": -0.6986098289489746, "num_chars": 2}, {"sum_logits": -1.413306474685669, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.413306474685669, "logits_per_char": -0.7066532373428345, "num_chars": 2}, {"sum_logits": -1.267464518547058, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": true, "logits_per_token": -1.267464518547058, "logits_per_char": -0.633732259273529, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 424, "native_id": "MDSA_2012_8_16", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4475409984588623, "incorrect_loss_raw": 1.3742829163869221, "correct_loss_per_char": 0.7237704992294312, "incorrect_loss_per_char": 0.6871414581934611, "correct_loss_per_token": 1.4475409984588623, "incorrect_loss_per_token": 1.3742829163869221, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4666870832443237, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.4666870832443237, "logits_per_char": -0.7333435416221619, "num_chars": 2}, {"sum_logits": -1.4475409984588623, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.4475409984588623, "logits_per_char": -0.7237704992294312, "num_chars": 2}, {"sum_logits": -1.3417295217514038, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.3417295217514038, "logits_per_char": -0.6708647608757019, "num_chars": 2}, {"sum_logits": -1.314432144165039, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": true, "logits_per_token": -1.314432144165039, "logits_per_char": -0.6572160720825195, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 425, "native_id": "Mercury_409645", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2216869592666626, "incorrect_loss_raw": 1.456858237584432, "correct_loss_per_char": 0.6108434796333313, "incorrect_loss_per_char": 0.728429118792216, "correct_loss_per_token": 1.2216869592666626, "incorrect_loss_per_token": 1.456858237584432, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5515215396881104, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.5515215396881104, "logits_per_char": -0.7757607698440552, "num_chars": 2}, {"sum_logits": -1.4517831802368164, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.4517831802368164, "logits_per_char": -0.7258915901184082, "num_chars": 2}, {"sum_logits": -1.3672699928283691, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.3672699928283691, "logits_per_char": -0.6836349964141846, "num_chars": 2}, {"sum_logits": -1.2216869592666626, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": true, "logits_per_token": -1.2216869592666626, "logits_per_char": -0.6108434796333313, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 426, "native_id": "TIMSS_2003_8_pg47", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4753296375274658, "incorrect_loss_raw": 1.3653107484181721, "correct_loss_per_char": 0.7376648187637329, "incorrect_loss_per_char": 0.6826553742090861, "correct_loss_per_token": 1.4753296375274658, "incorrect_loss_per_token": 1.3653107484181721, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3204594850540161, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.3204594850540161, "logits_per_char": -0.6602297425270081, "num_chars": 2}, {"sum_logits": -1.4092165231704712, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4092165231704712, "logits_per_char": -0.7046082615852356, "num_chars": 2}, {"sum_logits": -1.4753296375274658, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4753296375274658, "logits_per_char": -0.7376648187637329, "num_chars": 2}, {"sum_logits": -1.3662562370300293, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.3662562370300293, "logits_per_char": -0.6831281185150146, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 427, "native_id": "NYSEDREGENTS_2010_8_16", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3644689321517944, "incorrect_loss_raw": 1.4031737248102825, "correct_loss_per_char": 0.6822344660758972, "incorrect_loss_per_char": 0.7015868624051412, "correct_loss_per_token": 1.3644689321517944, "incorrect_loss_per_token": 1.4031737248102825, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3141188621520996, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.3141188621520996, "logits_per_char": -0.6570594310760498, "num_chars": 2}, {"sum_logits": -1.3644689321517944, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3644689321517944, "logits_per_char": -0.6822344660758972, "num_chars": 2}, {"sum_logits": -1.5177865028381348, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.5177865028381348, "logits_per_char": -0.7588932514190674, "num_chars": 2}, {"sum_logits": -1.3776158094406128, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3776158094406128, "logits_per_char": -0.6888079047203064, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 428, "native_id": "Mercury_7159810", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3786256313323975, "incorrect_loss_raw": 1.404161771138509, "correct_loss_per_char": 0.6893128156661987, "incorrect_loss_per_char": 0.7020808855692545, "correct_loss_per_token": 1.3786256313323975, "incorrect_loss_per_token": 1.404161771138509, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5519264936447144, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.5519264936447144, "logits_per_char": -0.7759632468223572, "num_chars": 2}, {"sum_logits": -1.3786256313323975, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.3786256313323975, "logits_per_char": -0.6893128156661987, "num_chars": 2}, {"sum_logits": -1.4528106451034546, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.4528106451034546, "logits_per_char": -0.7264053225517273, "num_chars": 2}, {"sum_logits": -1.2077481746673584, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": true, "logits_per_token": -1.2077481746673584, "logits_per_char": -0.6038740873336792, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 429, "native_id": "Mercury_7267523", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3956149816513062, "incorrect_loss_raw": 1.390569527943929, "correct_loss_per_char": 0.6978074908256531, "incorrect_loss_per_char": 0.6952847639719645, "correct_loss_per_token": 1.3956149816513062, "incorrect_loss_per_token": 1.390569527943929, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3344838619232178, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.3344838619232178, "logits_per_char": -0.6672419309616089, "num_chars": 2}, {"sum_logits": -1.3956149816513062, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.3956149816513062, "logits_per_char": -0.6978074908256531, "num_chars": 2}, {"sum_logits": -1.4257153272628784, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4257153272628784, "logits_per_char": -0.7128576636314392, "num_chars": 2}, {"sum_logits": -1.411509394645691, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.411509394645691, "logits_per_char": -0.7057546973228455, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 430, "native_id": "Mercury_SC_401006", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3269872665405273, "incorrect_loss_raw": 1.413690447807312, "correct_loss_per_char": 0.6634936332702637, "incorrect_loss_per_char": 0.706845223903656, "correct_loss_per_token": 1.3269872665405273, "incorrect_loss_per_token": 1.413690447807312, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3269872665405273, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.3269872665405273, "logits_per_char": -0.6634936332702637, "num_chars": 2}, {"sum_logits": -1.3675073385238647, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.3675073385238647, "logits_per_char": -0.6837536692619324, "num_chars": 2}, {"sum_logits": -1.4034411907196045, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4034411907196045, "logits_per_char": -0.7017205953598022, "num_chars": 2}, {"sum_logits": -1.4701228141784668, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4701228141784668, "logits_per_char": -0.7350614070892334, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 431, "native_id": "ACTAAP_2010_7_12", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4276899099349976, "incorrect_loss_raw": 1.3797437349955242, "correct_loss_per_char": 0.7138449549674988, "incorrect_loss_per_char": 0.6898718674977621, "correct_loss_per_token": 1.4276899099349976, "incorrect_loss_per_token": 1.3797437349955242, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3234678506851196, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": true, "logits_per_token": -1.3234678506851196, "logits_per_char": -0.6617339253425598, "num_chars": 2}, {"sum_logits": -1.3811049461364746, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": false, "logits_per_token": -1.3811049461364746, "logits_per_char": -0.6905524730682373, "num_chars": 2}, {"sum_logits": -1.434658408164978, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": false, "logits_per_token": -1.434658408164978, "logits_per_char": -0.717329204082489, "num_chars": 2}, {"sum_logits": -1.4276899099349976, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": false, "logits_per_token": -1.4276899099349976, "logits_per_char": -0.7138449549674988, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 432, "native_id": "MEAP_2005_8_13", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5174626111984253, "incorrect_loss_raw": 1.3580973148345947, "correct_loss_per_char": 0.7587313055992126, "incorrect_loss_per_char": 0.6790486574172974, "correct_loss_per_token": 1.5174626111984253, "incorrect_loss_per_token": 1.3580973148345947, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2123684883117676, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.2123684883117676, "logits_per_char": -0.6061842441558838, "num_chars": 2}, {"sum_logits": -1.3656331300735474, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.3656331300735474, "logits_per_char": -0.6828165650367737, "num_chars": 2}, {"sum_logits": -1.4962903261184692, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.4962903261184692, "logits_per_char": -0.7481451630592346, "num_chars": 2}, {"sum_logits": -1.5174626111984253, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.5174626111984253, "logits_per_char": -0.7587313055992126, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 433, "native_id": "Mercury_7164623", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4541339874267578, "incorrect_loss_raw": 1.372193415959676, "correct_loss_per_char": 0.7270669937133789, "incorrect_loss_per_char": 0.686096707979838, "correct_loss_per_token": 1.4541339874267578, "incorrect_loss_per_token": 1.372193415959676, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4543907642364502, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4543907642364502, "logits_per_char": -0.7271953821182251, "num_chars": 2}, {"sum_logits": -1.392985224723816, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.392985224723816, "logits_per_char": -0.696492612361908, "num_chars": 2}, {"sum_logits": -1.4541339874267578, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4541339874267578, "logits_per_char": -0.7270669937133789, "num_chars": 2}, {"sum_logits": -1.2692042589187622, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.2692042589187622, "logits_per_char": -0.6346021294593811, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 434, "native_id": "Mercury_417127", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4659812450408936, "incorrect_loss_raw": 1.3862788677215576, "correct_loss_per_char": 0.7329906225204468, "incorrect_loss_per_char": 0.6931394338607788, "correct_loss_per_token": 1.4659812450408936, "incorrect_loss_per_token": 1.3862788677215576, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4659812450408936, "num_tokens": 1, "num_tokens_all": 437, "is_greedy": false, "logits_per_token": -1.4659812450408936, "logits_per_char": -0.7329906225204468, "num_chars": 2}, {"sum_logits": -1.4850614070892334, "num_tokens": 1, "num_tokens_all": 437, "is_greedy": false, "logits_per_token": -1.4850614070892334, "logits_per_char": -0.7425307035446167, "num_chars": 2}, {"sum_logits": -1.563897728919983, "num_tokens": 1, "num_tokens_all": 437, "is_greedy": false, "logits_per_token": -1.563897728919983, "logits_per_char": -0.7819488644599915, "num_chars": 2}, {"sum_logits": -1.1098774671554565, "num_tokens": 1, "num_tokens_all": 437, "is_greedy": true, "logits_per_token": -1.1098774671554565, "logits_per_char": -0.5549387335777283, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 435, "native_id": "Mercury_411224", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4744393825531006, "incorrect_loss_raw": 1.3726184368133545, "correct_loss_per_char": 0.7372196912765503, "incorrect_loss_per_char": 0.6863092184066772, "correct_loss_per_token": 1.4744393825531006, "incorrect_loss_per_token": 1.3726184368133545, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4607775211334229, "num_tokens": 1, "num_tokens_all": 428, "is_greedy": false, "logits_per_token": -1.4607775211334229, "logits_per_char": -0.7303887605667114, "num_chars": 2}, {"sum_logits": -1.4744393825531006, "num_tokens": 1, "num_tokens_all": 428, "is_greedy": false, "logits_per_token": -1.4744393825531006, "logits_per_char": -0.7372196912765503, "num_chars": 2}, {"sum_logits": -1.4783108234405518, "num_tokens": 1, "num_tokens_all": 428, "is_greedy": false, "logits_per_token": -1.4783108234405518, "logits_per_char": -0.7391554117202759, "num_chars": 2}, {"sum_logits": -1.1787669658660889, "num_tokens": 1, "num_tokens_all": 428, "is_greedy": true, "logits_per_token": -1.1787669658660889, "logits_per_char": -0.5893834829330444, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 436, "native_id": "TIMSS_2011_8_pg15", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3000949621200562, "incorrect_loss_raw": 1.423232913017273, "correct_loss_per_char": 0.6500474810600281, "incorrect_loss_per_char": 0.7116164565086365, "correct_loss_per_token": 1.3000949621200562, "incorrect_loss_per_token": 1.423232913017273, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3683356046676636, "num_tokens": 1, "num_tokens_all": 442, "is_greedy": false, "logits_per_token": -1.3683356046676636, "logits_per_char": -0.6841678023338318, "num_chars": 2}, {"sum_logits": -1.4431201219558716, "num_tokens": 1, "num_tokens_all": 442, "is_greedy": false, "logits_per_token": -1.4431201219558716, "logits_per_char": -0.7215600609779358, "num_chars": 2}, {"sum_logits": -1.4582430124282837, "num_tokens": 1, "num_tokens_all": 442, "is_greedy": false, "logits_per_token": -1.4582430124282837, "logits_per_char": -0.7291215062141418, "num_chars": 2}, {"sum_logits": -1.3000949621200562, "num_tokens": 1, "num_tokens_all": 442, "is_greedy": true, "logits_per_token": -1.3000949621200562, "logits_per_char": -0.6500474810600281, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 437, "native_id": "NYSEDREGENTS_2012_8_19", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5019505023956299, "incorrect_loss_raw": 1.3552606503168743, "correct_loss_per_char": 0.7509752511978149, "incorrect_loss_per_char": 0.6776303251584371, "correct_loss_per_token": 1.5019505023956299, "incorrect_loss_per_token": 1.3552606503168743, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3521764278411865, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.3521764278411865, "logits_per_char": -0.6760882139205933, "num_chars": 2}, {"sum_logits": -1.3457227945327759, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.3457227945327759, "logits_per_char": -0.6728613972663879, "num_chars": 2}, {"sum_logits": -1.5019505023956299, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.5019505023956299, "logits_per_char": -0.7509752511978149, "num_chars": 2}, {"sum_logits": -1.3678827285766602, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.3678827285766602, "logits_per_char": -0.6839413642883301, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 438, "native_id": "Mercury_7222460", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3473206758499146, "incorrect_loss_raw": 1.4155572255452473, "correct_loss_per_char": 0.6736603379249573, "incorrect_loss_per_char": 0.7077786127726237, "correct_loss_per_token": 1.3473206758499146, "incorrect_loss_per_token": 1.4155572255452473, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3473206758499146, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": false, "logits_per_token": -1.3473206758499146, "logits_per_char": -0.6736603379249573, "num_chars": 2}, {"sum_logits": -1.4026095867156982, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": false, "logits_per_token": -1.4026095867156982, "logits_per_char": -0.7013047933578491, "num_chars": 2}, {"sum_logits": -1.6005828380584717, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": false, "logits_per_token": -1.6005828380584717, "logits_per_char": -0.8002914190292358, "num_chars": 2}, {"sum_logits": -1.2434792518615723, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": true, "logits_per_token": -1.2434792518615723, "logits_per_char": -0.6217396259307861, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 439, "native_id": "Mercury_7007420", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.477782964706421, "incorrect_loss_raw": 1.370715896288554, "correct_loss_per_char": 0.7388914823532104, "incorrect_loss_per_char": 0.685357948144277, "correct_loss_per_token": 1.477782964706421, "incorrect_loss_per_token": 1.370715896288554, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4257272481918335, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.4257272481918335, "logits_per_char": -0.7128636240959167, "num_chars": 2}, {"sum_logits": -1.486343264579773, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.486343264579773, "logits_per_char": -0.7431716322898865, "num_chars": 2}, {"sum_logits": -1.477782964706421, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.477782964706421, "logits_per_char": -0.7388914823532104, "num_chars": 2}, {"sum_logits": -1.2000771760940552, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": true, "logits_per_token": -1.2000771760940552, "logits_per_char": -0.6000385880470276, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 440, "native_id": "Mercury_SC_405710", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.390364408493042, "incorrect_loss_raw": 1.3897190888722737, "correct_loss_per_char": 0.695182204246521, "incorrect_loss_per_char": 0.6948595444361368, "correct_loss_per_token": 1.390364408493042, "incorrect_loss_per_token": 1.3897190888722737, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3557994365692139, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.3557994365692139, "logits_per_char": -0.6778997182846069, "num_chars": 2}, {"sum_logits": -1.3811557292938232, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3811557292938232, "logits_per_char": -0.6905778646469116, "num_chars": 2}, {"sum_logits": -1.4322021007537842, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4322021007537842, "logits_per_char": -0.7161010503768921, "num_chars": 2}, {"sum_logits": -1.390364408493042, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.390364408493042, "logits_per_char": -0.695182204246521, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 441, "native_id": "Mercury_SC_401375", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4105769395828247, "incorrect_loss_raw": 1.3870285749435425, "correct_loss_per_char": 0.7052884697914124, "incorrect_loss_per_char": 0.6935142874717712, "correct_loss_per_token": 1.4105769395828247, "incorrect_loss_per_token": 1.3870285749435425, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4006386995315552, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.4006386995315552, "logits_per_char": -0.7003193497657776, "num_chars": 2}, {"sum_logits": -1.2980440855026245, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.2980440855026245, "logits_per_char": -0.6490220427513123, "num_chars": 2}, {"sum_logits": -1.4105769395828247, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.4105769395828247, "logits_per_char": -0.7052884697914124, "num_chars": 2}, {"sum_logits": -1.4624029397964478, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.4624029397964478, "logits_per_char": -0.7312014698982239, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 442, "native_id": "VASoL_2010_3_22", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4351813793182373, "incorrect_loss_raw": 1.379175106684367, "correct_loss_per_char": 0.7175906896591187, "incorrect_loss_per_char": 0.6895875533421835, "correct_loss_per_token": 1.4351813793182373, "incorrect_loss_per_token": 1.379175106684367, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4351813793182373, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4351813793182373, "logits_per_char": -0.7175906896591187, "num_chars": 2}, {"sum_logits": -1.2625101804733276, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.2625101804733276, "logits_per_char": -0.6312550902366638, "num_chars": 2}, {"sum_logits": -1.4718455076217651, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4718455076217651, "logits_per_char": -0.7359227538108826, "num_chars": 2}, {"sum_logits": -1.4031696319580078, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4031696319580078, "logits_per_char": -0.7015848159790039, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 443, "native_id": "Mercury_SC_408358", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3522995710372925, "incorrect_loss_raw": 1.4066449403762817, "correct_loss_per_char": 0.6761497855186462, "incorrect_loss_per_char": 0.7033224701881409, "correct_loss_per_token": 1.3522995710372925, "incorrect_loss_per_token": 1.4066449403762817, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3522995710372925, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.3522995710372925, "logits_per_char": -0.6761497855186462, "num_chars": 2}, {"sum_logits": -1.2895689010620117, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.2895689010620117, "logits_per_char": -0.6447844505310059, "num_chars": 2}, {"sum_logits": -1.4699909687042236, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4699909687042236, "logits_per_char": -0.7349954843521118, "num_chars": 2}, {"sum_logits": -1.4603749513626099, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4603749513626099, "logits_per_char": -0.7301874756813049, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 444, "native_id": "NYSEDREGENTS_2013_8_42", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3465145826339722, "incorrect_loss_raw": 1.4043443997701008, "correct_loss_per_char": 0.6732572913169861, "incorrect_loss_per_char": 0.7021721998850504, "correct_loss_per_token": 1.3465145826339722, "incorrect_loss_per_token": 1.4043443997701008, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3934000730514526, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3934000730514526, "logits_per_char": -0.6967000365257263, "num_chars": 2}, {"sum_logits": -1.3465145826339722, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.3465145826339722, "logits_per_char": -0.6732572913169861, "num_chars": 2}, {"sum_logits": -1.4045283794403076, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4045283794403076, "logits_per_char": -0.7022641897201538, "num_chars": 2}, {"sum_logits": -1.4151047468185425, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4151047468185425, "logits_per_char": -0.7075523734092712, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 445, "native_id": "Mercury_SC_400661", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2713428735733032, "incorrect_loss_raw": 1.4342324336369832, "correct_loss_per_char": 0.6356714367866516, "incorrect_loss_per_char": 0.7171162168184916, "correct_loss_per_token": 1.2713428735733032, "incorrect_loss_per_token": 1.4342324336369832, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4715477228164673, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4715477228164673, "logits_per_char": -0.7357738614082336, "num_chars": 2}, {"sum_logits": -1.3963143825531006, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3963143825531006, "logits_per_char": -0.6981571912765503, "num_chars": 2}, {"sum_logits": -1.4348351955413818, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4348351955413818, "logits_per_char": -0.7174175977706909, "num_chars": 2}, {"sum_logits": -1.2713428735733032, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.2713428735733032, "logits_per_char": -0.6356714367866516, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 446, "native_id": "Mercury_SC_415422", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4125837087631226, "incorrect_loss_raw": 1.3836261828740437, "correct_loss_per_char": 0.7062918543815613, "incorrect_loss_per_char": 0.6918130914370219, "correct_loss_per_token": 1.4125837087631226, "incorrect_loss_per_token": 1.3836261828740437, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3234597444534302, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.3234597444534302, "logits_per_char": -0.6617298722267151, "num_chars": 2}, {"sum_logits": -1.4336766004562378, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4336766004562378, "logits_per_char": -0.7168383002281189, "num_chars": 2}, {"sum_logits": -1.4125837087631226, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4125837087631226, "logits_per_char": -0.7062918543815613, "num_chars": 2}, {"sum_logits": -1.3937422037124634, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.3937422037124634, "logits_per_char": -0.6968711018562317, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 447, "native_id": "Mercury_SC_400162", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.351295828819275, "incorrect_loss_raw": 1.4067654609680176, "correct_loss_per_char": 0.6756479144096375, "incorrect_loss_per_char": 0.7033827304840088, "correct_loss_per_token": 1.351295828819275, "incorrect_loss_per_token": 1.4067654609680176, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3280202150344849, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.3280202150344849, "logits_per_char": -0.6640101075172424, "num_chars": 2}, {"sum_logits": -1.4179494380950928, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4179494380950928, "logits_per_char": -0.7089747190475464, "num_chars": 2}, {"sum_logits": -1.474326729774475, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.474326729774475, "logits_per_char": -0.7371633648872375, "num_chars": 2}, {"sum_logits": -1.351295828819275, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.351295828819275, "logits_per_char": -0.6756479144096375, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 448, "native_id": "Mercury_7212328", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4112904071807861, "incorrect_loss_raw": 1.3961990276972454, "correct_loss_per_char": 0.7056452035903931, "incorrect_loss_per_char": 0.6980995138486227, "correct_loss_per_token": 1.4112904071807861, "incorrect_loss_per_token": 1.3961990276972454, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5014115571975708, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": false, "logits_per_token": -1.5014115571975708, "logits_per_char": -0.7507057785987854, "num_chars": 2}, {"sum_logits": -1.4112904071807861, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": false, "logits_per_token": -1.4112904071807861, "logits_per_char": -0.7056452035903931, "num_chars": 2}, {"sum_logits": -1.5103909969329834, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": false, "logits_per_token": -1.5103909969329834, "logits_per_char": -0.7551954984664917, "num_chars": 2}, {"sum_logits": -1.1767945289611816, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": true, "logits_per_token": -1.1767945289611816, "logits_per_char": -0.5883972644805908, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 449, "native_id": "NCEOGA_2013_8_26", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3632293939590454, "incorrect_loss_raw": 1.401815414428711, "correct_loss_per_char": 0.6816146969795227, "incorrect_loss_per_char": 0.7009077072143555, "correct_loss_per_token": 1.3632293939590454, "incorrect_loss_per_token": 1.401815414428711, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4623427391052246, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4623427391052246, "logits_per_char": -0.7311713695526123, "num_chars": 2}, {"sum_logits": -1.4181842803955078, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4181842803955078, "logits_per_char": -0.7090921401977539, "num_chars": 2}, {"sum_logits": -1.3249192237854004, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.3249192237854004, "logits_per_char": -0.6624596118927002, "num_chars": 2}, {"sum_logits": -1.3632293939590454, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3632293939590454, "logits_per_char": -0.6816146969795227, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 450, "native_id": "Mercury_SC_407696", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4245959520339966, "incorrect_loss_raw": 1.3839161396026611, "correct_loss_per_char": 0.7122979760169983, "incorrect_loss_per_char": 0.6919580698013306, "correct_loss_per_token": 1.4245959520339966, "incorrect_loss_per_token": 1.3839161396026611, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3103960752487183, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -1.3103960752487183, "logits_per_char": -0.6551980376243591, "num_chars": 2}, {"sum_logits": -1.4245959520339966, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.4245959520339966, "logits_per_char": -0.7122979760169983, "num_chars": 2}, {"sum_logits": -1.5034617185592651, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.5034617185592651, "logits_per_char": -0.7517308592796326, "num_chars": 2}, {"sum_logits": -1.337890625, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.337890625, "logits_per_char": -0.6689453125, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 451, "native_id": "Mercury_SC_400052", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3294974565505981, "incorrect_loss_raw": 1.4132904211680095, "correct_loss_per_char": 0.6647487282752991, "incorrect_loss_per_char": 0.7066452105840048, "correct_loss_per_token": 1.3294974565505981, "incorrect_loss_per_token": 1.4132904211680095, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.392034888267517, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.392034888267517, "logits_per_char": -0.6960174441337585, "num_chars": 2}, {"sum_logits": -1.3294974565505981, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.3294974565505981, "logits_per_char": -0.6647487282752991, "num_chars": 2}, {"sum_logits": -1.4840651750564575, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4840651750564575, "logits_per_char": -0.7420325875282288, "num_chars": 2}, {"sum_logits": -1.3637712001800537, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3637712001800537, "logits_per_char": -0.6818856000900269, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 452, "native_id": "Mercury_7212870", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2983342409133911, "incorrect_loss_raw": 1.4301513036092122, "correct_loss_per_char": 0.6491671204566956, "incorrect_loss_per_char": 0.7150756518046061, "correct_loss_per_token": 1.2983342409133911, "incorrect_loss_per_token": 1.4301513036092122, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5528991222381592, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.5528991222381592, "logits_per_char": -0.7764495611190796, "num_chars": 2}, {"sum_logits": -1.3403042554855347, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3403042554855347, "logits_per_char": -0.6701521277427673, "num_chars": 2}, {"sum_logits": -1.3972505331039429, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3972505331039429, "logits_per_char": -0.6986252665519714, "num_chars": 2}, {"sum_logits": -1.2983342409133911, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.2983342409133911, "logits_per_char": -0.6491671204566956, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 453, "native_id": "NYSEDREGENTS_2010_8_35", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.402342677116394, "incorrect_loss_raw": 1.3917361895243328, "correct_loss_per_char": 0.701171338558197, "incorrect_loss_per_char": 0.6958680947621664, "correct_loss_per_token": 1.402342677116394, "incorrect_loss_per_token": 1.3917361895243328, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.515960454940796, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.515960454940796, "logits_per_char": -0.757980227470398, "num_chars": 2}, {"sum_logits": -1.4092111587524414, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4092111587524414, "logits_per_char": -0.7046055793762207, "num_chars": 2}, {"sum_logits": -1.402342677116394, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.402342677116394, "logits_per_char": -0.701171338558197, "num_chars": 2}, {"sum_logits": -1.2500369548797607, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.2500369548797607, "logits_per_char": -0.6250184774398804, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 454, "native_id": "MCAS_2010_8_12005", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3411321640014648, "incorrect_loss_raw": 1.4088533719380696, "correct_loss_per_char": 0.6705660820007324, "incorrect_loss_per_char": 0.7044266859690348, "correct_loss_per_token": 1.3411321640014648, "incorrect_loss_per_token": 1.4088533719380696, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3411321640014648, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": true, "logits_per_token": -1.3411321640014648, "logits_per_char": -0.6705660820007324, "num_chars": 2}, {"sum_logits": -1.3660507202148438, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.3660507202148438, "logits_per_char": -0.6830253601074219, "num_chars": 2}, {"sum_logits": -1.4140466451644897, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.4140466451644897, "logits_per_char": -0.7070233225822449, "num_chars": 2}, {"sum_logits": -1.4464627504348755, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.4464627504348755, "logits_per_char": -0.7232313752174377, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 455, "native_id": "Mercury_7218505", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3351314067840576, "incorrect_loss_raw": 1.4101957082748413, "correct_loss_per_char": 0.6675657033920288, "incorrect_loss_per_char": 0.7050978541374207, "correct_loss_per_token": 1.3351314067840576, "incorrect_loss_per_token": 1.4101957082748413, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4285951852798462, "num_tokens": 1, "num_tokens_all": 415, "is_greedy": false, "logits_per_token": -1.4285951852798462, "logits_per_char": -0.7142975926399231, "num_chars": 2}, {"sum_logits": -1.3351314067840576, "num_tokens": 1, "num_tokens_all": 415, "is_greedy": true, "logits_per_token": -1.3351314067840576, "logits_per_char": -0.6675657033920288, "num_chars": 2}, {"sum_logits": -1.376987099647522, "num_tokens": 1, "num_tokens_all": 415, "is_greedy": false, "logits_per_token": -1.376987099647522, "logits_per_char": -0.688493549823761, "num_chars": 2}, {"sum_logits": -1.4250048398971558, "num_tokens": 1, "num_tokens_all": 415, "is_greedy": false, "logits_per_token": -1.4250048398971558, "logits_per_char": -0.7125024199485779, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 456, "native_id": "Mercury_SC_400853", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4757707118988037, "incorrect_loss_raw": 1.3689283927281697, "correct_loss_per_char": 0.7378853559494019, "incorrect_loss_per_char": 0.6844641963640848, "correct_loss_per_token": 1.4757707118988037, "incorrect_loss_per_token": 1.3689283927281697, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2407184839248657, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.2407184839248657, "logits_per_char": -0.6203592419624329, "num_chars": 2}, {"sum_logits": -1.3662972450256348, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.3662972450256348, "logits_per_char": -0.6831486225128174, "num_chars": 2}, {"sum_logits": -1.4997694492340088, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4997694492340088, "logits_per_char": -0.7498847246170044, "num_chars": 2}, {"sum_logits": -1.4757707118988037, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4757707118988037, "logits_per_char": -0.7378853559494019, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 457, "native_id": "Mercury_7210455", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.387518048286438, "incorrect_loss_raw": 1.3968761761983235, "correct_loss_per_char": 0.693759024143219, "incorrect_loss_per_char": 0.6984380880991617, "correct_loss_per_token": 1.387518048286438, "incorrect_loss_per_token": 1.3968761761983235, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4728548526763916, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4728548526763916, "logits_per_char": -0.7364274263381958, "num_chars": 2}, {"sum_logits": -1.4714252948760986, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4714252948760986, "logits_per_char": -0.7357126474380493, "num_chars": 2}, {"sum_logits": -1.387518048286438, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.387518048286438, "logits_per_char": -0.693759024143219, "num_chars": 2}, {"sum_logits": -1.2463483810424805, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.2463483810424805, "logits_per_char": -0.6231741905212402, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 458, "native_id": "Mercury_7174738", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4651768207550049, "incorrect_loss_raw": 1.3695232073465984, "correct_loss_per_char": 0.7325884103775024, "incorrect_loss_per_char": 0.6847616036732992, "correct_loss_per_token": 1.4651768207550049, "incorrect_loss_per_token": 1.3695232073465984, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4715261459350586, "num_tokens": 1, "num_tokens_all": 454, "is_greedy": false, "logits_per_token": -1.4715261459350586, "logits_per_char": -0.7357630729675293, "num_chars": 2}, {"sum_logits": -1.3867453336715698, "num_tokens": 1, "num_tokens_all": 454, "is_greedy": false, "logits_per_token": -1.3867453336715698, "logits_per_char": -0.6933726668357849, "num_chars": 2}, {"sum_logits": -1.4651768207550049, "num_tokens": 1, "num_tokens_all": 454, "is_greedy": false, "logits_per_token": -1.4651768207550049, "logits_per_char": -0.7325884103775024, "num_chars": 2}, {"sum_logits": -1.2502981424331665, "num_tokens": 1, "num_tokens_all": 454, "is_greedy": true, "logits_per_token": -1.2502981424331665, "logits_per_char": -0.6251490712165833, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 459, "native_id": "MCAS_2001_5_2", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.207522988319397, "incorrect_loss_raw": 1.4599286317825317, "correct_loss_per_char": 0.6037614941596985, "incorrect_loss_per_char": 0.7299643158912659, "correct_loss_per_token": 1.207522988319397, "incorrect_loss_per_token": 1.4599286317825317, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4165092706680298, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4165092706680298, "logits_per_char": -0.7082546353340149, "num_chars": 2}, {"sum_logits": -1.4875434637069702, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4875434637069702, "logits_per_char": -0.7437717318534851, "num_chars": 2}, {"sum_logits": -1.4757331609725952, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4757331609725952, "logits_per_char": -0.7378665804862976, "num_chars": 2}, {"sum_logits": -1.207522988319397, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.207522988319397, "logits_per_char": -0.6037614941596985, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 460, "native_id": "NYSEDREGENTS_2012_4_9", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.369344711303711, "incorrect_loss_raw": 1.4003721475601196, "correct_loss_per_char": 0.6846723556518555, "incorrect_loss_per_char": 0.7001860737800598, "correct_loss_per_token": 1.369344711303711, "incorrect_loss_per_token": 1.4003721475601196, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.369344711303711, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.369344711303711, "logits_per_char": -0.6846723556518555, "num_chars": 2}, {"sum_logits": -1.3944886922836304, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.3944886922836304, "logits_per_char": -0.6972443461418152, "num_chars": 2}, {"sum_logits": -1.3448538780212402, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.3448538780212402, "logits_per_char": -0.6724269390106201, "num_chars": 2}, {"sum_logits": -1.4617738723754883, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.4617738723754883, "logits_per_char": -0.7308869361877441, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 461, "native_id": "Mercury_416593", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3630884885787964, "incorrect_loss_raw": 1.4124566713968914, "correct_loss_per_char": 0.6815442442893982, "incorrect_loss_per_char": 0.7062283356984457, "correct_loss_per_token": 1.3630884885787964, "incorrect_loss_per_token": 1.4124566713968914, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5800600051879883, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.5800600051879883, "logits_per_char": -0.7900300025939941, "num_chars": 2}, {"sum_logits": -1.3630884885787964, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.3630884885787964, "logits_per_char": -0.6815442442893982, "num_chars": 2}, {"sum_logits": -1.4568026065826416, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.4568026065826416, "logits_per_char": -0.7284013032913208, "num_chars": 2}, {"sum_logits": -1.200507402420044, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": true, "logits_per_token": -1.200507402420044, "logits_per_char": -0.600253701210022, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 462, "native_id": "Mercury_7205870", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3444571495056152, "incorrect_loss_raw": 1.4070894320805867, "correct_loss_per_char": 0.6722285747528076, "incorrect_loss_per_char": 0.7035447160402933, "correct_loss_per_token": 1.3444571495056152, "incorrect_loss_per_token": 1.4070894320805867, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3464845418930054, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.3464845418930054, "logits_per_char": -0.6732422709465027, "num_chars": 2}, {"sum_logits": -1.3444571495056152, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": true, "logits_per_token": -1.3444571495056152, "logits_per_char": -0.6722285747528076, "num_chars": 2}, {"sum_logits": -1.4520134925842285, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.4520134925842285, "logits_per_char": -0.7260067462921143, "num_chars": 2}, {"sum_logits": -1.4227702617645264, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.4227702617645264, "logits_per_char": -0.7113851308822632, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 463, "native_id": "Mercury_SC_401798", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4384323358535767, "incorrect_loss_raw": 1.3750382661819458, "correct_loss_per_char": 0.7192161679267883, "incorrect_loss_per_char": 0.6875191330909729, "correct_loss_per_token": 1.4384323358535767, "incorrect_loss_per_token": 1.3750382661819458, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4000937938690186, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4000937938690186, "logits_per_char": -0.7000468969345093, "num_chars": 2}, {"sum_logits": -1.3299258947372437, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.3299258947372437, "logits_per_char": -0.6649629473686218, "num_chars": 2}, {"sum_logits": -1.4384323358535767, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4384323358535767, "logits_per_char": -0.7192161679267883, "num_chars": 2}, {"sum_logits": -1.3950951099395752, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3950951099395752, "logits_per_char": -0.6975475549697876, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 464, "native_id": "Mercury_7084228", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.436577558517456, "incorrect_loss_raw": 1.3762743870417278, "correct_loss_per_char": 0.718288779258728, "incorrect_loss_per_char": 0.6881371935208639, "correct_loss_per_token": 1.436577558517456, "incorrect_loss_per_token": 1.3762743870417278, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3059821128845215, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.3059821128845215, "logits_per_char": -0.6529910564422607, "num_chars": 2}, {"sum_logits": -1.3814382553100586, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.3814382553100586, "logits_per_char": -0.6907191276550293, "num_chars": 2}, {"sum_logits": -1.441402792930603, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.441402792930603, "logits_per_char": -0.7207013964653015, "num_chars": 2}, {"sum_logits": -1.436577558517456, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.436577558517456, "logits_per_char": -0.718288779258728, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 465, "native_id": "Mercury_417460", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.384316086769104, "incorrect_loss_raw": 1.393911361694336, "correct_loss_per_char": 0.692158043384552, "incorrect_loss_per_char": 0.696955680847168, "correct_loss_per_token": 1.384316086769104, "incorrect_loss_per_token": 1.393911361694336, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.384316086769104, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.384316086769104, "logits_per_char": -0.692158043384552, "num_chars": 2}, {"sum_logits": -1.3835543394088745, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.3835543394088745, "logits_per_char": -0.6917771697044373, "num_chars": 2}, {"sum_logits": -1.4499502182006836, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4499502182006836, "logits_per_char": -0.7249751091003418, "num_chars": 2}, {"sum_logits": -1.3482295274734497, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.3482295274734497, "logits_per_char": -0.6741147637367249, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 466, "native_id": "Mercury_402539", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4305750131607056, "incorrect_loss_raw": 1.3799944718678792, "correct_loss_per_char": 0.7152875065803528, "incorrect_loss_per_char": 0.6899972359339396, "correct_loss_per_token": 1.4305750131607056, "incorrect_loss_per_token": 1.3799944718678792, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4495794773101807, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -1.4495794773101807, "logits_per_char": -0.7247897386550903, "num_chars": 2}, {"sum_logits": -1.4305750131607056, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -1.4305750131607056, "logits_per_char": -0.7152875065803528, "num_chars": 2}, {"sum_logits": -1.43104088306427, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -1.43104088306427, "logits_per_char": -0.715520441532135, "num_chars": 2}, {"sum_logits": -1.259363055229187, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": true, "logits_per_token": -1.259363055229187, "logits_per_char": -0.6296815276145935, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 467, "native_id": "Mercury_406800", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.46781325340271, "incorrect_loss_raw": 1.3671266237894695, "correct_loss_per_char": 0.733906626701355, "incorrect_loss_per_char": 0.6835633118947347, "correct_loss_per_token": 1.46781325340271, "incorrect_loss_per_token": 1.3671266237894695, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3559168577194214, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3559168577194214, "logits_per_char": -0.6779584288597107, "num_chars": 2}, {"sum_logits": -1.46781325340271, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.46781325340271, "logits_per_char": -0.733906626701355, "num_chars": 2}, {"sum_logits": -1.457095742225647, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.457095742225647, "logits_per_char": -0.7285478711128235, "num_chars": 2}, {"sum_logits": -1.2883672714233398, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.2883672714233398, "logits_per_char": -0.6441836357116699, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 468, "native_id": "Mercury_SC_408321", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.344740867614746, "incorrect_loss_raw": 1.4091215133666992, "correct_loss_per_char": 0.672370433807373, "incorrect_loss_per_char": 0.7045607566833496, "correct_loss_per_token": 1.344740867614746, "incorrect_loss_per_token": 1.4091215133666992, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.344740867614746, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.344740867614746, "logits_per_char": -0.672370433807373, "num_chars": 2}, {"sum_logits": -1.4896948337554932, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4896948337554932, "logits_per_char": -0.7448474168777466, "num_chars": 2}, {"sum_logits": -1.4095354080200195, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4095354080200195, "logits_per_char": -0.7047677040100098, "num_chars": 2}, {"sum_logits": -1.328134298324585, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.328134298324585, "logits_per_char": -0.6640671491622925, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 469, "native_id": "Mercury_SC_406836", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.460809350013733, "incorrect_loss_raw": 1.3694966634114583, "correct_loss_per_char": 0.7304046750068665, "incorrect_loss_per_char": 0.6847483317057291, "correct_loss_per_token": 1.460809350013733, "incorrect_loss_per_token": 1.3694966634114583, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4357364177703857, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.4357364177703857, "logits_per_char": -0.7178682088851929, "num_chars": 2}, {"sum_logits": -1.365599513053894, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.365599513053894, "logits_per_char": -0.682799756526947, "num_chars": 2}, {"sum_logits": -1.460809350013733, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": false, "logits_per_token": -1.460809350013733, "logits_per_char": -0.7304046750068665, "num_chars": 2}, {"sum_logits": -1.3071540594100952, "num_tokens": 1, "num_tokens_all": 408, "is_greedy": true, "logits_per_token": -1.3071540594100952, "logits_per_char": -0.6535770297050476, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 470, "native_id": "Mercury_SC_410963", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3873968124389648, "incorrect_loss_raw": 1.3932091395060222, "correct_loss_per_char": 0.6936984062194824, "incorrect_loss_per_char": 0.6966045697530111, "correct_loss_per_token": 1.3873968124389648, "incorrect_loss_per_token": 1.3932091395060222, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3837178945541382, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.3837178945541382, "logits_per_char": -0.6918589472770691, "num_chars": 2}, {"sum_logits": -1.360520601272583, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": true, "logits_per_token": -1.360520601272583, "logits_per_char": -0.6802603006362915, "num_chars": 2}, {"sum_logits": -1.4353889226913452, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.4353889226913452, "logits_per_char": -0.7176944613456726, "num_chars": 2}, {"sum_logits": -1.3873968124389648, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.3873968124389648, "logits_per_char": -0.6936984062194824, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 471, "native_id": "Mercury_7132405", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4223071336746216, "incorrect_loss_raw": 1.3833040793736775, "correct_loss_per_char": 0.7111535668373108, "incorrect_loss_per_char": 0.6916520396868387, "correct_loss_per_token": 1.4223071336746216, "incorrect_loss_per_token": 1.3833040793736775, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2798197269439697, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.2798197269439697, "logits_per_char": -0.6399098634719849, "num_chars": 2}, {"sum_logits": -1.4319367408752441, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4319367408752441, "logits_per_char": -0.7159683704376221, "num_chars": 2}, {"sum_logits": -1.4381557703018188, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4381557703018188, "logits_per_char": -0.7190778851509094, "num_chars": 2}, {"sum_logits": -1.4223071336746216, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4223071336746216, "logits_per_char": -0.7111535668373108, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 472, "native_id": "Mercury_SC_408872", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4107139110565186, "incorrect_loss_raw": 1.3932104110717773, "correct_loss_per_char": 0.7053569555282593, "incorrect_loss_per_char": 0.6966052055358887, "correct_loss_per_token": 1.4107139110565186, "incorrect_loss_per_token": 1.3932104110717773, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5804647207260132, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.5804647207260132, "logits_per_char": -0.7902323603630066, "num_chars": 2}, {"sum_logits": -1.3470313549041748, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.3470313549041748, "logits_per_char": -0.6735156774520874, "num_chars": 2}, {"sum_logits": -1.4107139110565186, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.4107139110565186, "logits_per_char": -0.7053569555282593, "num_chars": 2}, {"sum_logits": -1.252135157585144, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": true, "logits_per_token": -1.252135157585144, "logits_per_char": -0.626067578792572, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 473, "native_id": "VASoL_2008_3_25", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4533097743988037, "incorrect_loss_raw": 1.5661911567052205, "correct_loss_per_char": 0.7266548871994019, "incorrect_loss_per_char": 0.7830955783526102, "correct_loss_per_token": 1.4533097743988037, "incorrect_loss_per_token": 1.5661911567052205, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.028220534324646, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.028220534324646, "logits_per_char": -0.514110267162323, "num_chars": 2}, {"sum_logits": -1.4533097743988037, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4533097743988037, "logits_per_char": -0.7266548871994019, "num_chars": 2}, {"sum_logits": -1.764012098312378, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.764012098312378, "logits_per_char": -0.882006049156189, "num_chars": 2}, {"sum_logits": -1.9063408374786377, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.9063408374786377, "logits_per_char": -0.9531704187393188, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 474, "native_id": "WASL_2005_8_12", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4388718605041504, "incorrect_loss_raw": 1.375461260477702, "correct_loss_per_char": 0.7194359302520752, "incorrect_loss_per_char": 0.687730630238851, "correct_loss_per_token": 1.4388718605041504, "incorrect_loss_per_token": 1.375461260477702, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4388718605041504, "num_tokens": 1, "num_tokens_all": 429, "is_greedy": false, "logits_per_token": -1.4388718605041504, "logits_per_char": -0.7194359302520752, "num_chars": 2}, {"sum_logits": -1.366959810256958, "num_tokens": 1, "num_tokens_all": 429, "is_greedy": false, "logits_per_token": -1.366959810256958, "logits_per_char": -0.683479905128479, "num_chars": 2}, {"sum_logits": -1.4265516996383667, "num_tokens": 1, "num_tokens_all": 429, "is_greedy": false, "logits_per_token": -1.4265516996383667, "logits_per_char": -0.7132758498191833, "num_chars": 2}, {"sum_logits": -1.3328722715377808, "num_tokens": 1, "num_tokens_all": 429, "is_greedy": true, "logits_per_token": -1.3328722715377808, "logits_per_char": -0.6664361357688904, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 475, "native_id": "AKDE&ED_2012_8_20", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.350677728652954, "incorrect_loss_raw": 1.4087307850519817, "correct_loss_per_char": 0.675338864326477, "incorrect_loss_per_char": 0.7043653925259908, "correct_loss_per_token": 1.350677728652954, "incorrect_loss_per_token": 1.4087307850519817, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4650603532791138, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4650603532791138, "logits_per_char": -0.7325301766395569, "num_chars": 2}, {"sum_logits": -1.350677728652954, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.350677728652954, "logits_per_char": -0.675338864326477, "num_chars": 2}, {"sum_logits": -1.480520486831665, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.480520486831665, "logits_per_char": -0.7402602434158325, "num_chars": 2}, {"sum_logits": -1.280611515045166, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.280611515045166, "logits_per_char": -0.640305757522583, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 476, "native_id": "Mercury_7056823", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3623237609863281, "incorrect_loss_raw": 1.4047939380009968, "correct_loss_per_char": 0.6811618804931641, "incorrect_loss_per_char": 0.7023969690004984, "correct_loss_per_token": 1.3623237609863281, "incorrect_loss_per_token": 1.4047939380009968, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2602195739746094, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.2602195739746094, "logits_per_char": -0.6301097869873047, "num_chars": 2}, {"sum_logits": -1.3623237609863281, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3623237609863281, "logits_per_char": -0.6811618804931641, "num_chars": 2}, {"sum_logits": -1.4757832288742065, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4757832288742065, "logits_per_char": -0.7378916144371033, "num_chars": 2}, {"sum_logits": -1.4783790111541748, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4783790111541748, "logits_per_char": -0.7391895055770874, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 477, "native_id": "Mercury_7205800", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3656554222106934, "incorrect_loss_raw": 1.4082496960957844, "correct_loss_per_char": 0.6828277111053467, "incorrect_loss_per_char": 0.7041248480478922, "correct_loss_per_token": 1.3656554222106934, "incorrect_loss_per_token": 1.4082496960957844, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6019269227981567, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.6019269227981567, "logits_per_char": -0.8009634613990784, "num_chars": 2}, {"sum_logits": -1.3656554222106934, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.3656554222106934, "logits_per_char": -0.6828277111053467, "num_chars": 2}, {"sum_logits": -1.3058760166168213, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": true, "logits_per_token": -1.3058760166168213, "logits_per_char": -0.6529380083084106, "num_chars": 2}, {"sum_logits": -1.3169461488723755, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.3169461488723755, "logits_per_char": -0.6584730744361877, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 478, "native_id": "Mercury_SC_402282", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5316640138626099, "incorrect_loss_raw": 1.352359652519226, "correct_loss_per_char": 0.7658320069313049, "incorrect_loss_per_char": 0.676179826259613, "correct_loss_per_token": 1.5316640138626099, "incorrect_loss_per_token": 1.352359652519226, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5316640138626099, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.5316640138626099, "logits_per_char": -0.7658320069313049, "num_chars": 2}, {"sum_logits": -1.469582438468933, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.469582438468933, "logits_per_char": -0.7347912192344666, "num_chars": 2}, {"sum_logits": -1.3519153594970703, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3519153594970703, "logits_per_char": -0.6759576797485352, "num_chars": 2}, {"sum_logits": -1.2355811595916748, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.2355811595916748, "logits_per_char": -0.6177905797958374, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 479, "native_id": "MCAS_1998_8_26", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4319169521331787, "incorrect_loss_raw": 1.3780194123586018, "correct_loss_per_char": 0.7159584760665894, "incorrect_loss_per_char": 0.6890097061793009, "correct_loss_per_token": 1.4319169521331787, "incorrect_loss_per_token": 1.3780194123586018, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3471769094467163, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.3471769094467163, "logits_per_char": -0.6735884547233582, "num_chars": 2}, {"sum_logits": -1.4144763946533203, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4144763946533203, "logits_per_char": -0.7072381973266602, "num_chars": 2}, {"sum_logits": -1.4319169521331787, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4319169521331787, "logits_per_char": -0.7159584760665894, "num_chars": 2}, {"sum_logits": -1.372404932975769, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.372404932975769, "logits_per_char": -0.6862024664878845, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 480, "native_id": "Mercury_7230318", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3725942373275757, "incorrect_loss_raw": 1.4042555093765259, "correct_loss_per_char": 0.6862971186637878, "incorrect_loss_per_char": 0.7021277546882629, "correct_loss_per_token": 1.3725942373275757, "incorrect_loss_per_token": 1.4042555093765259, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4607995748519897, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4607995748519897, "logits_per_char": -0.7303997874259949, "num_chars": 2}, {"sum_logits": -1.2295199632644653, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.2295199632644653, "logits_per_char": -0.6147599816322327, "num_chars": 2}, {"sum_logits": -1.5224469900131226, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.5224469900131226, "logits_per_char": -0.7612234950065613, "num_chars": 2}, {"sum_logits": -1.3725942373275757, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.3725942373275757, "logits_per_char": -0.6862971186637878, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 481, "native_id": "Mercury_SC_416167", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4589101076126099, "incorrect_loss_raw": 1.3745117982228596, "correct_loss_per_char": 0.7294550538063049, "incorrect_loss_per_char": 0.6872558991114298, "correct_loss_per_token": 1.4589101076126099, "incorrect_loss_per_token": 1.3745117982228596, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3381520509719849, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.3381520509719849, "logits_per_char": -0.6690760254859924, "num_chars": 2}, {"sum_logits": -1.4737584590911865, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4737584590911865, "logits_per_char": -0.7368792295455933, "num_chars": 2}, {"sum_logits": -1.4589101076126099, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4589101076126099, "logits_per_char": -0.7294550538063049, "num_chars": 2}, {"sum_logits": -1.3116248846054077, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.3116248846054077, "logits_per_char": -0.6558124423027039, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 482, "native_id": "Mercury_7027720", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3235522508621216, "incorrect_loss_raw": 1.4146323998769124, "correct_loss_per_char": 0.6617761254310608, "incorrect_loss_per_char": 0.7073161999384562, "correct_loss_per_token": 1.3235522508621216, "incorrect_loss_per_token": 1.4146323998769124, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.40201735496521, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.40201735496521, "logits_per_char": -0.701008677482605, "num_chars": 2}, {"sum_logits": -1.3235522508621216, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": true, "logits_per_token": -1.3235522508621216, "logits_per_char": -0.6617761254310608, "num_chars": 2}, {"sum_logits": -1.477752923965454, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.477752923965454, "logits_per_char": -0.738876461982727, "num_chars": 2}, {"sum_logits": -1.3641269207000732, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.3641269207000732, "logits_per_char": -0.6820634603500366, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 483, "native_id": "LEAP__5_10312", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2813441753387451, "incorrect_loss_raw": 1.4316119750340779, "correct_loss_per_char": 0.6406720876693726, "incorrect_loss_per_char": 0.7158059875170389, "correct_loss_per_token": 1.2813441753387451, "incorrect_loss_per_token": 1.4316119750340779, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5199260711669922, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -1.5199260711669922, "logits_per_char": -0.7599630355834961, "num_chars": 2}, {"sum_logits": -1.2813441753387451, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": true, "logits_per_token": -1.2813441753387451, "logits_per_char": -0.6406720876693726, "num_chars": 2}, {"sum_logits": -1.42293381690979, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -1.42293381690979, "logits_per_char": -0.711466908454895, "num_chars": 2}, {"sum_logits": -1.3519760370254517, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -1.3519760370254517, "logits_per_char": -0.6759880185127258, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 484, "native_id": "Mercury_405161", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4464274644851685, "incorrect_loss_raw": 1.3800690174102783, "correct_loss_per_char": 0.7232137322425842, "incorrect_loss_per_char": 0.6900345087051392, "correct_loss_per_token": 1.4464274644851685, "incorrect_loss_per_token": 1.3800690174102783, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2170147895812988, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.2170147895812988, "logits_per_char": -0.6085073947906494, "num_chars": 2}, {"sum_logits": -1.404675841331482, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.404675841331482, "logits_per_char": -0.702337920665741, "num_chars": 2}, {"sum_logits": -1.5185164213180542, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.5185164213180542, "logits_per_char": -0.7592582106590271, "num_chars": 2}, {"sum_logits": -1.4464274644851685, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4464274644851685, "logits_per_char": -0.7232137322425842, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 485, "native_id": "Mercury_SC_409245", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.370185375213623, "incorrect_loss_raw": 1.3990591764450073, "correct_loss_per_char": 0.6850926876068115, "incorrect_loss_per_char": 0.6995295882225037, "correct_loss_per_token": 1.370185375213623, "incorrect_loss_per_token": 1.3990591764450073, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3704166412353516, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.3704166412353516, "logits_per_char": -0.6852083206176758, "num_chars": 2}, {"sum_logits": -1.3349928855895996, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.3349928855895996, "logits_per_char": -0.6674964427947998, "num_chars": 2}, {"sum_logits": -1.4917680025100708, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4917680025100708, "logits_per_char": -0.7458840012550354, "num_chars": 2}, {"sum_logits": -1.370185375213623, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.370185375213623, "logits_per_char": -0.6850926876068115, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 486, "native_id": "ACTAAP_2011_5_8", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4413564205169678, "incorrect_loss_raw": 1.378273606300354, "correct_loss_per_char": 0.7206782102584839, "incorrect_loss_per_char": 0.689136803150177, "correct_loss_per_token": 1.4413564205169678, "incorrect_loss_per_token": 1.378273606300354, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4863152503967285, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.4863152503967285, "logits_per_char": -0.7431576251983643, "num_chars": 2}, {"sum_logits": -1.249605655670166, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.249605655670166, "logits_per_char": -0.624802827835083, "num_chars": 2}, {"sum_logits": -1.4413564205169678, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.4413564205169678, "logits_per_char": -0.7206782102584839, "num_chars": 2}, {"sum_logits": -1.3988999128341675, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3988999128341675, "logits_per_char": -0.6994499564170837, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 487, "native_id": "Mercury_7223370", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.329784870147705, "incorrect_loss_raw": 1.412758231163025, "correct_loss_per_char": 0.6648924350738525, "incorrect_loss_per_char": 0.7063791155815125, "correct_loss_per_token": 1.329784870147705, "incorrect_loss_per_token": 1.412758231163025, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.417977213859558, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.417977213859558, "logits_per_char": -0.708988606929779, "num_chars": 2}, {"sum_logits": -1.4905855655670166, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4905855655670166, "logits_per_char": -0.7452927827835083, "num_chars": 2}, {"sum_logits": -1.3297119140625, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.3297119140625, "logits_per_char": -0.66485595703125, "num_chars": 2}, {"sum_logits": -1.329784870147705, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.329784870147705, "logits_per_char": -0.6648924350738525, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 488, "native_id": "Mercury_SC_400697", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4347354173660278, "incorrect_loss_raw": 1.3772954146067302, "correct_loss_per_char": 0.7173677086830139, "incorrect_loss_per_char": 0.6886477073033651, "correct_loss_per_token": 1.4347354173660278, "incorrect_loss_per_token": 1.3772954146067302, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4452427625656128, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4452427625656128, "logits_per_char": -0.7226213812828064, "num_chars": 2}, {"sum_logits": -1.4347354173660278, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4347354173660278, "logits_per_char": -0.7173677086830139, "num_chars": 2}, {"sum_logits": -1.3511700630187988, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3511700630187988, "logits_per_char": -0.6755850315093994, "num_chars": 2}, {"sum_logits": -1.3354734182357788, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.3354734182357788, "logits_per_char": -0.6677367091178894, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 489, "native_id": "Mercury_SC_401262", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.457427978515625, "incorrect_loss_raw": 1.3693494002024333, "correct_loss_per_char": 0.7287139892578125, "incorrect_loss_per_char": 0.6846747001012167, "correct_loss_per_token": 1.457427978515625, "incorrect_loss_per_token": 1.3693494002024333, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.398188591003418, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.398188591003418, "logits_per_char": -0.699094295501709, "num_chars": 2}, {"sum_logits": -1.3194712400436401, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.3194712400436401, "logits_per_char": -0.6597356200218201, "num_chars": 2}, {"sum_logits": -1.3903883695602417, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.3903883695602417, "logits_per_char": -0.6951941847801208, "num_chars": 2}, {"sum_logits": -1.457427978515625, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.457427978515625, "logits_per_char": -0.7287139892578125, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 490, "native_id": "Mercury_7136063", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3438161611557007, "incorrect_loss_raw": 1.4068629741668701, "correct_loss_per_char": 0.6719080805778503, "incorrect_loss_per_char": 0.7034314870834351, "correct_loss_per_token": 1.3438161611557007, "incorrect_loss_per_token": 1.4068629741668701, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4499502182006836, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.4499502182006836, "logits_per_char": -0.7249751091003418, "num_chars": 2}, {"sum_logits": -1.3438161611557007, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": true, "logits_per_token": -1.3438161611557007, "logits_per_char": -0.6719080805778503, "num_chars": 2}, {"sum_logits": -1.4056875705718994, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.4056875705718994, "logits_per_char": -0.7028437852859497, "num_chars": 2}, {"sum_logits": -1.3649511337280273, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.3649511337280273, "logits_per_char": -0.6824755668640137, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 491, "native_id": "Mercury_405876", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2789819240570068, "incorrect_loss_raw": 1.4304255644480388, "correct_loss_per_char": 0.6394909620285034, "incorrect_loss_per_char": 0.7152127822240194, "correct_loss_per_token": 1.2789819240570068, "incorrect_loss_per_token": 1.4304255644480388, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4275907278060913, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.4275907278060913, "logits_per_char": -0.7137953639030457, "num_chars": 2}, {"sum_logits": -1.3994935750961304, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.3994935750961304, "logits_per_char": -0.6997467875480652, "num_chars": 2}, {"sum_logits": -1.4641923904418945, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.4641923904418945, "logits_per_char": -0.7320961952209473, "num_chars": 2}, {"sum_logits": -1.2789819240570068, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": true, "logits_per_token": -1.2789819240570068, "logits_per_char": -0.6394909620285034, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 492, "native_id": "Mercury_7057890", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3184521198272705, "incorrect_loss_raw": 1.4154223601023357, "correct_loss_per_char": 0.6592260599136353, "incorrect_loss_per_char": 0.7077111800511678, "correct_loss_per_token": 1.3184521198272705, "incorrect_loss_per_token": 1.4154223601023357, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3184521198272705, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.3184521198272705, "logits_per_char": -0.6592260599136353, "num_chars": 2}, {"sum_logits": -1.4268386363983154, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.4268386363983154, "logits_per_char": -0.7134193181991577, "num_chars": 2}, {"sum_logits": -1.443430781364441, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.443430781364441, "logits_per_char": -0.7217153906822205, "num_chars": 2}, {"sum_logits": -1.3759976625442505, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3759976625442505, "logits_per_char": -0.6879988312721252, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 493, "native_id": "LEAP_2002_4_10247", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4625039100646973, "incorrect_loss_raw": 1.3701709906260173, "correct_loss_per_char": 0.7312519550323486, "incorrect_loss_per_char": 0.6850854953130087, "correct_loss_per_token": 1.4625039100646973, "incorrect_loss_per_token": 1.3701709906260173, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3155205249786377, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.3155205249786377, "logits_per_char": -0.6577602624893188, "num_chars": 2}, {"sum_logits": -1.4625039100646973, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4625039100646973, "logits_per_char": -0.7312519550323486, "num_chars": 2}, {"sum_logits": -1.4790982007980347, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4790982007980347, "logits_per_char": -0.7395491003990173, "num_chars": 2}, {"sum_logits": -1.3158942461013794, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.3158942461013794, "logits_per_char": -0.6579471230506897, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 494, "native_id": "Mercury_SC_405481", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.372534155845642, "incorrect_loss_raw": 1.396873156229655, "correct_loss_per_char": 0.686267077922821, "incorrect_loss_per_char": 0.6984365781148275, "correct_loss_per_token": 1.372534155845642, "incorrect_loss_per_token": 1.396873156229655, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3868756294250488, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3868756294250488, "logits_per_char": -0.6934378147125244, "num_chars": 2}, {"sum_logits": -1.4225363731384277, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4225363731384277, "logits_per_char": -0.7112681865692139, "num_chars": 2}, {"sum_logits": -1.372534155845642, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.372534155845642, "logits_per_char": -0.686267077922821, "num_chars": 2}, {"sum_logits": -1.3812074661254883, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3812074661254883, "logits_per_char": -0.6906037330627441, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 495, "native_id": "Mercury_SC_400401", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4207504987716675, "incorrect_loss_raw": 1.3822987874348958, "correct_loss_per_char": 0.7103752493858337, "incorrect_loss_per_char": 0.6911493937174479, "correct_loss_per_token": 1.4207504987716675, "incorrect_loss_per_token": 1.3822987874348958, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.431736707687378, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.431736707687378, "logits_per_char": -0.715868353843689, "num_chars": 2}, {"sum_logits": -1.4207504987716675, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.4207504987716675, "logits_per_char": -0.7103752493858337, "num_chars": 2}, {"sum_logits": -1.4354194402694702, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.4354194402694702, "logits_per_char": -0.7177097201347351, "num_chars": 2}, {"sum_logits": -1.2797402143478394, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.2797402143478394, "logits_per_char": -0.6398701071739197, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 496, "native_id": "Mercury_7064260", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4285955429077148, "incorrect_loss_raw": 1.3807673454284668, "correct_loss_per_char": 0.7142977714538574, "incorrect_loss_per_char": 0.6903836727142334, "correct_loss_per_token": 1.4285955429077148, "incorrect_loss_per_token": 1.3807673454284668, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2772148847579956, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.2772148847579956, "logits_per_char": -0.6386074423789978, "num_chars": 2}, {"sum_logits": -1.4285955429077148, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4285955429077148, "logits_per_char": -0.7142977714538574, "num_chars": 2}, {"sum_logits": -1.4665706157684326, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4665706157684326, "logits_per_char": -0.7332853078842163, "num_chars": 2}, {"sum_logits": -1.3985165357589722, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.3985165357589722, "logits_per_char": -0.6992582678794861, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 497, "native_id": "Mercury_7015995", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4689313173294067, "incorrect_loss_raw": 1.3733977476755779, "correct_loss_per_char": 0.7344656586647034, "incorrect_loss_per_char": 0.6866988738377889, "correct_loss_per_token": 1.4689313173294067, "incorrect_loss_per_token": 1.3733977476755779, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4173862934112549, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.4173862934112549, "logits_per_char": -0.7086931467056274, "num_chars": 2}, {"sum_logits": -1.4689313173294067, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.4689313173294067, "logits_per_char": -0.7344656586647034, "num_chars": 2}, {"sum_logits": -1.21946382522583, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": true, "logits_per_token": -1.21946382522583, "logits_per_char": -0.609731912612915, "num_chars": 2}, {"sum_logits": -1.4833431243896484, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.4833431243896484, "logits_per_char": -0.7416715621948242, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 498, "native_id": "Mercury_400887", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2708362340927124, "incorrect_loss_raw": 1.4492512146631877, "correct_loss_per_char": 0.6354181170463562, "incorrect_loss_per_char": 0.7246256073315939, "correct_loss_per_token": 1.2708362340927124, "incorrect_loss_per_token": 1.4492512146631877, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3906652927398682, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -1.3906652927398682, "logits_per_char": -0.6953326463699341, "num_chars": 2}, {"sum_logits": -1.2708362340927124, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": true, "logits_per_token": -1.2708362340927124, "logits_per_char": -0.6354181170463562, "num_chars": 2}, {"sum_logits": -1.6753796339035034, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -1.6753796339035034, "logits_per_char": -0.8376898169517517, "num_chars": 2}, {"sum_logits": -1.2817087173461914, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -1.2817087173461914, "logits_per_char": -0.6408543586730957, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 499, "native_id": "Mercury_7247678", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.482325792312622, "incorrect_loss_raw": 1.3901557922363281, "correct_loss_per_char": 0.741162896156311, "incorrect_loss_per_char": 0.6950778961181641, "correct_loss_per_token": 1.482325792312622, "incorrect_loss_per_token": 1.3901557922363281, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.482325792312622, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.482325792312622, "logits_per_char": -0.741162896156311, "num_chars": 2}, {"sum_logits": -1.7090381383895874, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.7090381383895874, "logits_per_char": -0.8545190691947937, "num_chars": 2}, {"sum_logits": -1.3558334112167358, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.3558334112167358, "logits_per_char": -0.6779167056083679, "num_chars": 2}, {"sum_logits": -1.1055958271026611, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": true, "logits_per_token": -1.1055958271026611, "logits_per_char": -0.5527979135513306, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 500, "native_id": "MDSA_2007_8_24", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4189807176589966, "incorrect_loss_raw": 1.381086826324463, "correct_loss_per_char": 0.7094903588294983, "incorrect_loss_per_char": 0.6905434131622314, "correct_loss_per_token": 1.4189807176589966, "incorrect_loss_per_token": 1.381086826324463, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3411903381347656, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": true, "logits_per_token": -1.3411903381347656, "logits_per_char": -0.6705951690673828, "num_chars": 2}, {"sum_logits": -1.4457696676254272, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.4457696676254272, "logits_per_char": -0.7228848338127136, "num_chars": 2}, {"sum_logits": -1.4189807176589966, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.4189807176589966, "logits_per_char": -0.7094903588294983, "num_chars": 2}, {"sum_logits": -1.3563004732131958, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.3563004732131958, "logits_per_char": -0.6781502366065979, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 501, "native_id": "AKDE&ED_2008_8_48", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4282830953598022, "incorrect_loss_raw": 1.383204698562622, "correct_loss_per_char": 0.7141415476799011, "incorrect_loss_per_char": 0.691602349281311, "correct_loss_per_token": 1.4282830953598022, "incorrect_loss_per_token": 1.383204698562622, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4282830953598022, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4282830953598022, "logits_per_char": -0.7141415476799011, "num_chars": 2}, {"sum_logits": -1.3658393621444702, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.3658393621444702, "logits_per_char": -0.6829196810722351, "num_chars": 2}, {"sum_logits": -1.5096561908721924, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.5096561908721924, "logits_per_char": -0.7548280954360962, "num_chars": 2}, {"sum_logits": -1.2741185426712036, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.2741185426712036, "logits_per_char": -0.6370592713356018, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 502, "native_id": "Mercury_401014", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5052862167358398, "incorrect_loss_raw": 1.3547948996225994, "correct_loss_per_char": 0.7526431083679199, "incorrect_loss_per_char": 0.6773974498112997, "correct_loss_per_token": 1.5052862167358398, "incorrect_loss_per_token": 1.3547948996225994, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3620944023132324, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.3620944023132324, "logits_per_char": -0.6810472011566162, "num_chars": 2}, {"sum_logits": -1.3420050144195557, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": true, "logits_per_token": -1.3420050144195557, "logits_per_char": -0.6710025072097778, "num_chars": 2}, {"sum_logits": -1.5052862167358398, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.5052862167358398, "logits_per_char": -0.7526431083679199, "num_chars": 2}, {"sum_logits": -1.3602852821350098, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.3602852821350098, "logits_per_char": -0.6801426410675049, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 503, "native_id": "Mercury_7106698", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4132593870162964, "incorrect_loss_raw": 1.391295353571574, "correct_loss_per_char": 0.7066296935081482, "incorrect_loss_per_char": 0.695647676785787, "correct_loss_per_token": 1.4132593870162964, "incorrect_loss_per_token": 1.391295353571574, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4132593870162964, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4132593870162964, "logits_per_char": -0.7066296935081482, "num_chars": 2}, {"sum_logits": -1.4114352464675903, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4114352464675903, "logits_per_char": -0.7057176232337952, "num_chars": 2}, {"sum_logits": -1.5310730934143066, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.5310730934143066, "logits_per_char": -0.7655365467071533, "num_chars": 2}, {"sum_logits": -1.2313777208328247, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.2313777208328247, "logits_per_char": -0.6156888604164124, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 504, "native_id": "Mercury_7143308", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4212266206741333, "incorrect_loss_raw": 1.3851087888081868, "correct_loss_per_char": 0.7106133103370667, "incorrect_loss_per_char": 0.6925543944040934, "correct_loss_per_token": 1.4212266206741333, "incorrect_loss_per_token": 1.3851087888081868, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4186112880706787, "num_tokens": 1, "num_tokens_all": 422, "is_greedy": false, "logits_per_token": -1.4186112880706787, "logits_per_char": -0.7093056440353394, "num_chars": 2}, {"sum_logits": -1.4212266206741333, "num_tokens": 1, "num_tokens_all": 422, "is_greedy": false, "logits_per_token": -1.4212266206741333, "logits_per_char": -0.7106133103370667, "num_chars": 2}, {"sum_logits": -1.48189377784729, "num_tokens": 1, "num_tokens_all": 422, "is_greedy": false, "logits_per_token": -1.48189377784729, "logits_per_char": -0.740946888923645, "num_chars": 2}, {"sum_logits": -1.2548213005065918, "num_tokens": 1, "num_tokens_all": 422, "is_greedy": true, "logits_per_token": -1.2548213005065918, "logits_per_char": -0.6274106502532959, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 505, "native_id": "MCAS_2005_9_21", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3432989120483398, "incorrect_loss_raw": 1.4082676966985066, "correct_loss_per_char": 0.6716494560241699, "incorrect_loss_per_char": 0.7041338483492533, "correct_loss_per_token": 1.3432989120483398, "incorrect_loss_per_token": 1.4082676966985066, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.352794885635376, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.352794885635376, "logits_per_char": -0.676397442817688, "num_chars": 2}, {"sum_logits": -1.3432989120483398, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.3432989120483398, "logits_per_char": -0.6716494560241699, "num_chars": 2}, {"sum_logits": -1.4302293062210083, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4302293062210083, "logits_per_char": -0.7151146531105042, "num_chars": 2}, {"sum_logits": -1.4417788982391357, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4417788982391357, "logits_per_char": -0.7208894491195679, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 506, "native_id": "Mercury_400443", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5250883102416992, "incorrect_loss_raw": 1.3504287799199421, "correct_loss_per_char": 0.7625441551208496, "incorrect_loss_per_char": 0.6752143899599711, "correct_loss_per_token": 1.5250883102416992, "incorrect_loss_per_token": 1.3504287799199421, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4297192096710205, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.4297192096710205, "logits_per_char": -0.7148596048355103, "num_chars": 2}, {"sum_logits": -1.5250883102416992, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.5250883102416992, "logits_per_char": -0.7625441551208496, "num_chars": 2}, {"sum_logits": -1.3551892042160034, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.3551892042160034, "logits_per_char": -0.6775946021080017, "num_chars": 2}, {"sum_logits": -1.2663779258728027, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": true, "logits_per_token": -1.2663779258728027, "logits_per_char": -0.6331889629364014, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 507, "native_id": "Mercury_7283430", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.388478398323059, "incorrect_loss_raw": 1.4277818202972412, "correct_loss_per_char": 0.6942391991615295, "incorrect_loss_per_char": 0.7138909101486206, "correct_loss_per_token": 1.388478398323059, "incorrect_loss_per_token": 1.4277818202972412, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1342025995254517, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": true, "logits_per_token": -1.1342025995254517, "logits_per_char": -0.5671012997627258, "num_chars": 2}, {"sum_logits": -1.388478398323059, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.388478398323059, "logits_per_char": -0.6942391991615295, "num_chars": 2}, {"sum_logits": -1.4198992252349854, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.4198992252349854, "logits_per_char": -0.7099496126174927, "num_chars": 2}, {"sum_logits": -1.7292436361312866, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.7292436361312866, "logits_per_char": -0.8646218180656433, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 508, "native_id": "Mercury_7159250", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4916077852249146, "incorrect_loss_raw": 1.3608735799789429, "correct_loss_per_char": 0.7458038926124573, "incorrect_loss_per_char": 0.6804367899894714, "correct_loss_per_token": 1.4916077852249146, "incorrect_loss_per_token": 1.3608735799789429, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4538774490356445, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4538774490356445, "logits_per_char": -0.7269387245178223, "num_chars": 2}, {"sum_logits": -1.3284536600112915, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3284536600112915, "logits_per_char": -0.6642268300056458, "num_chars": 2}, {"sum_logits": -1.4916077852249146, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4916077852249146, "logits_per_char": -0.7458038926124573, "num_chars": 2}, {"sum_logits": -1.3002896308898926, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.3002896308898926, "logits_per_char": -0.6501448154449463, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 509, "native_id": "Mercury_401912", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6371771097183228, "incorrect_loss_raw": 1.3511618375778198, "correct_loss_per_char": 0.8185885548591614, "incorrect_loss_per_char": 0.6755809187889099, "correct_loss_per_token": 1.6371771097183228, "incorrect_loss_per_token": 1.3511618375778198, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6371771097183228, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.6371771097183228, "logits_per_char": -0.8185885548591614, "num_chars": 2}, {"sum_logits": -1.7058161497116089, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.7058161497116089, "logits_per_char": -0.8529080748558044, "num_chars": 2}, {"sum_logits": -1.1890590190887451, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.1890590190887451, "logits_per_char": -0.5945295095443726, "num_chars": 2}, {"sum_logits": -1.1586103439331055, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.1586103439331055, "logits_per_char": -0.5793051719665527, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 510, "native_id": "Mercury_7219328", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.334043025970459, "incorrect_loss_raw": 1.4131769339243572, "correct_loss_per_char": 0.6670215129852295, "incorrect_loss_per_char": 0.7065884669621786, "correct_loss_per_token": 1.334043025970459, "incorrect_loss_per_token": 1.4131769339243572, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.511891484260559, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.511891484260559, "logits_per_char": -0.7559457421302795, "num_chars": 2}, {"sum_logits": -1.334043025970459, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.334043025970459, "logits_per_char": -0.6670215129852295, "num_chars": 2}, {"sum_logits": -1.3352776765823364, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.3352776765823364, "logits_per_char": -0.6676388382911682, "num_chars": 2}, {"sum_logits": -1.3923616409301758, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.3923616409301758, "logits_per_char": -0.6961808204650879, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 511, "native_id": "Mercury_7214498", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.258033275604248, "incorrect_loss_raw": 1.4422104358673096, "correct_loss_per_char": 0.629016637802124, "incorrect_loss_per_char": 0.7211052179336548, "correct_loss_per_token": 1.258033275604248, "incorrect_loss_per_token": 1.4422104358673096, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3747398853302002, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3747398853302002, "logits_per_char": -0.6873699426651001, "num_chars": 2}, {"sum_logits": -1.4871560335159302, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4871560335159302, "logits_per_char": -0.7435780167579651, "num_chars": 2}, {"sum_logits": -1.258033275604248, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.258033275604248, "logits_per_char": -0.629016637802124, "num_chars": 2}, {"sum_logits": -1.4647353887557983, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4647353887557983, "logits_per_char": -0.7323676943778992, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 512, "native_id": "TAKS_2009_5_14", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3902990818023682, "incorrect_loss_raw": 1.3900954325993855, "correct_loss_per_char": 0.6951495409011841, "incorrect_loss_per_char": 0.6950477162996928, "correct_loss_per_token": 1.3902990818023682, "incorrect_loss_per_token": 1.3900954325993855, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3902990818023682, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.3902990818023682, "logits_per_char": -0.6951495409011841, "num_chars": 2}, {"sum_logits": -1.3796980381011963, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.3796980381011963, "logits_per_char": -0.6898490190505981, "num_chars": 2}, {"sum_logits": -1.4180115461349487, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4180115461349487, "logits_per_char": -0.7090057730674744, "num_chars": 2}, {"sum_logits": -1.3725767135620117, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.3725767135620117, "logits_per_char": -0.6862883567810059, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 513, "native_id": "NYSEDREGENTS_2013_4_17", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5243874788284302, "incorrect_loss_raw": 1.3526496887207031, "correct_loss_per_char": 0.7621937394142151, "incorrect_loss_per_char": 0.6763248443603516, "correct_loss_per_token": 1.5243874788284302, "incorrect_loss_per_token": 1.3526496887207031, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2947280406951904, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.2947280406951904, "logits_per_char": -0.6473640203475952, "num_chars": 2}, {"sum_logits": -1.3387961387634277, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.3387961387634277, "logits_per_char": -0.6693980693817139, "num_chars": 2}, {"sum_logits": -1.4244248867034912, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4244248867034912, "logits_per_char": -0.7122124433517456, "num_chars": 2}, {"sum_logits": -1.5243874788284302, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.5243874788284302, "logits_per_char": -0.7621937394142151, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 514, "native_id": "Mercury_403907", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.520864725112915, "incorrect_loss_raw": 1.351606289545695, "correct_loss_per_char": 0.7604323625564575, "incorrect_loss_per_char": 0.6758031447728475, "correct_loss_per_token": 1.520864725112915, "incorrect_loss_per_token": 1.351606289545695, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2739269733428955, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.2739269733428955, "logits_per_char": -0.6369634866714478, "num_chars": 2}, {"sum_logits": -1.3569533824920654, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.3569533824920654, "logits_per_char": -0.6784766912460327, "num_chars": 2}, {"sum_logits": -1.520864725112915, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.520864725112915, "logits_per_char": -0.7604323625564575, "num_chars": 2}, {"sum_logits": -1.423938512802124, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.423938512802124, "logits_per_char": -0.711969256401062, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 515, "native_id": "Mercury_7081480", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2714526653289795, "incorrect_loss_raw": 1.433903733889262, "correct_loss_per_char": 0.6357263326644897, "incorrect_loss_per_char": 0.716951866944631, "correct_loss_per_token": 1.2714526653289795, "incorrect_loss_per_token": 1.433903733889262, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2714526653289795, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.2714526653289795, "logits_per_char": -0.6357263326644897, "num_chars": 2}, {"sum_logits": -1.3840641975402832, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3840641975402832, "logits_per_char": -0.6920320987701416, "num_chars": 2}, {"sum_logits": -1.4891878366470337, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4891878366470337, "logits_per_char": -0.7445939183235168, "num_chars": 2}, {"sum_logits": -1.4284591674804688, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4284591674804688, "logits_per_char": -0.7142295837402344, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 516, "native_id": "Mercury_416505", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.295430302619934, "incorrect_loss_raw": 1.4326315720876057, "correct_loss_per_char": 0.647715151309967, "incorrect_loss_per_char": 0.7163157860438029, "correct_loss_per_token": 1.295430302619934, "incorrect_loss_per_token": 1.4326315720876057, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5986884832382202, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.5986884832382202, "logits_per_char": -0.7993442416191101, "num_chars": 2}, {"sum_logits": -1.295430302619934, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.295430302619934, "logits_per_char": -0.647715151309967, "num_chars": 2}, {"sum_logits": -1.410375714302063, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.410375714302063, "logits_per_char": -0.7051878571510315, "num_chars": 2}, {"sum_logits": -1.2888305187225342, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": true, "logits_per_token": -1.2888305187225342, "logits_per_char": -0.6444152593612671, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 517, "native_id": "Mercury_7041668", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.161660075187683, "incorrect_loss_raw": 1.4815709988276164, "correct_loss_per_char": 0.5808300375938416, "incorrect_loss_per_char": 0.7407854994138082, "correct_loss_per_token": 1.161660075187683, "incorrect_loss_per_token": 1.4815709988276164, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5627367496490479, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.5627367496490479, "logits_per_char": -0.7813683748245239, "num_chars": 2}, {"sum_logits": -1.3976495265960693, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3976495265960693, "logits_per_char": -0.6988247632980347, "num_chars": 2}, {"sum_logits": -1.484326720237732, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.484326720237732, "logits_per_char": -0.742163360118866, "num_chars": 2}, {"sum_logits": -1.161660075187683, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.161660075187683, "logits_per_char": -0.5808300375938416, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 518, "native_id": "Mercury_SC_401309", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4653013944625854, "incorrect_loss_raw": 1.3906127214431763, "correct_loss_per_char": 0.7326506972312927, "incorrect_loss_per_char": 0.6953063607215881, "correct_loss_per_token": 1.4653013944625854, "incorrect_loss_per_token": 1.3906127214431763, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.433375597000122, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.433375597000122, "logits_per_char": -0.716687798500061, "num_chars": 2}, {"sum_logits": -1.6339441537857056, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.6339441537857056, "logits_per_char": -0.8169720768928528, "num_chars": 2}, {"sum_logits": -1.4653013944625854, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4653013944625854, "logits_per_char": -0.7326506972312927, "num_chars": 2}, {"sum_logits": -1.1045184135437012, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.1045184135437012, "logits_per_char": -0.5522592067718506, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 519, "native_id": "NYSEDREGENTS_2010_4_1", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4823389053344727, "incorrect_loss_raw": 1.3627489805221558, "correct_loss_per_char": 0.7411694526672363, "incorrect_loss_per_char": 0.6813744902610779, "correct_loss_per_token": 1.4823389053344727, "incorrect_loss_per_token": 1.3627489805221558, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3881371021270752, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.3881371021270752, "logits_per_char": -0.6940685510635376, "num_chars": 2}, {"sum_logits": -1.4823389053344727, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4823389053344727, "logits_per_char": -0.7411694526672363, "num_chars": 2}, {"sum_logits": -1.2911287546157837, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.2911287546157837, "logits_per_char": -0.6455643773078918, "num_chars": 2}, {"sum_logits": -1.4089810848236084, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4089810848236084, "logits_per_char": -0.7044905424118042, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 520, "native_id": "ACTAAP_2007_7_36", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3611236810684204, "incorrect_loss_raw": 1.4167016744613647, "correct_loss_per_char": 0.6805618405342102, "incorrect_loss_per_char": 0.7083508372306824, "correct_loss_per_token": 1.3611236810684204, "incorrect_loss_per_token": 1.4167016744613647, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5558140277862549, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.5558140277862549, "logits_per_char": -0.7779070138931274, "num_chars": 2}, {"sum_logits": -1.5252258777618408, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.5252258777618408, "logits_per_char": -0.7626129388809204, "num_chars": 2}, {"sum_logits": -1.3611236810684204, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3611236810684204, "logits_per_char": -0.6805618405342102, "num_chars": 2}, {"sum_logits": -1.1690651178359985, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.1690651178359985, "logits_per_char": -0.5845325589179993, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 521, "native_id": "VASoL_2009_3_12", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.364253282546997, "incorrect_loss_raw": 1.3998379707336426, "correct_loss_per_char": 0.6821266412734985, "incorrect_loss_per_char": 0.6999189853668213, "correct_loss_per_token": 1.364253282546997, "incorrect_loss_per_token": 1.3998379707336426, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4058506488800049, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.4058506488800049, "logits_per_char": -0.7029253244400024, "num_chars": 2}, {"sum_logits": -1.4190080165863037, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.4190080165863037, "logits_per_char": -0.7095040082931519, "num_chars": 2}, {"sum_logits": -1.3746552467346191, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.3746552467346191, "logits_per_char": -0.6873276233673096, "num_chars": 2}, {"sum_logits": -1.364253282546997, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.364253282546997, "logits_per_char": -0.6821266412734985, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 522, "native_id": "Mercury_7085295", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5046489238739014, "incorrect_loss_raw": 1.35843825340271, "correct_loss_per_char": 0.7523244619369507, "incorrect_loss_per_char": 0.679219126701355, "correct_loss_per_token": 1.5046489238739014, "incorrect_loss_per_token": 1.35843825340271, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2613303661346436, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.2613303661346436, "logits_per_char": -0.6306651830673218, "num_chars": 2}, {"sum_logits": -1.339255690574646, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.339255690574646, "logits_per_char": -0.669627845287323, "num_chars": 2}, {"sum_logits": -1.5046489238739014, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.5046489238739014, "logits_per_char": -0.7523244619369507, "num_chars": 2}, {"sum_logits": -1.4747287034988403, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4747287034988403, "logits_per_char": -0.7373643517494202, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 523, "native_id": "Mercury_7201968", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.524151086807251, "incorrect_loss_raw": 1.361400882403056, "correct_loss_per_char": 0.7620755434036255, "incorrect_loss_per_char": 0.680700441201528, "correct_loss_per_token": 1.524151086807251, "incorrect_loss_per_token": 1.361400882403056, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.524151086807251, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.524151086807251, "logits_per_char": -0.7620755434036255, "num_chars": 2}, {"sum_logits": -1.3471384048461914, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.3471384048461914, "logits_per_char": -0.6735692024230957, "num_chars": 2}, {"sum_logits": -1.56496000289917, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.56496000289917, "logits_per_char": -0.782480001449585, "num_chars": 2}, {"sum_logits": -1.1721042394638062, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": true, "logits_per_token": -1.1721042394638062, "logits_per_char": -0.5860521197319031, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 524, "native_id": "Mercury_7214008", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4821882247924805, "incorrect_loss_raw": 1.3665038744608562, "correct_loss_per_char": 0.7410941123962402, "incorrect_loss_per_char": 0.6832519372304281, "correct_loss_per_token": 1.4821882247924805, "incorrect_loss_per_token": 1.3665038744608562, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4896674156188965, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.4896674156188965, "logits_per_char": -0.7448337078094482, "num_chars": 2}, {"sum_logits": -1.364292860031128, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.364292860031128, "logits_per_char": -0.682146430015564, "num_chars": 2}, {"sum_logits": -1.4821882247924805, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.4821882247924805, "logits_per_char": -0.7410941123962402, "num_chars": 2}, {"sum_logits": -1.245551347732544, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": true, "logits_per_token": -1.245551347732544, "logits_per_char": -0.622775673866272, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 525, "native_id": "Mercury_176855", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.30663001537323, "incorrect_loss_raw": 1.4236083030700684, "correct_loss_per_char": 0.653315007686615, "incorrect_loss_per_char": 0.7118041515350342, "correct_loss_per_token": 1.30663001537323, "incorrect_loss_per_token": 1.4236083030700684, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5323799848556519, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.5323799848556519, "logits_per_char": -0.7661899924278259, "num_chars": 2}, {"sum_logits": -1.30663001537323, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": true, "logits_per_token": -1.30663001537323, "logits_per_char": -0.653315007686615, "num_chars": 2}, {"sum_logits": -1.3996866941452026, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.3996866941452026, "logits_per_char": -0.6998433470726013, "num_chars": 2}, {"sum_logits": -1.3387582302093506, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.3387582302093506, "logits_per_char": -0.6693791151046753, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 526, "native_id": "Mercury_SC_401678", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.425974726676941, "incorrect_loss_raw": 1.3824692169825237, "correct_loss_per_char": 0.7129873633384705, "incorrect_loss_per_char": 0.6912346084912618, "correct_loss_per_token": 1.425974726676941, "incorrect_loss_per_token": 1.3824692169825237, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4964168071746826, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4964168071746826, "logits_per_char": -0.7482084035873413, "num_chars": 2}, {"sum_logits": -1.425974726676941, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.425974726676941, "logits_per_char": -0.7129873633384705, "num_chars": 2}, {"sum_logits": -1.3677685260772705, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.3677685260772705, "logits_per_char": -0.6838842630386353, "num_chars": 2}, {"sum_logits": -1.2832223176956177, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.2832223176956177, "logits_per_char": -0.6416111588478088, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 527, "native_id": "Mercury_417143", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.237200379371643, "incorrect_loss_raw": 1.4493515094121296, "correct_loss_per_char": 0.6186001896858215, "incorrect_loss_per_char": 0.7246757547060648, "correct_loss_per_token": 1.237200379371643, "incorrect_loss_per_token": 1.4493515094121296, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5206680297851562, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.5206680297851562, "logits_per_char": -0.7603340148925781, "num_chars": 2}, {"sum_logits": -1.4306836128234863, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4306836128234863, "logits_per_char": -0.7153418064117432, "num_chars": 2}, {"sum_logits": -1.3967028856277466, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.3967028856277466, "logits_per_char": -0.6983514428138733, "num_chars": 2}, {"sum_logits": -1.237200379371643, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.237200379371643, "logits_per_char": -0.6186001896858215, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 528, "native_id": "NYSEDREGENTS_2013_4_21", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3158406019210815, "incorrect_loss_raw": 1.416876236597697, "correct_loss_per_char": 0.6579203009605408, "incorrect_loss_per_char": 0.7084381182988485, "correct_loss_per_token": 1.3158406019210815, "incorrect_loss_per_token": 1.416876236597697, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4763507843017578, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4763507843017578, "logits_per_char": -0.7381753921508789, "num_chars": 2}, {"sum_logits": -1.3435993194580078, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3435993194580078, "logits_per_char": -0.6717996597290039, "num_chars": 2}, {"sum_logits": -1.4306786060333252, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4306786060333252, "logits_per_char": -0.7153393030166626, "num_chars": 2}, {"sum_logits": -1.3158406019210815, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.3158406019210815, "logits_per_char": -0.6579203009605408, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 529, "native_id": "Mercury_7032620", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.391022801399231, "incorrect_loss_raw": 1.3971853256225586, "correct_loss_per_char": 0.6955114006996155, "incorrect_loss_per_char": 0.6985926628112793, "correct_loss_per_token": 1.391022801399231, "incorrect_loss_per_token": 1.3971853256225586, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2490079402923584, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.2490079402923584, "logits_per_char": -0.6245039701461792, "num_chars": 2}, {"sum_logits": -1.391022801399231, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.391022801399231, "logits_per_char": -0.6955114006996155, "num_chars": 2}, {"sum_logits": -1.4864022731781006, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4864022731781006, "logits_per_char": -0.7432011365890503, "num_chars": 2}, {"sum_logits": -1.4561457633972168, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4561457633972168, "logits_per_char": -0.7280728816986084, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 530, "native_id": "NYSEDREGENTS_2008_8_9", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.311051607131958, "incorrect_loss_raw": 1.421489953994751, "correct_loss_per_char": 0.655525803565979, "incorrect_loss_per_char": 0.7107449769973755, "correct_loss_per_token": 1.311051607131958, "incorrect_loss_per_token": 1.421489953994751, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.434904932975769, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.434904932975769, "logits_per_char": -0.7174524664878845, "num_chars": 2}, {"sum_logits": -1.3201653957366943, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3201653957366943, "logits_per_char": -0.6600826978683472, "num_chars": 2}, {"sum_logits": -1.5093995332717896, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.5093995332717896, "logits_per_char": -0.7546997666358948, "num_chars": 2}, {"sum_logits": -1.311051607131958, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.311051607131958, "logits_per_char": -0.655525803565979, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 531, "native_id": "TAKS_2009_8_27", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3726799488067627, "incorrect_loss_raw": 1.3971442778905232, "correct_loss_per_char": 0.6863399744033813, "incorrect_loss_per_char": 0.6985721389452616, "correct_loss_per_token": 1.3726799488067627, "incorrect_loss_per_token": 1.3971442778905232, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4299525022506714, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4299525022506714, "logits_per_char": -0.7149762511253357, "num_chars": 2}, {"sum_logits": -1.4393436908721924, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4393436908721924, "logits_per_char": -0.7196718454360962, "num_chars": 2}, {"sum_logits": -1.3726799488067627, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.3726799488067627, "logits_per_char": -0.6863399744033813, "num_chars": 2}, {"sum_logits": -1.322136640548706, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.322136640548706, "logits_per_char": -0.661068320274353, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 532, "native_id": "NCEOGA_2013_8_57", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3635849952697754, "incorrect_loss_raw": 1.3997353315353394, "correct_loss_per_char": 0.6817924976348877, "incorrect_loss_per_char": 0.6998676657676697, "correct_loss_per_token": 1.3635849952697754, "incorrect_loss_per_token": 1.3997353315353394, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4391348361968994, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.4391348361968994, "logits_per_char": -0.7195674180984497, "num_chars": 2}, {"sum_logits": -1.393131971359253, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.393131971359253, "logits_per_char": -0.6965659856796265, "num_chars": 2}, {"sum_logits": -1.3635849952697754, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": true, "logits_per_token": -1.3635849952697754, "logits_per_char": -0.6817924976348877, "num_chars": 2}, {"sum_logits": -1.3669391870498657, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.3669391870498657, "logits_per_char": -0.6834695935249329, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 533, "native_id": "Mercury_SC_413143", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4578958749771118, "incorrect_loss_raw": 1.3685719172159831, "correct_loss_per_char": 0.7289479374885559, "incorrect_loss_per_char": 0.6842859586079916, "correct_loss_per_token": 1.4578958749771118, "incorrect_loss_per_token": 1.3685719172159831, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3793554306030273, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.3793554306030273, "logits_per_char": -0.6896777153015137, "num_chars": 2}, {"sum_logits": -1.3900452852249146, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.3900452852249146, "logits_per_char": -0.6950226426124573, "num_chars": 2}, {"sum_logits": -1.4578958749771118, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.4578958749771118, "logits_per_char": -0.7289479374885559, "num_chars": 2}, {"sum_logits": -1.3363150358200073, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": true, "logits_per_token": -1.3363150358200073, "logits_per_char": -0.6681575179100037, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 534, "native_id": "Mercury_401195", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3416954278945923, "incorrect_loss_raw": 1.4086386760075886, "correct_loss_per_char": 0.6708477139472961, "incorrect_loss_per_char": 0.7043193380037943, "correct_loss_per_token": 1.3416954278945923, "incorrect_loss_per_token": 1.4086386760075886, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4103177785873413, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4103177785873413, "logits_per_char": -0.7051588892936707, "num_chars": 2}, {"sum_logits": -1.3695329427719116, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3695329427719116, "logits_per_char": -0.6847664713859558, "num_chars": 2}, {"sum_logits": -1.4460653066635132, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4460653066635132, "logits_per_char": -0.7230326533317566, "num_chars": 2}, {"sum_logits": -1.3416954278945923, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.3416954278945923, "logits_per_char": -0.6708477139472961, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 535, "native_id": "CSZ10358", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.416032314300537, "incorrect_loss_raw": 1.385380506515503, "correct_loss_per_char": 0.7080161571502686, "incorrect_loss_per_char": 0.6926902532577515, "correct_loss_per_token": 1.416032314300537, "incorrect_loss_per_token": 1.385380506515503, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.416032314300537, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.416032314300537, "logits_per_char": -0.7080161571502686, "num_chars": 2}, {"sum_logits": -1.3662928342819214, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.3662928342819214, "logits_per_char": -0.6831464171409607, "num_chars": 2}, {"sum_logits": -1.4339762926101685, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4339762926101685, "logits_per_char": -0.7169881463050842, "num_chars": 2}, {"sum_logits": -1.355872392654419, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.355872392654419, "logits_per_char": -0.6779361963272095, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 536, "native_id": "MCAS_1999_4_26", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4018460512161255, "incorrect_loss_raw": 1.38644278049469, "correct_loss_per_char": 0.7009230256080627, "incorrect_loss_per_char": 0.693221390247345, "correct_loss_per_token": 1.4018460512161255, "incorrect_loss_per_token": 1.38644278049469, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3706846237182617, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3706846237182617, "logits_per_char": -0.6853423118591309, "num_chars": 2}, {"sum_logits": -1.3369332551956177, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.3369332551956177, "logits_per_char": -0.6684666275978088, "num_chars": 2}, {"sum_logits": -1.4018460512161255, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4018460512161255, "logits_per_char": -0.7009230256080627, "num_chars": 2}, {"sum_logits": -1.4517104625701904, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4517104625701904, "logits_per_char": -0.7258552312850952, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 537, "native_id": "AKDE&ED_2008_8_36", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.42339026927948, "incorrect_loss_raw": 1.3797831932703655, "correct_loss_per_char": 0.71169513463974, "incorrect_loss_per_char": 0.6898915966351827, "correct_loss_per_token": 1.42339026927948, "incorrect_loss_per_token": 1.3797831932703655, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.42339026927948, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.42339026927948, "logits_per_char": -0.71169513463974, "num_chars": 2}, {"sum_logits": -1.3763426542282104, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3763426542282104, "logits_per_char": -0.6881713271141052, "num_chars": 2}, {"sum_logits": -1.3445919752120972, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.3445919752120972, "logits_per_char": -0.6722959876060486, "num_chars": 2}, {"sum_logits": -1.4184149503707886, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4184149503707886, "logits_per_char": -0.7092074751853943, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 538, "native_id": "Mercury_7017938", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.373079538345337, "incorrect_loss_raw": 1.3963666359583538, "correct_loss_per_char": 0.6865397691726685, "incorrect_loss_per_char": 0.6981833179791769, "correct_loss_per_token": 1.373079538345337, "incorrect_loss_per_token": 1.3963666359583538, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4157826900482178, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4157826900482178, "logits_per_char": -0.7078913450241089, "num_chars": 2}, {"sum_logits": -1.373079538345337, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.373079538345337, "logits_per_char": -0.6865397691726685, "num_chars": 2}, {"sum_logits": -1.4283831119537354, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4283831119537354, "logits_per_char": -0.7141915559768677, "num_chars": 2}, {"sum_logits": -1.344934105873108, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.344934105873108, "logits_per_char": -0.672467052936554, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 539, "native_id": "MDSA_2013_8_32", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.427014946937561, "incorrect_loss_raw": 1.3788445790608723, "correct_loss_per_char": 0.7135074734687805, "incorrect_loss_per_char": 0.6894222895304362, "correct_loss_per_token": 1.427014946937561, "incorrect_loss_per_token": 1.3788445790608723, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.352827787399292, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.352827787399292, "logits_per_char": -0.676413893699646, "num_chars": 2}, {"sum_logits": -1.3450936079025269, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.3450936079025269, "logits_per_char": -0.6725468039512634, "num_chars": 2}, {"sum_logits": -1.4386123418807983, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4386123418807983, "logits_per_char": -0.7193061709403992, "num_chars": 2}, {"sum_logits": -1.427014946937561, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.427014946937561, "logits_per_char": -0.7135074734687805, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 540, "native_id": "Mercury_7038028", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4740560054779053, "incorrect_loss_raw": 1.3692984978357952, "correct_loss_per_char": 0.7370280027389526, "incorrect_loss_per_char": 0.6846492489178976, "correct_loss_per_token": 1.4740560054779053, "incorrect_loss_per_token": 1.3692984978357952, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4304931163787842, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4304931163787842, "logits_per_char": -0.7152465581893921, "num_chars": 2}, {"sum_logits": -1.442049264907837, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.442049264907837, "logits_per_char": -0.7210246324539185, "num_chars": 2}, {"sum_logits": -1.4740560054779053, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4740560054779053, "logits_per_char": -0.7370280027389526, "num_chars": 2}, {"sum_logits": -1.2353531122207642, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.2353531122207642, "logits_per_char": -0.6176765561103821, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 541, "native_id": "Mercury_7057103", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4143381118774414, "incorrect_loss_raw": 1.387466271718343, "correct_loss_per_char": 0.7071690559387207, "incorrect_loss_per_char": 0.6937331358591715, "correct_loss_per_token": 1.4143381118774414, "incorrect_loss_per_token": 1.387466271718343, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4861116409301758, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4861116409301758, "logits_per_char": -0.7430558204650879, "num_chars": 2}, {"sum_logits": -1.3879826068878174, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.3879826068878174, "logits_per_char": -0.6939913034439087, "num_chars": 2}, {"sum_logits": -1.4143381118774414, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4143381118774414, "logits_per_char": -0.7071690559387207, "num_chars": 2}, {"sum_logits": -1.2883045673370361, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.2883045673370361, "logits_per_char": -0.6441522836685181, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 542, "native_id": "NYSEDREGENTS_2008_4_26", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4531984329223633, "incorrect_loss_raw": 1.371357003847758, "correct_loss_per_char": 0.7265992164611816, "incorrect_loss_per_char": 0.685678501923879, "correct_loss_per_token": 1.4531984329223633, "incorrect_loss_per_token": 1.371357003847758, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4229620695114136, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4229620695114136, "logits_per_char": -0.7114810347557068, "num_chars": 2}, {"sum_logits": -1.3637449741363525, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.3637449741363525, "logits_per_char": -0.6818724870681763, "num_chars": 2}, {"sum_logits": -1.4531984329223633, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4531984329223633, "logits_per_char": -0.7265992164611816, "num_chars": 2}, {"sum_logits": -1.3273639678955078, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.3273639678955078, "logits_per_char": -0.6636819839477539, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 543, "native_id": "Mercury_417117", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5830060243606567, "incorrect_loss_raw": 1.3359642028808594, "correct_loss_per_char": 0.7915030121803284, "incorrect_loss_per_char": 0.6679821014404297, "correct_loss_per_token": 1.5830060243606567, "incorrect_loss_per_token": 1.3359642028808594, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3268537521362305, "num_tokens": 1, "num_tokens_all": 429, "is_greedy": false, "logits_per_token": -1.3268537521362305, "logits_per_char": -0.6634268760681152, "num_chars": 2}, {"sum_logits": -1.391830563545227, "num_tokens": 1, "num_tokens_all": 429, "is_greedy": false, "logits_per_token": -1.391830563545227, "logits_per_char": -0.6959152817726135, "num_chars": 2}, {"sum_logits": -1.5830060243606567, "num_tokens": 1, "num_tokens_all": 429, "is_greedy": false, "logits_per_token": -1.5830060243606567, "logits_per_char": -0.7915030121803284, "num_chars": 2}, {"sum_logits": -1.2892082929611206, "num_tokens": 1, "num_tokens_all": 429, "is_greedy": true, "logits_per_token": -1.2892082929611206, "logits_per_char": -0.6446041464805603, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 544, "native_id": "MCAS_2016_8_15", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3574206829071045, "incorrect_loss_raw": 1.4072439273198445, "correct_loss_per_char": 0.6787103414535522, "incorrect_loss_per_char": 0.7036219636599222, "correct_loss_per_token": 1.3574206829071045, "incorrect_loss_per_token": 1.4072439273198445, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5522328615188599, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": false, "logits_per_token": -1.5522328615188599, "logits_per_char": -0.7761164307594299, "num_chars": 2}, {"sum_logits": -1.3186602592468262, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": true, "logits_per_token": -1.3186602592468262, "logits_per_char": -0.6593301296234131, "num_chars": 2}, {"sum_logits": -1.3574206829071045, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": false, "logits_per_token": -1.3574206829071045, "logits_per_char": -0.6787103414535522, "num_chars": 2}, {"sum_logits": -1.3508386611938477, "num_tokens": 1, "num_tokens_all": 470, "is_greedy": false, "logits_per_token": -1.3508386611938477, "logits_per_char": -0.6754193305969238, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 545, "native_id": "Mercury_400780", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.40494704246521, "incorrect_loss_raw": 1.4049506584803264, "correct_loss_per_char": 0.702473521232605, "incorrect_loss_per_char": 0.7024753292401632, "correct_loss_per_token": 1.40494704246521, "incorrect_loss_per_token": 1.4049506584803264, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3187739849090576, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.3187739849090576, "logits_per_char": -0.6593869924545288, "num_chars": 2}, {"sum_logits": -1.6167906522750854, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.6167906522750854, "logits_per_char": -0.8083953261375427, "num_chars": 2}, {"sum_logits": -1.40494704246521, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.40494704246521, "logits_per_char": -0.702473521232605, "num_chars": 2}, {"sum_logits": -1.279287338256836, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.279287338256836, "logits_per_char": -0.639643669128418, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 546, "native_id": "NYSEDREGENTS_2008_8_32", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2882037162780762, "incorrect_loss_raw": 1.4272995789845784, "correct_loss_per_char": 0.6441018581390381, "incorrect_loss_per_char": 0.7136497894922892, "correct_loss_per_token": 1.2882037162780762, "incorrect_loss_per_token": 1.4272995789845784, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2882037162780762, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.2882037162780762, "logits_per_char": -0.6441018581390381, "num_chars": 2}, {"sum_logits": -1.3536787033081055, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.3536787033081055, "logits_per_char": -0.6768393516540527, "num_chars": 2}, {"sum_logits": -1.454514741897583, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.454514741897583, "logits_per_char": -0.7272573709487915, "num_chars": 2}, {"sum_logits": -1.4737052917480469, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4737052917480469, "logits_per_char": -0.7368526458740234, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 547, "native_id": "Mercury_SC_416104", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4004788398742676, "incorrect_loss_raw": 1.3918044169743855, "correct_loss_per_char": 0.7002394199371338, "incorrect_loss_per_char": 0.6959022084871928, "correct_loss_per_token": 1.4004788398742676, "incorrect_loss_per_token": 1.3918044169743855, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.307608962059021, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.307608962059021, "logits_per_char": -0.6538044810295105, "num_chars": 2}, {"sum_logits": -1.3478120565414429, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.3478120565414429, "logits_per_char": -0.6739060282707214, "num_chars": 2}, {"sum_logits": -1.4004788398742676, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.4004788398742676, "logits_per_char": -0.7002394199371338, "num_chars": 2}, {"sum_logits": -1.5199922323226929, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.5199922323226929, "logits_per_char": -0.7599961161613464, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 548, "native_id": "Mercury_416646", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3338541984558105, "incorrect_loss_raw": 1.4136168162027996, "correct_loss_per_char": 0.6669270992279053, "incorrect_loss_per_char": 0.7068084081013998, "correct_loss_per_token": 1.3338541984558105, "incorrect_loss_per_token": 1.4136168162027996, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4712274074554443, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.4712274074554443, "logits_per_char": -0.7356137037277222, "num_chars": 2}, {"sum_logits": -1.3338541984558105, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": true, "logits_per_token": -1.3338541984558105, "logits_per_char": -0.6669270992279053, "num_chars": 2}, {"sum_logits": -1.40012788772583, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.40012788772583, "logits_per_char": -0.700063943862915, "num_chars": 2}, {"sum_logits": -1.369495153427124, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.369495153427124, "logits_per_char": -0.684747576713562, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 549, "native_id": "Mercury_SC_405296", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3889793157577515, "incorrect_loss_raw": 1.3951996564865112, "correct_loss_per_char": 0.6944896578788757, "incorrect_loss_per_char": 0.6975998282432556, "correct_loss_per_token": 1.3889793157577515, "incorrect_loss_per_token": 1.3951996564865112, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5039979219436646, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.5039979219436646, "logits_per_char": -0.7519989609718323, "num_chars": 2}, {"sum_logits": -1.4024393558502197, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4024393558502197, "logits_per_char": -0.7012196779251099, "num_chars": 2}, {"sum_logits": -1.3889793157577515, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.3889793157577515, "logits_per_char": -0.6944896578788757, "num_chars": 2}, {"sum_logits": -1.2791616916656494, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.2791616916656494, "logits_per_char": -0.6395808458328247, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 550, "native_id": "MCAS_2006_8_31", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3446763753890991, "incorrect_loss_raw": 1.4085523684819539, "correct_loss_per_char": 0.6723381876945496, "incorrect_loss_per_char": 0.7042761842409769, "correct_loss_per_token": 1.3446763753890991, "incorrect_loss_per_token": 1.4085523684819539, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5103927850723267, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.5103927850723267, "logits_per_char": -0.7551963925361633, "num_chars": 2}, {"sum_logits": -1.3341941833496094, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.3341941833496094, "logits_per_char": -0.6670970916748047, "num_chars": 2}, {"sum_logits": -1.3810701370239258, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3810701370239258, "logits_per_char": -0.6905350685119629, "num_chars": 2}, {"sum_logits": -1.3446763753890991, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3446763753890991, "logits_per_char": -0.6723381876945496, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 551, "native_id": "MCAS_2015_5_14", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.428797960281372, "incorrect_loss_raw": 1.3771715958913167, "correct_loss_per_char": 0.714398980140686, "incorrect_loss_per_char": 0.6885857979456583, "correct_loss_per_token": 1.428797960281372, "incorrect_loss_per_token": 1.3771715958913167, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.428797960281372, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.428797960281372, "logits_per_char": -0.714398980140686, "num_chars": 2}, {"sum_logits": -1.3869801759719849, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3869801759719849, "logits_per_char": -0.6934900879859924, "num_chars": 2}, {"sum_logits": -1.369637131690979, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.369637131690979, "logits_per_char": -0.6848185658454895, "num_chars": 2}, {"sum_logits": -1.3748974800109863, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3748974800109863, "logits_per_char": -0.6874487400054932, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 552, "native_id": "Mercury_417465", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2825188636779785, "incorrect_loss_raw": 1.4347573121388753, "correct_loss_per_char": 0.6412594318389893, "incorrect_loss_per_char": 0.7173786560694376, "correct_loss_per_token": 1.2825188636779785, "incorrect_loss_per_token": 1.4347573121388753, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5829781293869019, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.5829781293869019, "logits_per_char": -0.7914890646934509, "num_chars": 2}, {"sum_logits": -1.4109348058700562, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.4109348058700562, "logits_per_char": -0.7054674029350281, "num_chars": 2}, {"sum_logits": -1.2825188636779785, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.2825188636779785, "logits_per_char": -0.6412594318389893, "num_chars": 2}, {"sum_logits": -1.310359001159668, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.310359001159668, "logits_per_char": -0.655179500579834, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 553, "native_id": "MCAS_1998_4_19", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6407560110092163, "incorrect_loss_raw": 1.3260446389516194, "correct_loss_per_char": 0.8203780055046082, "incorrect_loss_per_char": 0.6630223194758097, "correct_loss_per_token": 1.6407560110092163, "incorrect_loss_per_token": 1.3260446389516194, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.190946340560913, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.190946340560913, "logits_per_char": -0.5954731702804565, "num_chars": 2}, {"sum_logits": -1.4035675525665283, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4035675525665283, "logits_per_char": -0.7017837762832642, "num_chars": 2}, {"sum_logits": -1.383620023727417, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.383620023727417, "logits_per_char": -0.6918100118637085, "num_chars": 2}, {"sum_logits": -1.6407560110092163, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.6407560110092163, "logits_per_char": -0.8203780055046082, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 554, "native_id": "Mercury_7214778", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2965751886367798, "incorrect_loss_raw": 1.4247815211613972, "correct_loss_per_char": 0.6482875943183899, "incorrect_loss_per_char": 0.7123907605806986, "correct_loss_per_token": 1.2965751886367798, "incorrect_loss_per_token": 1.4247815211613972, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4135442972183228, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4135442972183228, "logits_per_char": -0.7067721486091614, "num_chars": 2}, {"sum_logits": -1.3856313228607178, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3856313228607178, "logits_per_char": -0.6928156614303589, "num_chars": 2}, {"sum_logits": -1.4751689434051514, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4751689434051514, "logits_per_char": -0.7375844717025757, "num_chars": 2}, {"sum_logits": -1.2965751886367798, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.2965751886367798, "logits_per_char": -0.6482875943183899, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 555, "native_id": "Mercury_7123393", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2947869300842285, "incorrect_loss_raw": 1.4307745695114136, "correct_loss_per_char": 0.6473934650421143, "incorrect_loss_per_char": 0.7153872847557068, "correct_loss_per_token": 1.2947869300842285, "incorrect_loss_per_token": 1.4307745695114136, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2807848453521729, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.2807848453521729, "logits_per_char": -0.6403924226760864, "num_chars": 2}, {"sum_logits": -1.4630986452102661, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4630986452102661, "logits_per_char": -0.7315493226051331, "num_chars": 2}, {"sum_logits": -1.5484402179718018, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.5484402179718018, "logits_per_char": -0.7742201089859009, "num_chars": 2}, {"sum_logits": -1.2947869300842285, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.2947869300842285, "logits_per_char": -0.6473934650421143, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 556, "native_id": "Mercury_7207550", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.444104552268982, "incorrect_loss_raw": 1.383804480234782, "correct_loss_per_char": 0.722052276134491, "incorrect_loss_per_char": 0.691902240117391, "correct_loss_per_token": 1.444104552268982, "incorrect_loss_per_token": 1.383804480234782, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.206995964050293, "num_tokens": 1, "num_tokens_all": 421, "is_greedy": true, "logits_per_token": -1.206995964050293, "logits_per_char": -0.6034979820251465, "num_chars": 2}, {"sum_logits": -1.3833824396133423, "num_tokens": 1, "num_tokens_all": 421, "is_greedy": false, "logits_per_token": -1.3833824396133423, "logits_per_char": -0.6916912198066711, "num_chars": 2}, {"sum_logits": -1.5610350370407104, "num_tokens": 1, "num_tokens_all": 421, "is_greedy": false, "logits_per_token": -1.5610350370407104, "logits_per_char": -0.7805175185203552, "num_chars": 2}, {"sum_logits": -1.444104552268982, "num_tokens": 1, "num_tokens_all": 421, "is_greedy": false, "logits_per_token": -1.444104552268982, "logits_per_char": -0.722052276134491, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 557, "native_id": "Mercury_SC_405827", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5137938261032104, "incorrect_loss_raw": 1.3610810836156209, "correct_loss_per_char": 0.7568969130516052, "incorrect_loss_per_char": 0.6805405418078104, "correct_loss_per_token": 1.5137938261032104, "incorrect_loss_per_token": 1.3610810836156209, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4500705003738403, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.4500705003738403, "logits_per_char": -0.7250352501869202, "num_chars": 2}, {"sum_logits": -1.5137938261032104, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.5137938261032104, "logits_per_char": -0.7568969130516052, "num_chars": 2}, {"sum_logits": -1.4651811122894287, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.4651811122894287, "logits_per_char": -0.7325905561447144, "num_chars": 2}, {"sum_logits": -1.1679916381835938, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": true, "logits_per_token": -1.1679916381835938, "logits_per_char": -0.5839958190917969, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 558, "native_id": "NYSEDREGENTS_2015_4_11", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3751672506332397, "incorrect_loss_raw": 1.397940715154012, "correct_loss_per_char": 0.6875836253166199, "incorrect_loss_per_char": 0.698970357577006, "correct_loss_per_token": 1.3751672506332397, "incorrect_loss_per_token": 1.397940715154012, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3311922550201416, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.3311922550201416, "logits_per_char": -0.6655961275100708, "num_chars": 2}, {"sum_logits": -1.3871605396270752, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3871605396270752, "logits_per_char": -0.6935802698135376, "num_chars": 2}, {"sum_logits": -1.4754693508148193, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4754693508148193, "logits_per_char": -0.7377346754074097, "num_chars": 2}, {"sum_logits": -1.3751672506332397, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3751672506332397, "logits_per_char": -0.6875836253166199, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 559, "native_id": "Mercury_404097", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5290266275405884, "incorrect_loss_raw": 1.3557428121566772, "correct_loss_per_char": 0.7645133137702942, "incorrect_loss_per_char": 0.6778714060783386, "correct_loss_per_token": 1.5290266275405884, "incorrect_loss_per_token": 1.3557428121566772, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2506269216537476, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.2506269216537476, "logits_per_char": -0.6253134608268738, "num_chars": 2}, {"sum_logits": -1.2829467058181763, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.2829467058181763, "logits_per_char": -0.6414733529090881, "num_chars": 2}, {"sum_logits": -1.5290266275405884, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.5290266275405884, "logits_per_char": -0.7645133137702942, "num_chars": 2}, {"sum_logits": -1.533654808998108, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.533654808998108, "logits_per_char": -0.766827404499054, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 560, "native_id": "AIMS_2009_4_4", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.417048692703247, "incorrect_loss_raw": 1.3844017187754314, "correct_loss_per_char": 0.7085243463516235, "incorrect_loss_per_char": 0.6922008593877157, "correct_loss_per_token": 1.417048692703247, "incorrect_loss_per_token": 1.3844017187754314, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.446524739265442, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.446524739265442, "logits_per_char": -0.723262369632721, "num_chars": 2}, {"sum_logits": -1.4001007080078125, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.4001007080078125, "logits_per_char": -0.7000503540039062, "num_chars": 2}, {"sum_logits": -1.417048692703247, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.417048692703247, "logits_per_char": -0.7085243463516235, "num_chars": 2}, {"sum_logits": -1.3065797090530396, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.3065797090530396, "logits_per_char": -0.6532898545265198, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 561, "native_id": "NCEOGA_2013_8_18", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.25942862033844, "incorrect_loss_raw": 1.4438587427139282, "correct_loss_per_char": 0.62971431016922, "incorrect_loss_per_char": 0.7219293713569641, "correct_loss_per_token": 1.25942862033844, "incorrect_loss_per_token": 1.4438587427139282, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.25942862033844, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.25942862033844, "logits_per_char": -0.62971431016922, "num_chars": 2}, {"sum_logits": -1.3266757726669312, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3266757726669312, "logits_per_char": -0.6633378863334656, "num_chars": 2}, {"sum_logits": -1.4810148477554321, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4810148477554321, "logits_per_char": -0.7405074238777161, "num_chars": 2}, {"sum_logits": -1.5238856077194214, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.5238856077194214, "logits_per_char": -0.7619428038597107, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 562, "native_id": "Mercury_400884", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4779248237609863, "incorrect_loss_raw": 1.3737996816635132, "correct_loss_per_char": 0.7389624118804932, "incorrect_loss_per_char": 0.6868998408317566, "correct_loss_per_token": 1.4779248237609863, "incorrect_loss_per_token": 1.3737996816635132, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4718669652938843, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4718669652938843, "logits_per_char": -0.7359334826469421, "num_chars": 2}, {"sum_logits": -1.4779248237609863, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4779248237609863, "logits_per_char": -0.7389624118804932, "num_chars": 2}, {"sum_logits": -1.1935408115386963, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.1935408115386963, "logits_per_char": -0.5967704057693481, "num_chars": 2}, {"sum_logits": -1.455991268157959, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.455991268157959, "logits_per_char": -0.7279956340789795, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 563, "native_id": "Mercury_7219678", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.487367868423462, "incorrect_loss_raw": 1.3713513215382893, "correct_loss_per_char": 0.743683934211731, "incorrect_loss_per_char": 0.6856756607691447, "correct_loss_per_token": 1.487367868423462, "incorrect_loss_per_token": 1.3713513215382893, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.487367868423462, "num_tokens": 1, "num_tokens_all": 422, "is_greedy": false, "logits_per_token": -1.487367868423462, "logits_per_char": -0.743683934211731, "num_chars": 2}, {"sum_logits": -1.418097734451294, "num_tokens": 1, "num_tokens_all": 422, "is_greedy": false, "logits_per_token": -1.418097734451294, "logits_per_char": -0.709048867225647, "num_chars": 2}, {"sum_logits": -1.5261199474334717, "num_tokens": 1, "num_tokens_all": 422, "is_greedy": false, "logits_per_token": -1.5261199474334717, "logits_per_char": -0.7630599737167358, "num_chars": 2}, {"sum_logits": -1.1698362827301025, "num_tokens": 1, "num_tokens_all": 422, "is_greedy": true, "logits_per_token": -1.1698362827301025, "logits_per_char": -0.5849181413650513, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 564, "native_id": "ACTAAP_2010_5_7", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.494390606880188, "incorrect_loss_raw": 1.3679780960083008, "correct_loss_per_char": 0.747195303440094, "incorrect_loss_per_char": 0.6839890480041504, "correct_loss_per_token": 1.494390606880188, "incorrect_loss_per_token": 1.3679780960083008, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5498367547988892, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.5498367547988892, "logits_per_char": -0.7749183773994446, "num_chars": 2}, {"sum_logits": -1.494390606880188, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.494390606880188, "logits_per_char": -0.747195303440094, "num_chars": 2}, {"sum_logits": -1.2811005115509033, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.2811005115509033, "logits_per_char": -0.6405502557754517, "num_chars": 2}, {"sum_logits": -1.2729970216751099, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.2729970216751099, "logits_per_char": -0.6364985108375549, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 565, "native_id": "ACTAAP_2012_7_9", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3981480598449707, "incorrect_loss_raw": 1.3906288941701253, "correct_loss_per_char": 0.6990740299224854, "incorrect_loss_per_char": 0.6953144470850626, "correct_loss_per_token": 1.3981480598449707, "incorrect_loss_per_token": 1.3906288941701253, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4489986896514893, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.4489986896514893, "logits_per_char": -0.7244993448257446, "num_chars": 2}, {"sum_logits": -1.4366294145584106, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.4366294145584106, "logits_per_char": -0.7183147072792053, "num_chars": 2}, {"sum_logits": -1.3981480598449707, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.3981480598449707, "logits_per_char": -0.6990740299224854, "num_chars": 2}, {"sum_logits": -1.286258578300476, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.286258578300476, "logits_per_char": -0.643129289150238, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 566, "native_id": "MCAS_2005_8_6", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.402786374092102, "incorrect_loss_raw": 1.3871686855951946, "correct_loss_per_char": 0.701393187046051, "incorrect_loss_per_char": 0.6935843427975973, "correct_loss_per_token": 1.402786374092102, "incorrect_loss_per_token": 1.3871686855951946, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.402786374092102, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.402786374092102, "logits_per_char": -0.701393187046051, "num_chars": 2}, {"sum_logits": -1.4417558908462524, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.4417558908462524, "logits_per_char": -0.7208779454231262, "num_chars": 2}, {"sum_logits": -1.3582504987716675, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": true, "logits_per_token": -1.3582504987716675, "logits_per_char": -0.6791252493858337, "num_chars": 2}, {"sum_logits": -1.3614996671676636, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.3614996671676636, "logits_per_char": -0.6807498335838318, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 567, "native_id": "Mercury_SC_401162", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2659521102905273, "incorrect_loss_raw": 1.4484949509302776, "correct_loss_per_char": 0.6329760551452637, "incorrect_loss_per_char": 0.7242474754651388, "correct_loss_per_token": 1.2659521102905273, "incorrect_loss_per_token": 1.4484949509302776, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6341071128845215, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.6341071128845215, "logits_per_char": -0.8170535564422607, "num_chars": 2}, {"sum_logits": -1.4550572633743286, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4550572633743286, "logits_per_char": -0.7275286316871643, "num_chars": 2}, {"sum_logits": -1.2659521102905273, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.2659521102905273, "logits_per_char": -0.6329760551452637, "num_chars": 2}, {"sum_logits": -1.2563204765319824, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.2563204765319824, "logits_per_char": -0.6281602382659912, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 568, "native_id": "Mercury_SC_407710", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2923542261123657, "incorrect_loss_raw": 1.42532213528951, "correct_loss_per_char": 0.6461771130561829, "incorrect_loss_per_char": 0.712661067644755, "correct_loss_per_token": 1.2923542261123657, "incorrect_loss_per_token": 1.42532213528951, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3996299505233765, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.3996299505233765, "logits_per_char": -0.6998149752616882, "num_chars": 2}, {"sum_logits": -1.4665740728378296, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4665740728378296, "logits_per_char": -0.7332870364189148, "num_chars": 2}, {"sum_logits": -1.4097623825073242, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4097623825073242, "logits_per_char": -0.7048811912536621, "num_chars": 2}, {"sum_logits": -1.2923542261123657, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.2923542261123657, "logits_per_char": -0.6461771130561829, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 569, "native_id": "VASoL_2009_3_23", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4746981859207153, "incorrect_loss_raw": 1.372532804807027, "correct_loss_per_char": 0.7373490929603577, "incorrect_loss_per_char": 0.6862664024035136, "correct_loss_per_token": 1.4746981859207153, "incorrect_loss_per_token": 1.372532804807027, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1911568641662598, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.1911568641662598, "logits_per_char": -0.5955784320831299, "num_chars": 2}, {"sum_logits": -1.4536192417144775, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4536192417144775, "logits_per_char": -0.7268096208572388, "num_chars": 2}, {"sum_logits": -1.4728223085403442, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4728223085403442, "logits_per_char": -0.7364111542701721, "num_chars": 2}, {"sum_logits": -1.4746981859207153, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4746981859207153, "logits_per_char": -0.7373490929603577, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 570, "native_id": "Mercury_SC_402276", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2954260110855103, "incorrect_loss_raw": 1.4267622232437134, "correct_loss_per_char": 0.6477130055427551, "incorrect_loss_per_char": 0.7133811116218567, "correct_loss_per_token": 1.2954260110855103, "incorrect_loss_per_token": 1.4267622232437134, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5186004638671875, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.5186004638671875, "logits_per_char": -0.7593002319335938, "num_chars": 2}, {"sum_logits": -1.3896729946136475, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.3896729946136475, "logits_per_char": -0.6948364973068237, "num_chars": 2}, {"sum_logits": -1.3720132112503052, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.3720132112503052, "logits_per_char": -0.6860066056251526, "num_chars": 2}, {"sum_logits": -1.2954260110855103, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": true, "logits_per_token": -1.2954260110855103, "logits_per_char": -0.6477130055427551, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 571, "native_id": "Mercury_400744", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4232169389724731, "incorrect_loss_raw": 1.384833574295044, "correct_loss_per_char": 0.7116084694862366, "incorrect_loss_per_char": 0.692416787147522, "correct_loss_per_token": 1.4232169389724731, "incorrect_loss_per_token": 1.384833574295044, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4232169389724731, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4232169389724731, "logits_per_char": -0.7116084694862366, "num_chars": 2}, {"sum_logits": -1.321760654449463, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.321760654449463, "logits_per_char": -0.6608803272247314, "num_chars": 2}, {"sum_logits": -1.4965134859085083, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4965134859085083, "logits_per_char": -0.7482567429542542, "num_chars": 2}, {"sum_logits": -1.3362265825271606, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3362265825271606, "logits_per_char": -0.6681132912635803, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 572, "native_id": "Mercury_SC_LBS10902", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4266797304153442, "incorrect_loss_raw": 1.3812541564305623, "correct_loss_per_char": 0.7133398652076721, "incorrect_loss_per_char": 0.6906270782152811, "correct_loss_per_token": 1.4266797304153442, "incorrect_loss_per_token": 1.3812541564305623, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3702224493026733, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.3702224493026733, "logits_per_char": -0.6851112246513367, "num_chars": 2}, {"sum_logits": -1.4266797304153442, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4266797304153442, "logits_per_char": -0.7133398652076721, "num_chars": 2}, {"sum_logits": -1.4683252573013306, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4683252573013306, "logits_per_char": -0.7341626286506653, "num_chars": 2}, {"sum_logits": -1.305214762687683, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.305214762687683, "logits_per_char": -0.6526073813438416, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 573, "native_id": "Mercury_7133245", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4871450662612915, "incorrect_loss_raw": 1.3647879759470622, "correct_loss_per_char": 0.7435725331306458, "incorrect_loss_per_char": 0.6823939879735311, "correct_loss_per_token": 1.4871450662612915, "incorrect_loss_per_token": 1.3647879759470622, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4476486444473267, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.4476486444473267, "logits_per_char": -0.7238243222236633, "num_chars": 2}, {"sum_logits": -1.4871450662612915, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.4871450662612915, "logits_per_char": -0.7435725331306458, "num_chars": 2}, {"sum_logits": -1.4268980026245117, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.4268980026245117, "logits_per_char": -0.7134490013122559, "num_chars": 2}, {"sum_logits": -1.2198172807693481, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": true, "logits_per_token": -1.2198172807693481, "logits_per_char": -0.6099086403846741, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 574, "native_id": "Mercury_7131530", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5074552297592163, "incorrect_loss_raw": 1.375407616297404, "correct_loss_per_char": 0.7537276148796082, "incorrect_loss_per_char": 0.687703808148702, "correct_loss_per_token": 1.5074552297592163, "incorrect_loss_per_token": 1.375407616297404, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5735251903533936, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.5735251903533936, "logits_per_char": -0.7867625951766968, "num_chars": 2}, {"sum_logits": -1.5074552297592163, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.5074552297592163, "logits_per_char": -0.7537276148796082, "num_chars": 2}, {"sum_logits": -1.4663668870925903, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": false, "logits_per_token": -1.4663668870925903, "logits_per_char": -0.7331834435462952, "num_chars": 2}, {"sum_logits": -1.086330771446228, "num_tokens": 1, "num_tokens_all": 445, "is_greedy": true, "logits_per_token": -1.086330771446228, "logits_per_char": -0.543165385723114, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 575, "native_id": "Mercury_7041143", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3672171831130981, "incorrect_loss_raw": 1.4004727204640706, "correct_loss_per_char": 0.6836085915565491, "incorrect_loss_per_char": 0.7002363602320353, "correct_loss_per_token": 1.3672171831130981, "incorrect_loss_per_token": 1.4004727204640706, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4619842767715454, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4619842767715454, "logits_per_char": -0.7309921383857727, "num_chars": 2}, {"sum_logits": -1.369646430015564, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.369646430015564, "logits_per_char": -0.684823215007782, "num_chars": 2}, {"sum_logits": -1.3672171831130981, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.3672171831130981, "logits_per_char": -0.6836085915565491, "num_chars": 2}, {"sum_logits": -1.3697874546051025, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3697874546051025, "logits_per_char": -0.6848937273025513, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 576, "native_id": "MCAS_2010_5_11984", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4157525300979614, "incorrect_loss_raw": 1.3834632635116577, "correct_loss_per_char": 0.7078762650489807, "incorrect_loss_per_char": 0.6917316317558289, "correct_loss_per_token": 1.4157525300979614, "incorrect_loss_per_token": 1.3834632635116577, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4157525300979614, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4157525300979614, "logits_per_char": -0.7078762650489807, "num_chars": 2}, {"sum_logits": -1.3787704706192017, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3787704706192017, "logits_per_char": -0.6893852353096008, "num_chars": 2}, {"sum_logits": -1.4304383993148804, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4304383993148804, "logits_per_char": -0.7152191996574402, "num_chars": 2}, {"sum_logits": -1.3411809206008911, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.3411809206008911, "logits_per_char": -0.6705904603004456, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 577, "native_id": "Mercury_7159285", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4669489860534668, "incorrect_loss_raw": 1.37702743212382, "correct_loss_per_char": 0.7334744930267334, "incorrect_loss_per_char": 0.68851371606191, "correct_loss_per_token": 1.4669489860534668, "incorrect_loss_per_token": 1.37702743212382, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5466216802597046, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.5466216802597046, "logits_per_char": -0.7733108401298523, "num_chars": 2}, {"sum_logits": -1.408669352531433, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.408669352531433, "logits_per_char": -0.7043346762657166, "num_chars": 2}, {"sum_logits": -1.4669489860534668, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.4669489860534668, "logits_per_char": -0.7334744930267334, "num_chars": 2}, {"sum_logits": -1.1757912635803223, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": true, "logits_per_token": -1.1757912635803223, "logits_per_char": -0.5878956317901611, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 578, "native_id": "AIMS_2008_8_13", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3306034803390503, "incorrect_loss_raw": 1.4144479036331177, "correct_loss_per_char": 0.6653017401695251, "incorrect_loss_per_char": 0.7072239518165588, "correct_loss_per_token": 1.3306034803390503, "incorrect_loss_per_token": 1.4144479036331177, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4781630039215088, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4781630039215088, "logits_per_char": -0.7390815019607544, "num_chars": 2}, {"sum_logits": -1.388515830039978, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.388515830039978, "logits_per_char": -0.694257915019989, "num_chars": 2}, {"sum_logits": -1.3766648769378662, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.3766648769378662, "logits_per_char": -0.6883324384689331, "num_chars": 2}, {"sum_logits": -1.3306034803390503, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.3306034803390503, "logits_per_char": -0.6653017401695251, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 579, "native_id": "MDSA_2013_8_20", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.314253807067871, "incorrect_loss_raw": 1.416646162668864, "correct_loss_per_char": 0.6571269035339355, "incorrect_loss_per_char": 0.708323081334432, "correct_loss_per_token": 1.314253807067871, "incorrect_loss_per_token": 1.416646162668864, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4081093072891235, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4081093072891235, "logits_per_char": -0.7040546536445618, "num_chars": 2}, {"sum_logits": -1.4209754467010498, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4209754467010498, "logits_per_char": -0.7104877233505249, "num_chars": 2}, {"sum_logits": -1.314253807067871, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.314253807067871, "logits_per_char": -0.6571269035339355, "num_chars": 2}, {"sum_logits": -1.4208537340164185, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4208537340164185, "logits_per_char": -0.7104268670082092, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 580, "native_id": "Mercury_7114100", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3082882165908813, "incorrect_loss_raw": 1.4218045075734456, "correct_loss_per_char": 0.6541441082954407, "incorrect_loss_per_char": 0.7109022537867228, "correct_loss_per_token": 1.3082882165908813, "incorrect_loss_per_token": 1.4218045075734456, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3082882165908813, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.3082882165908813, "logits_per_char": -0.6541441082954407, "num_chars": 2}, {"sum_logits": -1.4071531295776367, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4071531295776367, "logits_per_char": -0.7035765647888184, "num_chars": 2}, {"sum_logits": -1.472262978553772, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.472262978553772, "logits_per_char": -0.736131489276886, "num_chars": 2}, {"sum_logits": -1.3859974145889282, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.3859974145889282, "logits_per_char": -0.6929987072944641, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 581, "native_id": "Mercury_7213343", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.386944055557251, "incorrect_loss_raw": 1.392934004465739, "correct_loss_per_char": 0.6934720277786255, "incorrect_loss_per_char": 0.6964670022328695, "correct_loss_per_token": 1.386944055557251, "incorrect_loss_per_token": 1.392934004465739, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.386944055557251, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.386944055557251, "logits_per_char": -0.6934720277786255, "num_chars": 2}, {"sum_logits": -1.3924888372421265, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.3924888372421265, "logits_per_char": -0.6962444186210632, "num_chars": 2}, {"sum_logits": -1.4676986932754517, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.4676986932754517, "logits_per_char": -0.7338493466377258, "num_chars": 2}, {"sum_logits": -1.3186144828796387, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": true, "logits_per_token": -1.3186144828796387, "logits_per_char": -0.6593072414398193, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 582, "native_id": "Mercury_SC_LBS10597", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.410810947418213, "incorrect_loss_raw": 1.3849047819773357, "correct_loss_per_char": 0.7054054737091064, "incorrect_loss_per_char": 0.6924523909886678, "correct_loss_per_token": 1.410810947418213, "incorrect_loss_per_token": 1.3849047819773357, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.410810947418213, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.410810947418213, "logits_per_char": -0.7054054737091064, "num_chars": 2}, {"sum_logits": -1.3297780752182007, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.3297780752182007, "logits_per_char": -0.6648890376091003, "num_chars": 2}, {"sum_logits": -1.4011948108673096, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4011948108673096, "logits_per_char": -0.7005974054336548, "num_chars": 2}, {"sum_logits": -1.4237414598464966, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4237414598464966, "logits_per_char": -0.7118707299232483, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 583, "native_id": "Mercury_7126263", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.344631314277649, "incorrect_loss_raw": 1.4064406156539917, "correct_loss_per_char": 0.6723156571388245, "incorrect_loss_per_char": 0.7032203078269958, "correct_loss_per_token": 1.344631314277649, "incorrect_loss_per_token": 1.4064406156539917, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3479548692703247, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.3479548692703247, "logits_per_char": -0.6739774346351624, "num_chars": 2}, {"sum_logits": -1.344631314277649, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.344631314277649, "logits_per_char": -0.6723156571388245, "num_chars": 2}, {"sum_logits": -1.4029160737991333, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.4029160737991333, "logits_per_char": -0.7014580368995667, "num_chars": 2}, {"sum_logits": -1.468450903892517, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.468450903892517, "logits_per_char": -0.7342254519462585, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 584, "native_id": "Mercury_7133613", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6312426328659058, "incorrect_loss_raw": 1.3331265846888225, "correct_loss_per_char": 0.8156213164329529, "incorrect_loss_per_char": 0.6665632923444113, "correct_loss_per_token": 1.6312426328659058, "incorrect_loss_per_token": 1.3331265846888225, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6312426328659058, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.6312426328659058, "logits_per_char": -0.8156213164329529, "num_chars": 2}, {"sum_logits": -1.3760497570037842, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.3760497570037842, "logits_per_char": -0.6880248785018921, "num_chars": 2}, {"sum_logits": -1.4989360570907593, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": false, "logits_per_token": -1.4989360570907593, "logits_per_char": -0.7494680285453796, "num_chars": 2}, {"sum_logits": -1.1243939399719238, "num_tokens": 1, "num_tokens_all": 458, "is_greedy": true, "logits_per_token": -1.1243939399719238, "logits_per_char": -0.5621969699859619, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 585, "native_id": "Mercury_7234605", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3346292972564697, "incorrect_loss_raw": 1.4088738759358723, "correct_loss_per_char": 0.6673146486282349, "incorrect_loss_per_char": 0.7044369379679362, "correct_loss_per_token": 1.3346292972564697, "incorrect_loss_per_token": 1.4088738759358723, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4028961658477783, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4028961658477783, "logits_per_char": -0.7014480829238892, "num_chars": 2}, {"sum_logits": -1.3865751028060913, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3865751028060913, "logits_per_char": -0.6932875514030457, "num_chars": 2}, {"sum_logits": -1.4371503591537476, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4371503591537476, "logits_per_char": -0.7185751795768738, "num_chars": 2}, {"sum_logits": -1.3346292972564697, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.3346292972564697, "logits_per_char": -0.6673146486282349, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 586, "native_id": "Mercury_SC_400839", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4542787075042725, "incorrect_loss_raw": 1.3701005379358928, "correct_loss_per_char": 0.7271393537521362, "incorrect_loss_per_char": 0.6850502689679464, "correct_loss_per_token": 1.4542787075042725, "incorrect_loss_per_token": 1.3701005379358928, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4034897089004517, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4034897089004517, "logits_per_char": -0.7017448544502258, "num_chars": 2}, {"sum_logits": -1.4542787075042725, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4542787075042725, "logits_per_char": -0.7271393537521362, "num_chars": 2}, {"sum_logits": -1.3788946866989136, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.3788946866989136, "logits_per_char": -0.6894473433494568, "num_chars": 2}, {"sum_logits": -1.327917218208313, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.327917218208313, "logits_per_char": -0.6639586091041565, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 587, "native_id": "Mercury_SC_402984", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3914785385131836, "incorrect_loss_raw": 1.3984890381495159, "correct_loss_per_char": 0.6957392692565918, "incorrect_loss_per_char": 0.6992445190747579, "correct_loss_per_token": 1.3914785385131836, "incorrect_loss_per_token": 1.3984890381495159, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2339279651641846, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.2339279651641846, "logits_per_char": -0.6169639825820923, "num_chars": 2}, {"sum_logits": -1.3914785385131836, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3914785385131836, "logits_per_char": -0.6957392692565918, "num_chars": 2}, {"sum_logits": -1.5057417154312134, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.5057417154312134, "logits_per_char": -0.7528708577156067, "num_chars": 2}, {"sum_logits": -1.4557974338531494, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4557974338531494, "logits_per_char": -0.7278987169265747, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 588, "native_id": "NYSEDREGENTS_2012_4_29", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5710458755493164, "incorrect_loss_raw": 1.3389313618342082, "correct_loss_per_char": 0.7855229377746582, "incorrect_loss_per_char": 0.6694656809171041, "correct_loss_per_token": 1.5710458755493164, "incorrect_loss_per_token": 1.3389313618342082, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.351425290107727, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.351425290107727, "logits_per_char": -0.6757126450538635, "num_chars": 2}, {"sum_logits": -1.3360013961791992, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.3360013961791992, "logits_per_char": -0.6680006980895996, "num_chars": 2}, {"sum_logits": -1.5710458755493164, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.5710458755493164, "logits_per_char": -0.7855229377746582, "num_chars": 2}, {"sum_logits": -1.3293673992156982, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": true, "logits_per_token": -1.3293673992156982, "logits_per_char": -0.6646836996078491, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 589, "native_id": "VASoL_2009_3_22", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3515201807022095, "incorrect_loss_raw": 1.4055602550506592, "correct_loss_per_char": 0.6757600903511047, "incorrect_loss_per_char": 0.7027801275253296, "correct_loss_per_token": 1.3515201807022095, "incorrect_loss_per_token": 1.4055602550506592, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3617289066314697, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3617289066314697, "logits_per_char": -0.6808644533157349, "num_chars": 2}, {"sum_logits": -1.4912854433059692, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4912854433059692, "logits_per_char": -0.7456427216529846, "num_chars": 2}, {"sum_logits": -1.3636664152145386, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3636664152145386, "logits_per_char": -0.6818332076072693, "num_chars": 2}, {"sum_logits": -1.3515201807022095, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.3515201807022095, "logits_per_char": -0.6757600903511047, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 590, "native_id": "Mercury_409349", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.439429759979248, "incorrect_loss_raw": 1.377689282099406, "correct_loss_per_char": 0.719714879989624, "incorrect_loss_per_char": 0.688844641049703, "correct_loss_per_token": 1.439429759979248, "incorrect_loss_per_token": 1.377689282099406, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.439429759979248, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.439429759979248, "logits_per_char": -0.719714879989624, "num_chars": 2}, {"sum_logits": -1.3164788484573364, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": true, "logits_per_token": -1.3164788484573364, "logits_per_char": -0.6582394242286682, "num_chars": 2}, {"sum_logits": -1.4484162330627441, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.4484162330627441, "logits_per_char": -0.7242081165313721, "num_chars": 2}, {"sum_logits": -1.3681727647781372, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.3681727647781372, "logits_per_char": -0.6840863823890686, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 591, "native_id": "Mercury_SC_407417", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3590298891067505, "incorrect_loss_raw": 1.401646335919698, "correct_loss_per_char": 0.6795149445533752, "incorrect_loss_per_char": 0.700823167959849, "correct_loss_per_token": 1.3590298891067505, "incorrect_loss_per_token": 1.401646335919698, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.370548129081726, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.370548129081726, "logits_per_char": -0.685274064540863, "num_chars": 2}, {"sum_logits": -1.3909159898757935, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3909159898757935, "logits_per_char": -0.6954579949378967, "num_chars": 2}, {"sum_logits": -1.4434748888015747, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4434748888015747, "logits_per_char": -0.7217374444007874, "num_chars": 2}, {"sum_logits": -1.3590298891067505, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.3590298891067505, "logits_per_char": -0.6795149445533752, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 592, "native_id": "VASoL_2007_5_21", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3411641120910645, "incorrect_loss_raw": 1.410069465637207, "correct_loss_per_char": 0.6705820560455322, "incorrect_loss_per_char": 0.7050347328186035, "correct_loss_per_token": 1.3411641120910645, "incorrect_loss_per_token": 1.410069465637207, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4069533348083496, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4069533348083496, "logits_per_char": -0.7034766674041748, "num_chars": 2}, {"sum_logits": -1.390393853187561, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.390393853187561, "logits_per_char": -0.6951969265937805, "num_chars": 2}, {"sum_logits": -1.3411641120910645, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.3411641120910645, "logits_per_char": -0.6705820560455322, "num_chars": 2}, {"sum_logits": -1.4328612089157104, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4328612089157104, "logits_per_char": -0.7164306044578552, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 593, "native_id": "MCAS_2012_8_23651", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4558098316192627, "incorrect_loss_raw": 1.369720697402954, "correct_loss_per_char": 0.7279049158096313, "incorrect_loss_per_char": 0.684860348701477, "correct_loss_per_token": 1.4558098316192627, "incorrect_loss_per_token": 1.369720697402954, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4558098316192627, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4558098316192627, "logits_per_char": -0.7279049158096313, "num_chars": 2}, {"sum_logits": -1.3531582355499268, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3531582355499268, "logits_per_char": -0.6765791177749634, "num_chars": 2}, {"sum_logits": -1.3520699739456177, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.3520699739456177, "logits_per_char": -0.6760349869728088, "num_chars": 2}, {"sum_logits": -1.4039338827133179, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4039338827133179, "logits_per_char": -0.7019669413566589, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 594, "native_id": "MCAS_2000_4_26", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4096537828445435, "incorrect_loss_raw": 1.3846275409062703, "correct_loss_per_char": 0.7048268914222717, "incorrect_loss_per_char": 0.6923137704531351, "correct_loss_per_token": 1.4096537828445435, "incorrect_loss_per_token": 1.3846275409062703, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3305509090423584, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.3305509090423584, "logits_per_char": -0.6652754545211792, "num_chars": 2}, {"sum_logits": -1.4097756147384644, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4097756147384644, "logits_per_char": -0.7048878073692322, "num_chars": 2}, {"sum_logits": -1.4135560989379883, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4135560989379883, "logits_per_char": -0.7067780494689941, "num_chars": 2}, {"sum_logits": -1.4096537828445435, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4096537828445435, "logits_per_char": -0.7048268914222717, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 595, "native_id": "Mercury_SC_410971", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3901091814041138, "incorrect_loss_raw": 1.3913133144378662, "correct_loss_per_char": 0.6950545907020569, "incorrect_loss_per_char": 0.6956566572189331, "correct_loss_per_token": 1.3901091814041138, "incorrect_loss_per_token": 1.3913133144378662, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3901091814041138, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3901091814041138, "logits_per_char": -0.6950545907020569, "num_chars": 2}, {"sum_logits": -1.3843505382537842, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3843505382537842, "logits_per_char": -0.6921752691268921, "num_chars": 2}, {"sum_logits": -1.4543161392211914, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4543161392211914, "logits_per_char": -0.7271580696105957, "num_chars": 2}, {"sum_logits": -1.335273265838623, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.335273265838623, "logits_per_char": -0.6676366329193115, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 596, "native_id": "Mercury_404841", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3368569612503052, "incorrect_loss_raw": 1.41525399684906, "correct_loss_per_char": 0.6684284806251526, "incorrect_loss_per_char": 0.70762699842453, "correct_loss_per_token": 1.3368569612503052, "incorrect_loss_per_token": 1.41525399684906, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5195711851119995, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.5195711851119995, "logits_per_char": -0.7597855925559998, "num_chars": 2}, {"sum_logits": -1.4629926681518555, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4629926681518555, "logits_per_char": -0.7314963340759277, "num_chars": 2}, {"sum_logits": -1.2631981372833252, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.2631981372833252, "logits_per_char": -0.6315990686416626, "num_chars": 2}, {"sum_logits": -1.3368569612503052, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3368569612503052, "logits_per_char": -0.6684284806251526, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 597, "native_id": "Mercury_416651", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.622766137123108, "incorrect_loss_raw": 1.3262944618860881, "correct_loss_per_char": 0.811383068561554, "incorrect_loss_per_char": 0.6631472309430441, "correct_loss_per_token": 1.622766137123108, "incorrect_loss_per_token": 1.3262944618860881, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.622766137123108, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.622766137123108, "logits_per_char": -0.811383068561554, "num_chars": 2}, {"sum_logits": -1.365007758140564, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.365007758140564, "logits_per_char": -0.682503879070282, "num_chars": 2}, {"sum_logits": -1.3798705339431763, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.3798705339431763, "logits_per_char": -0.6899352669715881, "num_chars": 2}, {"sum_logits": -1.234005093574524, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": true, "logits_per_token": -1.234005093574524, "logits_per_char": -0.617002546787262, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 598, "native_id": "Mercury_416576", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3596150875091553, "incorrect_loss_raw": 1.4016342163085938, "correct_loss_per_char": 0.6798075437545776, "incorrect_loss_per_char": 0.7008171081542969, "correct_loss_per_token": 1.3596150875091553, "incorrect_loss_per_token": 1.4016342163085938, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3889554738998413, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": false, "logits_per_token": -1.3889554738998413, "logits_per_char": -0.6944777369499207, "num_chars": 2}, {"sum_logits": -1.3596150875091553, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": true, "logits_per_token": -1.3596150875091553, "logits_per_char": -0.6798075437545776, "num_chars": 2}, {"sum_logits": -1.4246407747268677, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": false, "logits_per_token": -1.4246407747268677, "logits_per_char": -0.7123203873634338, "num_chars": 2}, {"sum_logits": -1.3913064002990723, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": false, "logits_per_token": -1.3913064002990723, "logits_per_char": -0.6956532001495361, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 599, "native_id": "MCAS_1998_8_24", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3100578784942627, "incorrect_loss_raw": 1.4198927879333496, "correct_loss_per_char": 0.6550289392471313, "incorrect_loss_per_char": 0.7099463939666748, "correct_loss_per_token": 1.3100578784942627, "incorrect_loss_per_token": 1.4198927879333496, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.396628975868225, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": false, "logits_per_token": -1.396628975868225, "logits_per_char": -0.6983144879341125, "num_chars": 2}, {"sum_logits": -1.404371976852417, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": false, "logits_per_token": -1.404371976852417, "logits_per_char": -0.7021859884262085, "num_chars": 2}, {"sum_logits": -1.4586774110794067, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": false, "logits_per_token": -1.4586774110794067, "logits_per_char": -0.7293387055397034, "num_chars": 2}, {"sum_logits": -1.3100578784942627, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": true, "logits_per_token": -1.3100578784942627, "logits_per_char": -0.6550289392471313, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 600, "native_id": "Mercury_SC_408367", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2851468324661255, "incorrect_loss_raw": 1.4320269425710042, "correct_loss_per_char": 0.6425734162330627, "incorrect_loss_per_char": 0.7160134712855021, "correct_loss_per_token": 1.2851468324661255, "incorrect_loss_per_token": 1.4320269425710042, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2978200912475586, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.2978200912475586, "logits_per_char": -0.6489100456237793, "num_chars": 2}, {"sum_logits": -1.2851468324661255, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.2851468324661255, "logits_per_char": -0.6425734162330627, "num_chars": 2}, {"sum_logits": -1.493129014968872, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.493129014968872, "logits_per_char": -0.746564507484436, "num_chars": 2}, {"sum_logits": -1.505131721496582, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.505131721496582, "logits_per_char": -0.752565860748291, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 601, "native_id": "Mercury_405804", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1473853588104248, "incorrect_loss_raw": 1.4969985087712605, "correct_loss_per_char": 0.5736926794052124, "incorrect_loss_per_char": 0.7484992543856303, "correct_loss_per_token": 1.1473853588104248, "incorrect_loss_per_token": 1.4969985087712605, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5839108228683472, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.5839108228683472, "logits_per_char": -0.7919554114341736, "num_chars": 2}, {"sum_logits": -1.5962049961090088, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.5962049961090088, "logits_per_char": -0.7981024980545044, "num_chars": 2}, {"sum_logits": -1.3108797073364258, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.3108797073364258, "logits_per_char": -0.6554398536682129, "num_chars": 2}, {"sum_logits": -1.1473853588104248, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": true, "logits_per_token": -1.1473853588104248, "logits_per_char": -0.5736926794052124, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 602, "native_id": "Mercury_7216318", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4826773405075073, "incorrect_loss_raw": 1.3662023544311523, "correct_loss_per_char": 0.7413386702537537, "incorrect_loss_per_char": 0.6831011772155762, "correct_loss_per_token": 1.4826773405075073, "incorrect_loss_per_token": 1.3662023544311523, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4826773405075073, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.4826773405075073, "logits_per_char": -0.7413386702537537, "num_chars": 2}, {"sum_logits": -1.364013671875, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.364013671875, "logits_per_char": -0.6820068359375, "num_chars": 2}, {"sum_logits": -1.490670919418335, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.490670919418335, "logits_per_char": -0.7453354597091675, "num_chars": 2}, {"sum_logits": -1.243922472000122, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.243922472000122, "logits_per_char": -0.621961236000061, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 603, "native_id": "Mercury_401312", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4663581848144531, "incorrect_loss_raw": 1.3736608823140461, "correct_loss_per_char": 0.7331790924072266, "incorrect_loss_per_char": 0.6868304411570231, "correct_loss_per_token": 1.4663581848144531, "incorrect_loss_per_token": 1.3736608823140461, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2600852251052856, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.2600852251052856, "logits_per_char": -0.6300426125526428, "num_chars": 2}, {"sum_logits": -1.3221209049224854, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3221209049224854, "logits_per_char": -0.6610604524612427, "num_chars": 2}, {"sum_logits": -1.5387765169143677, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.5387765169143677, "logits_per_char": -0.7693882584571838, "num_chars": 2}, {"sum_logits": -1.4663581848144531, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4663581848144531, "logits_per_char": -0.7331790924072266, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 604, "native_id": "MDSA_2013_8_23", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.214337706565857, "incorrect_loss_raw": 1.4559694528579712, "correct_loss_per_char": 0.6071688532829285, "incorrect_loss_per_char": 0.7279847264289856, "correct_loss_per_token": 1.214337706565857, "incorrect_loss_per_token": 1.4559694528579712, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4383748769760132, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.4383748769760132, "logits_per_char": -0.7191874384880066, "num_chars": 2}, {"sum_logits": -1.4382290840148926, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.4382290840148926, "logits_per_char": -0.7191145420074463, "num_chars": 2}, {"sum_logits": -1.4913043975830078, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.4913043975830078, "logits_per_char": -0.7456521987915039, "num_chars": 2}, {"sum_logits": -1.214337706565857, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": true, "logits_per_token": -1.214337706565857, "logits_per_char": -0.6071688532829285, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 605, "native_id": "Mercury_SC_405880", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5175539255142212, "incorrect_loss_raw": 1.3587717215220134, "correct_loss_per_char": 0.7587769627571106, "incorrect_loss_per_char": 0.6793858607610067, "correct_loss_per_token": 1.5175539255142212, "incorrect_loss_per_token": 1.3587717215220134, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5175539255142212, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.5175539255142212, "logits_per_char": -0.7587769627571106, "num_chars": 2}, {"sum_logits": -1.4587284326553345, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4587284326553345, "logits_per_char": -0.7293642163276672, "num_chars": 2}, {"sum_logits": -1.4318791627883911, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4318791627883911, "logits_per_char": -0.7159395813941956, "num_chars": 2}, {"sum_logits": -1.1857075691223145, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.1857075691223145, "logits_per_char": -0.5928537845611572, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 606, "native_id": "ACTAAP_2009_5_12", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5611546039581299, "incorrect_loss_raw": 1.3433222770690918, "correct_loss_per_char": 0.7805773019790649, "incorrect_loss_per_char": 0.6716611385345459, "correct_loss_per_token": 1.5611546039581299, "incorrect_loss_per_token": 1.3433222770690918, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3575366735458374, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3575366735458374, "logits_per_char": -0.6787683367729187, "num_chars": 2}, {"sum_logits": -1.5611546039581299, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.5611546039581299, "logits_per_char": -0.7805773019790649, "num_chars": 2}, {"sum_logits": -1.396287202835083, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.396287202835083, "logits_per_char": -0.6981436014175415, "num_chars": 2}, {"sum_logits": -1.276142954826355, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.276142954826355, "logits_per_char": -0.6380714774131775, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 607, "native_id": "CSZ20754", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4723409414291382, "incorrect_loss_raw": 1.3642750183741252, "correct_loss_per_char": 0.7361704707145691, "incorrect_loss_per_char": 0.6821375091870626, "correct_loss_per_token": 1.4723409414291382, "incorrect_loss_per_token": 1.3642750183741252, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3498976230621338, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.3498976230621338, "logits_per_char": -0.6749488115310669, "num_chars": 2}, {"sum_logits": -1.4723409414291382, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4723409414291382, "logits_per_char": -0.7361704707145691, "num_chars": 2}, {"sum_logits": -1.3920093774795532, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3920093774795532, "logits_per_char": -0.6960046887397766, "num_chars": 2}, {"sum_logits": -1.3509180545806885, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3509180545806885, "logits_per_char": -0.6754590272903442, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 608, "native_id": "Mercury_184363", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2899798154830933, "incorrect_loss_raw": 1.431233247121175, "correct_loss_per_char": 0.6449899077415466, "incorrect_loss_per_char": 0.7156166235605875, "correct_loss_per_token": 1.2899798154830933, "incorrect_loss_per_token": 1.431233247121175, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.564815878868103, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.564815878868103, "logits_per_char": -0.7824079394340515, "num_chars": 2}, {"sum_logits": -1.4050874710083008, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4050874710083008, "logits_per_char": -0.7025437355041504, "num_chars": 2}, {"sum_logits": -1.3237963914871216, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.3237963914871216, "logits_per_char": -0.6618981957435608, "num_chars": 2}, {"sum_logits": -1.2899798154830933, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.2899798154830933, "logits_per_char": -0.6449899077415466, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 609, "native_id": "Mercury_7188195", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4201687574386597, "incorrect_loss_raw": 1.3918787638346355, "correct_loss_per_char": 0.7100843787193298, "incorrect_loss_per_char": 0.6959393819173177, "correct_loss_per_token": 1.4201687574386597, "incorrect_loss_per_token": 1.3918787638346355, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5572816133499146, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.5572816133499146, "logits_per_char": -0.7786408066749573, "num_chars": 2}, {"sum_logits": -1.4227787256240845, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.4227787256240845, "logits_per_char": -0.7113893628120422, "num_chars": 2}, {"sum_logits": -1.4201687574386597, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.4201687574386597, "logits_per_char": -0.7100843787193298, "num_chars": 2}, {"sum_logits": -1.1955759525299072, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.1955759525299072, "logits_per_char": -0.5977879762649536, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 610, "native_id": "Mercury_7221043", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.345210075378418, "incorrect_loss_raw": 1.406041105588277, "correct_loss_per_char": 0.672605037689209, "incorrect_loss_per_char": 0.7030205527941386, "correct_loss_per_token": 1.345210075378418, "incorrect_loss_per_token": 1.406041105588277, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4637012481689453, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.4637012481689453, "logits_per_char": -0.7318506240844727, "num_chars": 2}, {"sum_logits": -1.3871886730194092, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.3871886730194092, "logits_per_char": -0.6935943365097046, "num_chars": 2}, {"sum_logits": -1.367233395576477, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.367233395576477, "logits_per_char": -0.6836166977882385, "num_chars": 2}, {"sum_logits": -1.345210075378418, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": true, "logits_per_token": -1.345210075378418, "logits_per_char": -0.672605037689209, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 611, "native_id": "Mercury_7107328", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.433045506477356, "incorrect_loss_raw": 1.3846340576807659, "correct_loss_per_char": 0.716522753238678, "incorrect_loss_per_char": 0.6923170288403829, "correct_loss_per_token": 1.433045506477356, "incorrect_loss_per_token": 1.3846340576807659, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2088704109191895, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.2088704109191895, "logits_per_char": -0.6044352054595947, "num_chars": 2}, {"sum_logits": -1.433045506477356, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.433045506477356, "logits_per_char": -0.716522753238678, "num_chars": 2}, {"sum_logits": -1.4997445344924927, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4997445344924927, "logits_per_char": -0.7498722672462463, "num_chars": 2}, {"sum_logits": -1.4452872276306152, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4452872276306152, "logits_per_char": -0.7226436138153076, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 612, "native_id": "Mercury_415084", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4464374780654907, "incorrect_loss_raw": 1.3748150269190471, "correct_loss_per_char": 0.7232187390327454, "incorrect_loss_per_char": 0.6874075134595236, "correct_loss_per_token": 1.4464374780654907, "incorrect_loss_per_token": 1.3748150269190471, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.473697543144226, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.473697543144226, "logits_per_char": -0.736848771572113, "num_chars": 2}, {"sum_logits": -1.3334269523620605, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.3334269523620605, "logits_per_char": -0.6667134761810303, "num_chars": 2}, {"sum_logits": -1.4464374780654907, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.4464374780654907, "logits_per_char": -0.7232187390327454, "num_chars": 2}, {"sum_logits": -1.3173205852508545, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.3173205852508545, "logits_per_char": -0.6586602926254272, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 613, "native_id": "Mercury_415082", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.474188208580017, "incorrect_loss_raw": 1.3635121981302898, "correct_loss_per_char": 0.7370941042900085, "incorrect_loss_per_char": 0.6817560990651449, "correct_loss_per_token": 1.474188208580017, "incorrect_loss_per_token": 1.3635121981302898, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.474188208580017, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.474188208580017, "logits_per_char": -0.7370941042900085, "num_chars": 2}, {"sum_logits": -1.3685311079025269, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.3685311079025269, "logits_per_char": -0.6842655539512634, "num_chars": 2}, {"sum_logits": -1.3608849048614502, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.3608849048614502, "logits_per_char": -0.6804424524307251, "num_chars": 2}, {"sum_logits": -1.361120581626892, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.361120581626892, "logits_per_char": -0.680560290813446, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 614, "native_id": "Mercury_SC_416169", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3775144815444946, "incorrect_loss_raw": 1.397110899289449, "correct_loss_per_char": 0.6887572407722473, "incorrect_loss_per_char": 0.6985554496447245, "correct_loss_per_token": 1.3775144815444946, "incorrect_loss_per_token": 1.397110899289449, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3775144815444946, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3775144815444946, "logits_per_char": -0.6887572407722473, "num_chars": 2}, {"sum_logits": -1.3868407011032104, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3868407011032104, "logits_per_char": -0.6934203505516052, "num_chars": 2}, {"sum_logits": -1.4956046342849731, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4956046342849731, "logits_per_char": -0.7478023171424866, "num_chars": 2}, {"sum_logits": -1.3088873624801636, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.3088873624801636, "logits_per_char": -0.6544436812400818, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 615, "native_id": "MEA_2011_8_13", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.448179841041565, "incorrect_loss_raw": 1.3791139523188274, "correct_loss_per_char": 0.7240899205207825, "incorrect_loss_per_char": 0.6895569761594137, "correct_loss_per_token": 1.448179841041565, "incorrect_loss_per_token": 1.3791139523188274, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.448179841041565, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.448179841041565, "logits_per_char": -0.7240899205207825, "num_chars": 2}, {"sum_logits": -1.4713026285171509, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.4713026285171509, "logits_per_char": -0.7356513142585754, "num_chars": 2}, {"sum_logits": -1.4602813720703125, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.4602813720703125, "logits_per_char": -0.7301406860351562, "num_chars": 2}, {"sum_logits": -1.2057578563690186, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": true, "logits_per_token": -1.2057578563690186, "logits_per_char": -0.6028789281845093, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 616, "native_id": "TIMSS_2003_4_pg82", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3870830535888672, "incorrect_loss_raw": 1.3928263584772747, "correct_loss_per_char": 0.6935415267944336, "incorrect_loss_per_char": 0.6964131792386373, "correct_loss_per_token": 1.3870830535888672, "incorrect_loss_per_token": 1.3928263584772747, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3870830535888672, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.3870830535888672, "logits_per_char": -0.6935415267944336, "num_chars": 2}, {"sum_logits": -1.4343609809875488, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4343609809875488, "logits_per_char": -0.7171804904937744, "num_chars": 2}, {"sum_logits": -1.4218838214874268, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4218838214874268, "logits_per_char": -0.7109419107437134, "num_chars": 2}, {"sum_logits": -1.3222342729568481, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.3222342729568481, "logits_per_char": -0.6611171364784241, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 617, "native_id": "CSZ30338", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4336031675338745, "incorrect_loss_raw": 1.376966079076131, "correct_loss_per_char": 0.7168015837669373, "incorrect_loss_per_char": 0.6884830395380656, "correct_loss_per_token": 1.4336031675338745, "incorrect_loss_per_token": 1.376966079076131, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4290014505386353, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4290014505386353, "logits_per_char": -0.7145007252693176, "num_chars": 2}, {"sum_logits": -1.3261334896087646, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.3261334896087646, "logits_per_char": -0.6630667448043823, "num_chars": 2}, {"sum_logits": -1.4336031675338745, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4336031675338745, "logits_per_char": -0.7168015837669373, "num_chars": 2}, {"sum_logits": -1.3757632970809937, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3757632970809937, "logits_per_char": -0.6878816485404968, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 618, "native_id": "TIMSS_2003_8_pg85", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.376068353652954, "incorrect_loss_raw": 1.398456374804179, "correct_loss_per_char": 0.688034176826477, "incorrect_loss_per_char": 0.6992281874020895, "correct_loss_per_token": 1.376068353652954, "incorrect_loss_per_token": 1.398456374804179, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3034238815307617, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.3034238815307617, "logits_per_char": -0.6517119407653809, "num_chars": 2}, {"sum_logits": -1.376068353652954, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.376068353652954, "logits_per_char": -0.688034176826477, "num_chars": 2}, {"sum_logits": -1.4690418243408203, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4690418243408203, "logits_per_char": -0.7345209121704102, "num_chars": 2}, {"sum_logits": -1.4229034185409546, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4229034185409546, "logits_per_char": -0.7114517092704773, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 619, "native_id": "Mercury_7221988", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3674066066741943, "incorrect_loss_raw": 1.3975481986999512, "correct_loss_per_char": 0.6837033033370972, "incorrect_loss_per_char": 0.6987740993499756, "correct_loss_per_token": 1.3674066066741943, "incorrect_loss_per_token": 1.3975481986999512, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4346027374267578, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4346027374267578, "logits_per_char": -0.7173013687133789, "num_chars": 2}, {"sum_logits": -1.3913286924362183, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.3913286924362183, "logits_per_char": -0.6956643462181091, "num_chars": 2}, {"sum_logits": -1.3674066066741943, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.3674066066741943, "logits_per_char": -0.6837033033370972, "num_chars": 2}, {"sum_logits": -1.3667131662368774, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.3667131662368774, "logits_per_char": -0.6833565831184387, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 620, "native_id": "NCEOGA_2013_5_11", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.388506531715393, "incorrect_loss_raw": 1.39258869489034, "correct_loss_per_char": 0.6942532658576965, "incorrect_loss_per_char": 0.69629434744517, "correct_loss_per_token": 1.388506531715393, "incorrect_loss_per_token": 1.39258869489034, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4222322702407837, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4222322702407837, "logits_per_char": -0.7111161351203918, "num_chars": 2}, {"sum_logits": -1.4375463724136353, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4375463724136353, "logits_per_char": -0.7187731862068176, "num_chars": 2}, {"sum_logits": -1.3179874420166016, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": true, "logits_per_token": -1.3179874420166016, "logits_per_char": -0.6589937210083008, "num_chars": 2}, {"sum_logits": -1.388506531715393, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.388506531715393, "logits_per_char": -0.6942532658576965, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 621, "native_id": "MCAS_2013_8_29416", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3869757652282715, "incorrect_loss_raw": 1.3955224752426147, "correct_loss_per_char": 0.6934878826141357, "incorrect_loss_per_char": 0.6977612376213074, "correct_loss_per_token": 1.3869757652282715, "incorrect_loss_per_token": 1.3955224752426147, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5201278924942017, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.5201278924942017, "logits_per_char": -0.7600639462471008, "num_chars": 2}, {"sum_logits": -1.3869757652282715, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.3869757652282715, "logits_per_char": -0.6934878826141357, "num_chars": 2}, {"sum_logits": -1.2893800735473633, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.2893800735473633, "logits_per_char": -0.6446900367736816, "num_chars": 2}, {"sum_logits": -1.3770594596862793, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.3770594596862793, "logits_per_char": -0.6885297298431396, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 622, "native_id": "Mercury_SC_401142", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3276818990707397, "incorrect_loss_raw": 1.4156756401062012, "correct_loss_per_char": 0.6638409495353699, "incorrect_loss_per_char": 0.7078378200531006, "correct_loss_per_token": 1.3276818990707397, "incorrect_loss_per_token": 1.4156756401062012, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3141288757324219, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.3141288757324219, "logits_per_char": -0.6570644378662109, "num_chars": 2}, {"sum_logits": -1.3276818990707397, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3276818990707397, "logits_per_char": -0.6638409495353699, "num_chars": 2}, {"sum_logits": -1.515039086341858, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.515039086341858, "logits_per_char": -0.757519543170929, "num_chars": 2}, {"sum_logits": -1.4178589582443237, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.4178589582443237, "logits_per_char": -0.7089294791221619, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 623, "native_id": "Mercury_7206395", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4410001039505005, "incorrect_loss_raw": 1.3835560480753581, "correct_loss_per_char": 0.7205000519752502, "incorrect_loss_per_char": 0.6917780240376791, "correct_loss_per_token": 1.4410001039505005, "incorrect_loss_per_token": 1.3835560480753581, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5141611099243164, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.5141611099243164, "logits_per_char": -0.7570805549621582, "num_chars": 2}, {"sum_logits": -1.4418697357177734, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4418697357177734, "logits_per_char": -0.7209348678588867, "num_chars": 2}, {"sum_logits": -1.4410001039505005, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4410001039505005, "logits_per_char": -0.7205000519752502, "num_chars": 2}, {"sum_logits": -1.1946372985839844, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.1946372985839844, "logits_per_char": -0.5973186492919922, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 624, "native_id": "Mercury_179025", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.575656533241272, "incorrect_loss_raw": 1.3450312217076619, "correct_loss_per_char": 0.787828266620636, "incorrect_loss_per_char": 0.6725156108538309, "correct_loss_per_token": 1.575656533241272, "incorrect_loss_per_token": 1.3450312217076619, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4833637475967407, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.4833637475967407, "logits_per_char": -0.7416818737983704, "num_chars": 2}, {"sum_logits": -1.575656533241272, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.575656533241272, "logits_per_char": -0.787828266620636, "num_chars": 2}, {"sum_logits": -1.339923620223999, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.339923620223999, "logits_per_char": -0.6699618101119995, "num_chars": 2}, {"sum_logits": -1.211806297302246, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.211806297302246, "logits_per_char": -0.605903148651123, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 625, "native_id": "Mercury_7130620", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5397087335586548, "incorrect_loss_raw": 1.3565167983373005, "correct_loss_per_char": 0.7698543667793274, "incorrect_loss_per_char": 0.6782583991686503, "correct_loss_per_token": 1.5397087335586548, "incorrect_loss_per_token": 1.3565167983373005, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.526428461074829, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.526428461074829, "logits_per_char": -0.7632142305374146, "num_chars": 2}, {"sum_logits": -1.5397087335586548, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.5397087335586548, "logits_per_char": -0.7698543667793274, "num_chars": 2}, {"sum_logits": -1.3871541023254395, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3871541023254395, "logits_per_char": -0.6935770511627197, "num_chars": 2}, {"sum_logits": -1.1559678316116333, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.1559678316116333, "logits_per_char": -0.5779839158058167, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 626, "native_id": "Mercury_177870", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.36753249168396, "incorrect_loss_raw": 1.4001108010609944, "correct_loss_per_char": 0.68376624584198, "incorrect_loss_per_char": 0.7000554005304972, "correct_loss_per_token": 1.36753249168396, "incorrect_loss_per_token": 1.4001108010609944, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4225964546203613, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4225964546203613, "logits_per_char": -0.7112982273101807, "num_chars": 2}, {"sum_logits": -1.4670469760894775, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4670469760894775, "logits_per_char": -0.7335234880447388, "num_chars": 2}, {"sum_logits": -1.36753249168396, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.36753249168396, "logits_per_char": -0.68376624584198, "num_chars": 2}, {"sum_logits": -1.3106889724731445, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.3106889724731445, "logits_per_char": -0.6553444862365723, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 627, "native_id": "Mercury_7282083", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4367984533309937, "incorrect_loss_raw": 1.375065525372823, "correct_loss_per_char": 0.7183992266654968, "incorrect_loss_per_char": 0.6875327626864115, "correct_loss_per_token": 1.4367984533309937, "incorrect_loss_per_token": 1.375065525372823, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3450220823287964, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.3450220823287964, "logits_per_char": -0.6725110411643982, "num_chars": 2}, {"sum_logits": -1.4367984533309937, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4367984533309937, "logits_per_char": -0.7183992266654968, "num_chars": 2}, {"sum_logits": -1.393776535987854, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.393776535987854, "logits_per_char": -0.696888267993927, "num_chars": 2}, {"sum_logits": -1.3863979578018188, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3863979578018188, "logits_per_char": -0.6931989789009094, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 628, "native_id": "Mercury_SC_400233", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3638511896133423, "incorrect_loss_raw": 1.4014657735824585, "correct_loss_per_char": 0.6819255948066711, "incorrect_loss_per_char": 0.7007328867912292, "correct_loss_per_token": 1.3638511896133423, "incorrect_loss_per_token": 1.4014657735824585, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3570516109466553, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.3570516109466553, "logits_per_char": -0.6785258054733276, "num_chars": 2}, {"sum_logits": -1.4599748849868774, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4599748849868774, "logits_per_char": -0.7299874424934387, "num_chars": 2}, {"sum_logits": -1.3638511896133423, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3638511896133423, "logits_per_char": -0.6819255948066711, "num_chars": 2}, {"sum_logits": -1.3873708248138428, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3873708248138428, "logits_per_char": -0.6936854124069214, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 629, "native_id": "Mercury_7082443", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2579305171966553, "incorrect_loss_raw": 1.440498908360799, "correct_loss_per_char": 0.6289652585983276, "incorrect_loss_per_char": 0.7202494541803995, "correct_loss_per_token": 1.2579305171966553, "incorrect_loss_per_token": 1.440498908360799, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3892145156860352, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.3892145156860352, "logits_per_char": -0.6946072578430176, "num_chars": 2}, {"sum_logits": -1.2579305171966553, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.2579305171966553, "logits_per_char": -0.6289652585983276, "num_chars": 2}, {"sum_logits": -1.449216604232788, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.449216604232788, "logits_per_char": -0.724608302116394, "num_chars": 2}, {"sum_logits": -1.4830656051635742, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4830656051635742, "logits_per_char": -0.7415328025817871, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 630, "native_id": "NCEOGA_2013_8_15", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4132144451141357, "incorrect_loss_raw": 1.383742133776347, "correct_loss_per_char": 0.7066072225570679, "incorrect_loss_per_char": 0.6918710668881735, "correct_loss_per_token": 1.4132144451141357, "incorrect_loss_per_token": 1.383742133776347, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3158009052276611, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.3158009052276611, "logits_per_char": -0.6579004526138306, "num_chars": 2}, {"sum_logits": -1.4132144451141357, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4132144451141357, "logits_per_char": -0.7066072225570679, "num_chars": 2}, {"sum_logits": -1.4665734767913818, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4665734767913818, "logits_per_char": -0.7332867383956909, "num_chars": 2}, {"sum_logits": -1.3688520193099976, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.3688520193099976, "logits_per_char": -0.6844260096549988, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 631, "native_id": "Mercury_7210140", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6578645706176758, "incorrect_loss_raw": 1.3244266907374065, "correct_loss_per_char": 0.8289322853088379, "incorrect_loss_per_char": 0.6622133453687032, "correct_loss_per_token": 1.6578645706176758, "incorrect_loss_per_token": 1.3244266907374065, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6578645706176758, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.6578645706176758, "logits_per_char": -0.8289322853088379, "num_chars": 2}, {"sum_logits": -1.4474469423294067, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.4474469423294067, "logits_per_char": -0.7237234711647034, "num_chars": 2}, {"sum_logits": -1.386681079864502, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.386681079864502, "logits_per_char": -0.693340539932251, "num_chars": 2}, {"sum_logits": -1.1391520500183105, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": true, "logits_per_token": -1.1391520500183105, "logits_per_char": -0.5695760250091553, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 632, "native_id": "Mercury_7106593", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.296818733215332, "incorrect_loss_raw": 1.4288485447565715, "correct_loss_per_char": 0.648409366607666, "incorrect_loss_per_char": 0.7144242723782858, "correct_loss_per_token": 1.296818733215332, "incorrect_loss_per_token": 1.4288485447565715, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5578848123550415, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.5578848123550415, "logits_per_char": -0.7789424061775208, "num_chars": 2}, {"sum_logits": -1.3526105880737305, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.3526105880737305, "logits_per_char": -0.6763052940368652, "num_chars": 2}, {"sum_logits": -1.3760502338409424, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.3760502338409424, "logits_per_char": -0.6880251169204712, "num_chars": 2}, {"sum_logits": -1.296818733215332, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": true, "logits_per_token": -1.296818733215332, "logits_per_char": -0.648409366607666, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 633, "native_id": "Mercury_416536", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.339857578277588, "incorrect_loss_raw": 1.4086207548777263, "correct_loss_per_char": 0.669928789138794, "incorrect_loss_per_char": 0.7043103774388632, "correct_loss_per_token": 1.339857578277588, "incorrect_loss_per_token": 1.4086207548777263, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4804891347885132, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4804891347885132, "logits_per_char": -0.7402445673942566, "num_chars": 2}, {"sum_logits": -1.339857578277588, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.339857578277588, "logits_per_char": -0.669928789138794, "num_chars": 2}, {"sum_logits": -1.404607892036438, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.404607892036438, "logits_per_char": -0.702303946018219, "num_chars": 2}, {"sum_logits": -1.3407652378082275, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.3407652378082275, "logits_per_char": -0.6703826189041138, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 634, "native_id": "Mercury_410026", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2625420093536377, "incorrect_loss_raw": 1.4503229061762493, "correct_loss_per_char": 0.6312710046768188, "incorrect_loss_per_char": 0.7251614530881246, "correct_loss_per_token": 1.2625420093536377, "incorrect_loss_per_token": 1.4503229061762493, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6483670473098755, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.6483670473098755, "logits_per_char": -0.8241835236549377, "num_chars": 2}, {"sum_logits": -1.2463619709014893, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": true, "logits_per_token": -1.2463619709014893, "logits_per_char": -0.6231809854507446, "num_chars": 2}, {"sum_logits": -1.4562397003173828, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.4562397003173828, "logits_per_char": -0.7281198501586914, "num_chars": 2}, {"sum_logits": -1.2625420093536377, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.2625420093536377, "logits_per_char": -0.6312710046768188, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 635, "native_id": "ACTAAP_2011_5_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3571374416351318, "incorrect_loss_raw": 1.4081761439641316, "correct_loss_per_char": 0.6785687208175659, "incorrect_loss_per_char": 0.7040880719820658, "correct_loss_per_token": 1.3571374416351318, "incorrect_loss_per_token": 1.4081761439641316, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2975324392318726, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": true, "logits_per_token": -1.2975324392318726, "logits_per_char": -0.6487662196159363, "num_chars": 2}, {"sum_logits": -1.3571374416351318, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.3571374416351318, "logits_per_char": -0.6785687208175659, "num_chars": 2}, {"sum_logits": -1.5159507989883423, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.5159507989883423, "logits_per_char": -0.7579753994941711, "num_chars": 2}, {"sum_logits": -1.4110451936721802, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.4110451936721802, "logits_per_char": -0.7055225968360901, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 636, "native_id": "Mercury_417138", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4276379346847534, "incorrect_loss_raw": 1.3785178263982136, "correct_loss_per_char": 0.7138189673423767, "incorrect_loss_per_char": 0.6892589131991068, "correct_loss_per_token": 1.4276379346847534, "incorrect_loss_per_token": 1.3785178263982136, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.432421088218689, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": false, "logits_per_token": -1.432421088218689, "logits_per_char": -0.7162105441093445, "num_chars": 2}, {"sum_logits": -1.4276379346847534, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": false, "logits_per_token": -1.4276379346847534, "logits_per_char": -0.7138189673423767, "num_chars": 2}, {"sum_logits": -1.361843228340149, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": false, "logits_per_token": -1.361843228340149, "logits_per_char": -0.6809216141700745, "num_chars": 2}, {"sum_logits": -1.3412891626358032, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": true, "logits_per_token": -1.3412891626358032, "logits_per_char": -0.6706445813179016, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 637, "native_id": "Mercury_7138915", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.086961269378662, "incorrect_loss_raw": 1.5166617631912231, "correct_loss_per_char": 0.543480634689331, "incorrect_loss_per_char": 0.7583308815956116, "correct_loss_per_token": 1.086961269378662, "incorrect_loss_per_token": 1.5166617631912231, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6140124797821045, "num_tokens": 1, "num_tokens_all": 420, "is_greedy": false, "logits_per_token": -1.6140124797821045, "logits_per_char": -0.8070062398910522, "num_chars": 2}, {"sum_logits": -1.4641462564468384, "num_tokens": 1, "num_tokens_all": 420, "is_greedy": false, "logits_per_token": -1.4641462564468384, "logits_per_char": -0.7320731282234192, "num_chars": 2}, {"sum_logits": -1.4718265533447266, "num_tokens": 1, "num_tokens_all": 420, "is_greedy": false, "logits_per_token": -1.4718265533447266, "logits_per_char": -0.7359132766723633, "num_chars": 2}, {"sum_logits": -1.086961269378662, "num_tokens": 1, "num_tokens_all": 420, "is_greedy": true, "logits_per_token": -1.086961269378662, "logits_per_char": -0.543480634689331, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 638, "native_id": "NYSEDREGENTS_2008_4_11", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3516227006912231, "incorrect_loss_raw": 1.405744155248006, "correct_loss_per_char": 0.6758113503456116, "incorrect_loss_per_char": 0.702872077624003, "correct_loss_per_token": 1.3516227006912231, "incorrect_loss_per_token": 1.405744155248006, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.343139410018921, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": true, "logits_per_token": -1.343139410018921, "logits_per_char": -0.6715697050094604, "num_chars": 2}, {"sum_logits": -1.4661509990692139, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.4661509990692139, "logits_per_char": -0.7330754995346069, "num_chars": 2}, {"sum_logits": -1.3516227006912231, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.3516227006912231, "logits_per_char": -0.6758113503456116, "num_chars": 2}, {"sum_logits": -1.4079420566558838, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.4079420566558838, "logits_per_char": -0.7039710283279419, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 639, "native_id": "Mercury_404435", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2120540142059326, "incorrect_loss_raw": 1.4617545207341511, "correct_loss_per_char": 0.6060270071029663, "incorrect_loss_per_char": 0.7308772603670756, "correct_loss_per_token": 1.2120540142059326, "incorrect_loss_per_token": 1.4617545207341511, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5351351499557495, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.5351351499557495, "logits_per_char": -0.7675675749778748, "num_chars": 2}, {"sum_logits": -1.3778349161148071, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.3778349161148071, "logits_per_char": -0.6889174580574036, "num_chars": 2}, {"sum_logits": -1.472293496131897, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.472293496131897, "logits_per_char": -0.7361467480659485, "num_chars": 2}, {"sum_logits": -1.2120540142059326, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.2120540142059326, "logits_per_char": -0.6060270071029663, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 640, "native_id": "MDSA_2009_5_25", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3513211011886597, "incorrect_loss_raw": 1.4058737754821777, "correct_loss_per_char": 0.6756605505943298, "incorrect_loss_per_char": 0.7029368877410889, "correct_loss_per_token": 1.3513211011886597, "incorrect_loss_per_token": 1.4058737754821777, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4611694812774658, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.4611694812774658, "logits_per_char": -0.7305847406387329, "num_chars": 2}, {"sum_logits": -1.3513211011886597, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": true, "logits_per_token": -1.3513211011886597, "logits_per_char": -0.6756605505943298, "num_chars": 2}, {"sum_logits": -1.3792567253112793, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.3792567253112793, "logits_per_char": -0.6896283626556396, "num_chars": 2}, {"sum_logits": -1.377195119857788, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.377195119857788, "logits_per_char": -0.688597559928894, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 641, "native_id": "OHAT_2007_8_12", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.380615472793579, "incorrect_loss_raw": 1.3935364484786987, "correct_loss_per_char": 0.6903077363967896, "incorrect_loss_per_char": 0.6967682242393494, "correct_loss_per_token": 1.380615472793579, "incorrect_loss_per_token": 1.3935364484786987, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4113497734069824, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4113497734069824, "logits_per_char": -0.7056748867034912, "num_chars": 2}, {"sum_logits": -1.3608988523483276, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.3608988523483276, "logits_per_char": -0.6804494261741638, "num_chars": 2}, {"sum_logits": -1.380615472793579, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.380615472793579, "logits_per_char": -0.6903077363967896, "num_chars": 2}, {"sum_logits": -1.4083607196807861, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4083607196807861, "logits_per_char": -0.7041803598403931, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 642, "native_id": "Mercury_LBS10302", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4970673322677612, "incorrect_loss_raw": 1.3611596028010051, "correct_loss_per_char": 0.7485336661338806, "incorrect_loss_per_char": 0.6805798014005026, "correct_loss_per_token": 1.4970673322677612, "incorrect_loss_per_token": 1.3611596028010051, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2281267642974854, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.2281267642974854, "logits_per_char": -0.6140633821487427, "num_chars": 2}, {"sum_logits": -1.4484319686889648, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4484319686889648, "logits_per_char": -0.7242159843444824, "num_chars": 2}, {"sum_logits": -1.4970673322677612, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4970673322677612, "logits_per_char": -0.7485336661338806, "num_chars": 2}, {"sum_logits": -1.406920075416565, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.406920075416565, "logits_per_char": -0.7034600377082825, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 643, "native_id": "Mercury_7027248", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4097367525100708, "incorrect_loss_raw": 1.3900563319524128, "correct_loss_per_char": 0.7048683762550354, "incorrect_loss_per_char": 0.6950281659762064, "correct_loss_per_token": 1.4097367525100708, "incorrect_loss_per_token": 1.3900563319524128, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4324384927749634, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4324384927749634, "logits_per_char": -0.7162192463874817, "num_chars": 2}, {"sum_logits": -1.4097367525100708, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4097367525100708, "logits_per_char": -0.7048683762550354, "num_chars": 2}, {"sum_logits": -1.4812557697296143, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4812557697296143, "logits_per_char": -0.7406278848648071, "num_chars": 2}, {"sum_logits": -1.2564747333526611, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.2564747333526611, "logits_per_char": -0.6282373666763306, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 644, "native_id": "Mercury_SC_401360", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2628124952316284, "incorrect_loss_raw": 1.4389508565266926, "correct_loss_per_char": 0.6314062476158142, "incorrect_loss_per_char": 0.7194754282633463, "correct_loss_per_token": 1.2628124952316284, "incorrect_loss_per_token": 1.4389508565266926, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2628124952316284, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": true, "logits_per_token": -1.2628124952316284, "logits_per_char": -0.6314062476158142, "num_chars": 2}, {"sum_logits": -1.3575407266616821, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.3575407266616821, "logits_per_char": -0.6787703633308411, "num_chars": 2}, {"sum_logits": -1.4235539436340332, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.4235539436340332, "logits_per_char": -0.7117769718170166, "num_chars": 2}, {"sum_logits": -1.5357578992843628, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.5357578992843628, "logits_per_char": -0.7678789496421814, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 645, "native_id": "ACTAAP_2013_5_17", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.411824107170105, "incorrect_loss_raw": 1.3842251300811768, "correct_loss_per_char": 0.7059120535850525, "incorrect_loss_per_char": 0.6921125650405884, "correct_loss_per_token": 1.411824107170105, "incorrect_loss_per_token": 1.3842251300811768, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.411824107170105, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.411824107170105, "logits_per_char": -0.7059120535850525, "num_chars": 2}, {"sum_logits": -1.355404019355774, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.355404019355774, "logits_per_char": -0.677702009677887, "num_chars": 2}, {"sum_logits": -1.4227056503295898, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4227056503295898, "logits_per_char": -0.7113528251647949, "num_chars": 2}, {"sum_logits": -1.3745657205581665, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.3745657205581665, "logits_per_char": -0.6872828602790833, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 646, "native_id": "Mercury_407125", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4725258350372314, "incorrect_loss_raw": 1.364621639251709, "correct_loss_per_char": 0.7362629175186157, "incorrect_loss_per_char": 0.6823108196258545, "correct_loss_per_token": 1.4725258350372314, "incorrect_loss_per_token": 1.364621639251709, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3713741302490234, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.3713741302490234, "logits_per_char": -0.6856870651245117, "num_chars": 2}, {"sum_logits": -1.3655133247375488, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.3655133247375488, "logits_per_char": -0.6827566623687744, "num_chars": 2}, {"sum_logits": -1.4725258350372314, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4725258350372314, "logits_per_char": -0.7362629175186157, "num_chars": 2}, {"sum_logits": -1.3569774627685547, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.3569774627685547, "logits_per_char": -0.6784887313842773, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 647, "native_id": "Mercury_404820", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2813243865966797, "incorrect_loss_raw": 1.4347014427185059, "correct_loss_per_char": 0.6406621932983398, "incorrect_loss_per_char": 0.7173507213592529, "correct_loss_per_token": 1.2813243865966797, "incorrect_loss_per_token": 1.4347014427185059, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4735850095748901, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4735850095748901, "logits_per_char": -0.7367925047874451, "num_chars": 2}, {"sum_logits": -1.5195417404174805, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.5195417404174805, "logits_per_char": -0.7597708702087402, "num_chars": 2}, {"sum_logits": -1.310977578163147, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.310977578163147, "logits_per_char": -0.6554887890815735, "num_chars": 2}, {"sum_logits": -1.2813243865966797, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.2813243865966797, "logits_per_char": -0.6406621932983398, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 648, "native_id": "Mercury_SC_416168", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4812960624694824, "incorrect_loss_raw": 1.3684406677881877, "correct_loss_per_char": 0.7406480312347412, "incorrect_loss_per_char": 0.6842203338940939, "correct_loss_per_token": 1.4812960624694824, "incorrect_loss_per_token": 1.3684406677881877, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2253037691116333, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": true, "logits_per_token": -1.2253037691116333, "logits_per_char": -0.6126518845558167, "num_chars": 2}, {"sum_logits": -1.4812960624694824, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.4812960624694824, "logits_per_char": -0.7406480312347412, "num_chars": 2}, {"sum_logits": -1.4261903762817383, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.4261903762817383, "logits_per_char": -0.7130951881408691, "num_chars": 2}, {"sum_logits": -1.4538278579711914, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.4538278579711914, "logits_per_char": -0.7269139289855957, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 649, "native_id": "TIMSS_1995_8_K18", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.489026665687561, "incorrect_loss_raw": 1.3611745834350586, "correct_loss_per_char": 0.7445133328437805, "incorrect_loss_per_char": 0.6805872917175293, "correct_loss_per_token": 1.489026665687561, "incorrect_loss_per_token": 1.3611745834350586, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.489026665687561, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.489026665687561, "logits_per_char": -0.7445133328437805, "num_chars": 2}, {"sum_logits": -1.328049898147583, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.328049898147583, "logits_per_char": -0.6640249490737915, "num_chars": 2}, {"sum_logits": -1.4551316499710083, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4551316499710083, "logits_per_char": -0.7275658249855042, "num_chars": 2}, {"sum_logits": -1.3003422021865845, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.3003422021865845, "logits_per_char": -0.6501711010932922, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 650, "native_id": "Mercury_SC_405130", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.367133617401123, "incorrect_loss_raw": 1.4072822729746501, "correct_loss_per_char": 0.6835668087005615, "incorrect_loss_per_char": 0.7036411364873251, "correct_loss_per_token": 1.367133617401123, "incorrect_loss_per_token": 1.4072822729746501, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.367133617401123, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.367133617401123, "logits_per_char": -0.6835668087005615, "num_chars": 2}, {"sum_logits": -1.3427180051803589, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.3427180051803589, "logits_per_char": -0.6713590025901794, "num_chars": 2}, {"sum_logits": -1.6048479080200195, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.6048479080200195, "logits_per_char": -0.8024239540100098, "num_chars": 2}, {"sum_logits": -1.2742809057235718, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": true, "logits_per_token": -1.2742809057235718, "logits_per_char": -0.6371404528617859, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 651, "native_id": "Mercury_SC_408631", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.414345145225525, "incorrect_loss_raw": 1.392218788464864, "correct_loss_per_char": 0.7071725726127625, "incorrect_loss_per_char": 0.696109394232432, "correct_loss_per_token": 1.414345145225525, "incorrect_loss_per_token": 1.392218788464864, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5821387767791748, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.5821387767791748, "logits_per_char": -0.7910693883895874, "num_chars": 2}, {"sum_logits": -1.3514326810836792, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.3514326810836792, "logits_per_char": -0.6757163405418396, "num_chars": 2}, {"sum_logits": -1.414345145225525, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.414345145225525, "logits_per_char": -0.7071725726127625, "num_chars": 2}, {"sum_logits": -1.2430849075317383, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": true, "logits_per_token": -1.2430849075317383, "logits_per_char": -0.6215424537658691, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 652, "native_id": "Mercury_SC_408763", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.517091989517212, "incorrect_loss_raw": 1.353432297706604, "correct_loss_per_char": 0.758545994758606, "incorrect_loss_per_char": 0.676716148853302, "correct_loss_per_token": 1.517091989517212, "incorrect_loss_per_token": 1.353432297706604, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.428933024406433, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.428933024406433, "logits_per_char": -0.7144665122032166, "num_chars": 2}, {"sum_logits": -1.3673427104949951, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3673427104949951, "logits_per_char": -0.6836713552474976, "num_chars": 2}, {"sum_logits": -1.517091989517212, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.517091989517212, "logits_per_char": -0.758545994758606, "num_chars": 2}, {"sum_logits": -1.2640211582183838, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.2640211582183838, "logits_per_char": -0.6320105791091919, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 653, "native_id": "MCAS_8_2015_18", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.614964485168457, "incorrect_loss_raw": 1.3289963006973267, "correct_loss_per_char": 0.8074822425842285, "incorrect_loss_per_char": 0.6644981503486633, "correct_loss_per_token": 1.614964485168457, "incorrect_loss_per_token": 1.3289963006973267, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.614964485168457, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.614964485168457, "logits_per_char": -0.8074822425842285, "num_chars": 2}, {"sum_logits": -1.2806272506713867, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.2806272506713867, "logits_per_char": -0.6403136253356934, "num_chars": 2}, {"sum_logits": -1.44180428981781, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.44180428981781, "logits_per_char": -0.720902144908905, "num_chars": 2}, {"sum_logits": -1.2645573616027832, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.2645573616027832, "logits_per_char": -0.6322786808013916, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 654, "native_id": "Mercury_411729", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.553501844406128, "incorrect_loss_raw": 1.3537306388219197, "correct_loss_per_char": 0.776750922203064, "incorrect_loss_per_char": 0.6768653194109598, "correct_loss_per_token": 1.553501844406128, "incorrect_loss_per_token": 1.3537306388219197, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.49577796459198, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.49577796459198, "logits_per_char": -0.74788898229599, "num_chars": 2}, {"sum_logits": -1.553501844406128, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.553501844406128, "logits_per_char": -0.776750922203064, "num_chars": 2}, {"sum_logits": -1.2409483194351196, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.2409483194351196, "logits_per_char": -0.6204741597175598, "num_chars": 2}, {"sum_logits": -1.3244656324386597, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3244656324386597, "logits_per_char": -0.6622328162193298, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 655, "native_id": "MDSA_2012_8_6", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5173513889312744, "incorrect_loss_raw": 1.352500279744466, "correct_loss_per_char": 0.7586756944656372, "incorrect_loss_per_char": 0.676250139872233, "correct_loss_per_token": 1.5173513889312744, "incorrect_loss_per_token": 1.352500279744466, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.411475419998169, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.411475419998169, "logits_per_char": -0.7057377099990845, "num_chars": 2}, {"sum_logits": -1.3220176696777344, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.3220176696777344, "logits_per_char": -0.6610088348388672, "num_chars": 2}, {"sum_logits": -1.5173513889312744, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.5173513889312744, "logits_per_char": -0.7586756944656372, "num_chars": 2}, {"sum_logits": -1.3240077495574951, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.3240077495574951, "logits_per_char": -0.6620038747787476, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 656, "native_id": "MCAS_1999_8_5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4280370473861694, "incorrect_loss_raw": 1.3801355361938477, "correct_loss_per_char": 0.7140185236930847, "incorrect_loss_per_char": 0.6900677680969238, "correct_loss_per_token": 1.4280370473861694, "incorrect_loss_per_token": 1.3801355361938477, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3006049394607544, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.3006049394607544, "logits_per_char": -0.6503024697303772, "num_chars": 2}, {"sum_logits": -1.4280370473861694, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4280370473861694, "logits_per_char": -0.7140185236930847, "num_chars": 2}, {"sum_logits": -1.4549891948699951, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4549891948699951, "logits_per_char": -0.7274945974349976, "num_chars": 2}, {"sum_logits": -1.3848124742507935, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3848124742507935, "logits_per_char": -0.6924062371253967, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 657, "native_id": "WASL_2004_8_17", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1159175634384155, "incorrect_loss_raw": 1.5029109716415405, "correct_loss_per_char": 0.5579587817192078, "incorrect_loss_per_char": 0.7514554858207703, "correct_loss_per_token": 1.1159175634384155, "incorrect_loss_per_token": 1.5029109716415405, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5470564365386963, "num_tokens": 1, "num_tokens_all": 418, "is_greedy": false, "logits_per_token": -1.5470564365386963, "logits_per_char": -0.7735282182693481, "num_chars": 2}, {"sum_logits": -1.546813726425171, "num_tokens": 1, "num_tokens_all": 418, "is_greedy": false, "logits_per_token": -1.546813726425171, "logits_per_char": -0.7734068632125854, "num_chars": 2}, {"sum_logits": -1.4148627519607544, "num_tokens": 1, "num_tokens_all": 418, "is_greedy": false, "logits_per_token": -1.4148627519607544, "logits_per_char": -0.7074313759803772, "num_chars": 2}, {"sum_logits": -1.1159175634384155, "num_tokens": 1, "num_tokens_all": 418, "is_greedy": true, "logits_per_token": -1.1159175634384155, "logits_per_char": -0.5579587817192078, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 658, "native_id": "Mercury_414365", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3776686191558838, "incorrect_loss_raw": 1.4010246992111206, "correct_loss_per_char": 0.6888343095779419, "incorrect_loss_per_char": 0.7005123496055603, "correct_loss_per_token": 1.3776686191558838, "incorrect_loss_per_token": 1.4010246992111206, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5491863489151, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.5491863489151, "logits_per_char": -0.77459317445755, "num_chars": 2}, {"sum_logits": -1.3788081407546997, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.3788081407546997, "logits_per_char": -0.6894040703773499, "num_chars": 2}, {"sum_logits": -1.3776686191558838, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.3776686191558838, "logits_per_char": -0.6888343095779419, "num_chars": 2}, {"sum_logits": -1.275079607963562, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.275079607963562, "logits_per_char": -0.637539803981781, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 659, "native_id": "Mercury_SC_415406", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3480559587478638, "incorrect_loss_raw": 1.428856094678243, "correct_loss_per_char": 0.6740279793739319, "incorrect_loss_per_char": 0.7144280473391215, "correct_loss_per_token": 1.3480559587478638, "incorrect_loss_per_token": 1.428856094678243, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1828769445419312, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.1828769445419312, "logits_per_char": -0.5914384722709656, "num_chars": 2}, {"sum_logits": -1.3480559587478638, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3480559587478638, "logits_per_char": -0.6740279793739319, "num_chars": 2}, {"sum_logits": -1.5482597351074219, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.5482597351074219, "logits_per_char": -0.7741298675537109, "num_chars": 2}, {"sum_logits": -1.555431604385376, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.555431604385376, "logits_per_char": -0.777715802192688, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 660, "native_id": "MCAS_2000_8_29", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3277995586395264, "incorrect_loss_raw": 1.4144494533538818, "correct_loss_per_char": 0.6638997793197632, "incorrect_loss_per_char": 0.7072247266769409, "correct_loss_per_token": 1.3277995586395264, "incorrect_loss_per_token": 1.4144494533538818, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3361042737960815, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.3361042737960815, "logits_per_char": -0.6680521368980408, "num_chars": 2}, {"sum_logits": -1.3277995586395264, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": true, "logits_per_token": -1.3277995586395264, "logits_per_char": -0.6638997793197632, "num_chars": 2}, {"sum_logits": -1.4767061471939087, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.4767061471939087, "logits_per_char": -0.7383530735969543, "num_chars": 2}, {"sum_logits": -1.4305379390716553, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.4305379390716553, "logits_per_char": -0.7152689695358276, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 661, "native_id": "Mercury_416230", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4649194478988647, "incorrect_loss_raw": 1.3742459217707317, "correct_loss_per_char": 0.7324597239494324, "incorrect_loss_per_char": 0.6871229608853658, "correct_loss_per_token": 1.4649194478988647, "incorrect_loss_per_token": 1.3742459217707317, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4634231328964233, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.4634231328964233, "logits_per_char": -0.7317115664482117, "num_chars": 2}, {"sum_logits": -1.2194082736968994, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": true, "logits_per_token": -1.2194082736968994, "logits_per_char": -0.6097041368484497, "num_chars": 2}, {"sum_logits": -1.4649194478988647, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.4649194478988647, "logits_per_char": -0.7324597239494324, "num_chars": 2}, {"sum_logits": -1.439906358718872, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.439906358718872, "logits_per_char": -0.719953179359436, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 662, "native_id": "Mercury_7001295", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4216418266296387, "incorrect_loss_raw": 1.3824643691380818, "correct_loss_per_char": 0.7108209133148193, "incorrect_loss_per_char": 0.6912321845690409, "correct_loss_per_token": 1.4216418266296387, "incorrect_loss_per_token": 1.3824643691380818, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2794758081436157, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.2794758081436157, "logits_per_char": -0.6397379040718079, "num_chars": 2}, {"sum_logits": -1.4459285736083984, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4459285736083984, "logits_per_char": -0.7229642868041992, "num_chars": 2}, {"sum_logits": -1.4216418266296387, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4216418266296387, "logits_per_char": -0.7108209133148193, "num_chars": 2}, {"sum_logits": -1.4219887256622314, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4219887256622314, "logits_per_char": -0.7109943628311157, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 663, "native_id": "MSA_2012_5_2", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5077043771743774, "incorrect_loss_raw": 1.3549638589223225, "correct_loss_per_char": 0.7538521885871887, "incorrect_loss_per_char": 0.6774819294611613, "correct_loss_per_token": 1.5077043771743774, "incorrect_loss_per_token": 1.3549638589223225, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5077043771743774, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.5077043771743774, "logits_per_char": -0.7538521885871887, "num_chars": 2}, {"sum_logits": -1.391753911972046, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.391753911972046, "logits_per_char": -0.695876955986023, "num_chars": 2}, {"sum_logits": -1.3933062553405762, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3933062553405762, "logits_per_char": -0.6966531276702881, "num_chars": 2}, {"sum_logits": -1.2798314094543457, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.2798314094543457, "logits_per_char": -0.6399157047271729, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 664, "native_id": "MCAS_2005_8_7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4263867139816284, "incorrect_loss_raw": 1.3792231877644856, "correct_loss_per_char": 0.7131933569908142, "incorrect_loss_per_char": 0.6896115938822428, "correct_loss_per_token": 1.4263867139816284, "incorrect_loss_per_token": 1.3792231877644856, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3403853178024292, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.3403853178024292, "logits_per_char": -0.6701926589012146, "num_chars": 2}, {"sum_logits": -1.4452857971191406, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4452857971191406, "logits_per_char": -0.7226428985595703, "num_chars": 2}, {"sum_logits": -1.4263867139816284, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4263867139816284, "logits_per_char": -0.7131933569908142, "num_chars": 2}, {"sum_logits": -1.3519984483718872, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.3519984483718872, "logits_per_char": -0.6759992241859436, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 665, "native_id": "Mercury_7206553", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4305341243743896, "incorrect_loss_raw": 1.3787930806477864, "correct_loss_per_char": 0.7152670621871948, "incorrect_loss_per_char": 0.6893965403238932, "correct_loss_per_token": 1.4305341243743896, "incorrect_loss_per_token": 1.3787930806477864, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.392171859741211, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.392171859741211, "logits_per_char": -0.6960859298706055, "num_chars": 2}, {"sum_logits": -1.3264906406402588, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.3264906406402588, "logits_per_char": -0.6632453203201294, "num_chars": 2}, {"sum_logits": -1.4177167415618896, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4177167415618896, "logits_per_char": -0.7088583707809448, "num_chars": 2}, {"sum_logits": -1.4305341243743896, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4305341243743896, "logits_per_char": -0.7152670621871948, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 666, "native_id": "VASoL_2010_3_39", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2933216094970703, "incorrect_loss_raw": 1.430605371793111, "correct_loss_per_char": 0.6466608047485352, "incorrect_loss_per_char": 0.7153026858965555, "correct_loss_per_token": 1.2933216094970703, "incorrect_loss_per_token": 1.430605371793111, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2933216094970703, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.2933216094970703, "logits_per_char": -0.6466608047485352, "num_chars": 2}, {"sum_logits": -1.3537949323654175, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3537949323654175, "logits_per_char": -0.6768974661827087, "num_chars": 2}, {"sum_logits": -1.5717183351516724, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.5717183351516724, "logits_per_char": -0.7858591675758362, "num_chars": 2}, {"sum_logits": -1.3663028478622437, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3663028478622437, "logits_per_char": -0.6831514239311218, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 667, "native_id": "Mercury_416380", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6615290641784668, "incorrect_loss_raw": 1.3248107433319092, "correct_loss_per_char": 0.8307645320892334, "incorrect_loss_per_char": 0.6624053716659546, "correct_loss_per_token": 1.6615290641784668, "incorrect_loss_per_token": 1.3248107433319092, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6615290641784668, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.6615290641784668, "logits_per_char": -0.8307645320892334, "num_chars": 2}, {"sum_logits": -1.3364977836608887, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.3364977836608887, "logits_per_char": -0.6682488918304443, "num_chars": 2}, {"sum_logits": -1.4859042167663574, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.4859042167663574, "logits_per_char": -0.7429521083831787, "num_chars": 2}, {"sum_logits": -1.1520302295684814, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": true, "logits_per_token": -1.1520302295684814, "logits_per_char": -0.5760151147842407, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 668, "native_id": "OHAT_2008_5_34", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4229934215545654, "incorrect_loss_raw": 1.3822156985600789, "correct_loss_per_char": 0.7114967107772827, "incorrect_loss_per_char": 0.6911078492800394, "correct_loss_per_token": 1.4229934215545654, "incorrect_loss_per_token": 1.3822156985600789, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4913853406906128, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4913853406906128, "logits_per_char": -0.7456926703453064, "num_chars": 2}, {"sum_logits": -1.3067564964294434, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.3067564964294434, "logits_per_char": -0.6533782482147217, "num_chars": 2}, {"sum_logits": -1.4229934215545654, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4229934215545654, "logits_per_char": -0.7114967107772827, "num_chars": 2}, {"sum_logits": -1.3485052585601807, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.3485052585601807, "logits_per_char": -0.6742526292800903, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 669, "native_id": "Mercury_7268328", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3495358228683472, "incorrect_loss_raw": 1.4183499415715535, "correct_loss_per_char": 0.6747679114341736, "incorrect_loss_per_char": 0.7091749707857767, "correct_loss_per_token": 1.3495358228683472, "incorrect_loss_per_token": 1.4183499415715535, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5359052419662476, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.5359052419662476, "logits_per_char": -0.7679526209831238, "num_chars": 2}, {"sum_logits": -1.5354421138763428, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.5354421138763428, "logits_per_char": -0.7677210569381714, "num_chars": 2}, {"sum_logits": -1.3495358228683472, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.3495358228683472, "logits_per_char": -0.6747679114341736, "num_chars": 2}, {"sum_logits": -1.1837024688720703, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.1837024688720703, "logits_per_char": -0.5918512344360352, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 670, "native_id": "NYSEDREGENTS_2008_8_36", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5021274089813232, "incorrect_loss_raw": 1.3588454325993855, "correct_loss_per_char": 0.7510637044906616, "incorrect_loss_per_char": 0.6794227162996928, "correct_loss_per_token": 1.5021274089813232, "incorrect_loss_per_token": 1.3588454325993855, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.249236822128296, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.249236822128296, "logits_per_char": -0.624618411064148, "num_chars": 2}, {"sum_logits": -1.4660698175430298, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4660698175430298, "logits_per_char": -0.7330349087715149, "num_chars": 2}, {"sum_logits": -1.5021274089813232, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.5021274089813232, "logits_per_char": -0.7510637044906616, "num_chars": 2}, {"sum_logits": -1.361229658126831, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.361229658126831, "logits_per_char": -0.6806148290634155, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 671, "native_id": "Mercury_SC_414156", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4561372995376587, "incorrect_loss_raw": 1.3701352675755818, "correct_loss_per_char": 0.7280686497688293, "incorrect_loss_per_char": 0.6850676337877909, "correct_loss_per_token": 1.4561372995376587, "incorrect_loss_per_token": 1.3701352675755818, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3801846504211426, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.3801846504211426, "logits_per_char": -0.6900923252105713, "num_chars": 2}, {"sum_logits": -1.3179943561553955, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": true, "logits_per_token": -1.3179943561553955, "logits_per_char": -0.6589971780776978, "num_chars": 2}, {"sum_logits": -1.4561372995376587, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.4561372995376587, "logits_per_char": -0.7280686497688293, "num_chars": 2}, {"sum_logits": -1.4122267961502075, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.4122267961502075, "logits_per_char": -0.7061133980751038, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 672, "native_id": "Mercury_7094133", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3512341976165771, "incorrect_loss_raw": 1.4040800333023071, "correct_loss_per_char": 0.6756170988082886, "incorrect_loss_per_char": 0.7020400166511536, "correct_loss_per_token": 1.3512341976165771, "incorrect_loss_per_token": 1.4040800333023071, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4306758642196655, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4306758642196655, "logits_per_char": -0.7153379321098328, "num_chars": 2}, {"sum_logits": -1.4070570468902588, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4070570468902588, "logits_per_char": -0.7035285234451294, "num_chars": 2}, {"sum_logits": -1.374507188796997, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.374507188796997, "logits_per_char": -0.6872535943984985, "num_chars": 2}, {"sum_logits": -1.3512341976165771, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.3512341976165771, "logits_per_char": -0.6756170988082886, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 673, "native_id": "MEA_2013_5_15", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4113050699234009, "incorrect_loss_raw": 1.416391928990682, "correct_loss_per_char": 0.7056525349617004, "incorrect_loss_per_char": 0.708195964495341, "correct_loss_per_token": 1.4113050699234009, "incorrect_loss_per_token": 1.416391928990682, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3872233629226685, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.3872233629226685, "logits_per_char": -0.6936116814613342, "num_chars": 2}, {"sum_logits": -1.7416561841964722, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.7416561841964722, "logits_per_char": -0.8708280920982361, "num_chars": 2}, {"sum_logits": -1.4113050699234009, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.4113050699234009, "logits_per_char": -0.7056525349617004, "num_chars": 2}, {"sum_logits": -1.1202962398529053, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": true, "logits_per_token": -1.1202962398529053, "logits_per_char": -0.5601481199264526, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 674, "native_id": "OHAT_2010_8_35", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.9934021234512329, "incorrect_loss_raw": 1.6854971647262573, "correct_loss_per_char": 0.49670106172561646, "incorrect_loss_per_char": 0.8427485823631287, "correct_loss_per_token": 0.9934021234512329, "incorrect_loss_per_token": 1.6854971647262573, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9934021234512329, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -0.9934021234512329, "logits_per_char": -0.49670106172561646, "num_chars": 2}, {"sum_logits": -1.4058369398117065, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4058369398117065, "logits_per_char": -0.7029184699058533, "num_chars": 2}, {"sum_logits": -1.5244863033294678, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.5244863033294678, "logits_per_char": -0.7622431516647339, "num_chars": 2}, {"sum_logits": -2.1261682510375977, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -2.1261682510375977, "logits_per_char": -1.0630841255187988, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 675, "native_id": "Mercury_SC_416174", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4407939910888672, "incorrect_loss_raw": 1.3781035741170247, "correct_loss_per_char": 0.7203969955444336, "incorrect_loss_per_char": 0.6890517870585123, "correct_loss_per_token": 1.4407939910888672, "incorrect_loss_per_token": 1.3781035741170247, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3275400400161743, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.3275400400161743, "logits_per_char": -0.6637700200080872, "num_chars": 2}, {"sum_logits": -1.3959760665893555, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.3959760665893555, "logits_per_char": -0.6979880332946777, "num_chars": 2}, {"sum_logits": -1.4407939910888672, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4407939910888672, "logits_per_char": -0.7203969955444336, "num_chars": 2}, {"sum_logits": -1.4107946157455444, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4107946157455444, "logits_per_char": -0.7053973078727722, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 676, "native_id": "TIMSS_1995_8_J6", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.398673415184021, "incorrect_loss_raw": 1.3875184456507366, "correct_loss_per_char": 0.6993367075920105, "incorrect_loss_per_char": 0.6937592228253683, "correct_loss_per_token": 1.398673415184021, "incorrect_loss_per_token": 1.3875184456507366, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3409783840179443, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": true, "logits_per_token": -1.3409783840179443, "logits_per_char": -0.6704891920089722, "num_chars": 2}, {"sum_logits": -1.398673415184021, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.398673415184021, "logits_per_char": -0.6993367075920105, "num_chars": 2}, {"sum_logits": -1.4008398056030273, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.4008398056030273, "logits_per_char": -0.7004199028015137, "num_chars": 2}, {"sum_logits": -1.4207371473312378, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.4207371473312378, "logits_per_char": -0.7103685736656189, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 677, "native_id": "Mercury_SC_401587", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.557716965675354, "incorrect_loss_raw": 1.343439261118571, "correct_loss_per_char": 0.778858482837677, "incorrect_loss_per_char": 0.6717196305592855, "correct_loss_per_token": 1.557716965675354, "incorrect_loss_per_token": 1.343439261118571, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.557716965675354, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.557716965675354, "logits_per_char": -0.778858482837677, "num_chars": 2}, {"sum_logits": -1.3430973291397095, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3430973291397095, "logits_per_char": -0.6715486645698547, "num_chars": 2}, {"sum_logits": -1.438025712966919, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.438025712966919, "logits_per_char": -0.7190128564834595, "num_chars": 2}, {"sum_logits": -1.2491947412490845, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.2491947412490845, "logits_per_char": -0.6245973706245422, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 678, "native_id": "MDSA_2011_5_23", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1597909927368164, "incorrect_loss_raw": 1.480901837348938, "correct_loss_per_char": 0.5798954963684082, "incorrect_loss_per_char": 0.740450918674469, "correct_loss_per_token": 1.1597909927368164, "incorrect_loss_per_token": 1.480901837348938, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.510276198387146, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.510276198387146, "logits_per_char": -0.755138099193573, "num_chars": 2}, {"sum_logits": -1.434437870979309, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.434437870979309, "logits_per_char": -0.7172189354896545, "num_chars": 2}, {"sum_logits": -1.4979914426803589, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.4979914426803589, "logits_per_char": -0.7489957213401794, "num_chars": 2}, {"sum_logits": -1.1597909927368164, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": true, "logits_per_token": -1.1597909927368164, "logits_per_char": -0.5798954963684082, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 679, "native_id": "AIMS_2008_8_11", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.33767569065094, "incorrect_loss_raw": 1.408758282661438, "correct_loss_per_char": 0.66883784532547, "incorrect_loss_per_char": 0.704379141330719, "correct_loss_per_token": 1.33767569065094, "incorrect_loss_per_token": 1.408758282661438, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4091417789459229, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4091417789459229, "logits_per_char": -0.7045708894729614, "num_chars": 2}, {"sum_logits": -1.4088807106018066, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4088807106018066, "logits_per_char": -0.7044403553009033, "num_chars": 2}, {"sum_logits": -1.33767569065094, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.33767569065094, "logits_per_char": -0.66883784532547, "num_chars": 2}, {"sum_logits": -1.4082523584365845, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4082523584365845, "logits_per_char": -0.7041261792182922, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 680, "native_id": "Mercury_7159215", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2103488445281982, "incorrect_loss_raw": 1.4573402802149455, "correct_loss_per_char": 0.6051744222640991, "incorrect_loss_per_char": 0.7286701401074728, "correct_loss_per_token": 1.2103488445281982, "incorrect_loss_per_token": 1.4573402802149455, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.497802972793579, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.497802972793579, "logits_per_char": -0.7489014863967896, "num_chars": 2}, {"sum_logits": -1.4399633407592773, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4399633407592773, "logits_per_char": -0.7199816703796387, "num_chars": 2}, {"sum_logits": -1.43425452709198, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.43425452709198, "logits_per_char": -0.71712726354599, "num_chars": 2}, {"sum_logits": -1.2103488445281982, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.2103488445281982, "logits_per_char": -0.6051744222640991, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 681, "native_id": "MCAS_2006_9_30", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3253090381622314, "incorrect_loss_raw": 1.418844183286031, "correct_loss_per_char": 0.6626545190811157, "incorrect_loss_per_char": 0.7094220916430155, "correct_loss_per_token": 1.3253090381622314, "incorrect_loss_per_token": 1.418844183286031, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4959908723831177, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4959908723831177, "logits_per_char": -0.7479954361915588, "num_chars": 2}, {"sum_logits": -1.3253090381622314, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3253090381622314, "logits_per_char": -0.6626545190811157, "num_chars": 2}, {"sum_logits": -1.4838882684707642, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4838882684707642, "logits_per_char": -0.7419441342353821, "num_chars": 2}, {"sum_logits": -1.2766534090042114, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.2766534090042114, "logits_per_char": -0.6383267045021057, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 682, "native_id": "MCAS_1999_4_27", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3705604076385498, "incorrect_loss_raw": 1.4010094006856282, "correct_loss_per_char": 0.6852802038192749, "incorrect_loss_per_char": 0.7005047003428141, "correct_loss_per_token": 1.3705604076385498, "incorrect_loss_per_token": 1.4010094006856282, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3705604076385498, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3705604076385498, "logits_per_char": -0.6852802038192749, "num_chars": 2}, {"sum_logits": -1.3667035102844238, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.3667035102844238, "logits_per_char": -0.6833517551422119, "num_chars": 2}, {"sum_logits": -1.468106985092163, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.468106985092163, "logits_per_char": -0.7340534925460815, "num_chars": 2}, {"sum_logits": -1.3682177066802979, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3682177066802979, "logits_per_char": -0.6841088533401489, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 683, "native_id": "Mercury_7016538", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4486362934112549, "incorrect_loss_raw": 1.3741799195607503, "correct_loss_per_char": 0.7243181467056274, "incorrect_loss_per_char": 0.6870899597803751, "correct_loss_per_token": 1.4486362934112549, "incorrect_loss_per_token": 1.3741799195607503, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.419682502746582, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.419682502746582, "logits_per_char": -0.709841251373291, "num_chars": 2}, {"sum_logits": -1.2900892496109009, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.2900892496109009, "logits_per_char": -0.6450446248054504, "num_chars": 2}, {"sum_logits": -1.4486362934112549, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4486362934112549, "logits_per_char": -0.7243181467056274, "num_chars": 2}, {"sum_logits": -1.412768006324768, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.412768006324768, "logits_per_char": -0.706384003162384, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 684, "native_id": "Mercury_SC_409266", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2997205257415771, "incorrect_loss_raw": 1.4236843983332317, "correct_loss_per_char": 0.6498602628707886, "incorrect_loss_per_char": 0.7118421991666158, "correct_loss_per_token": 1.2997205257415771, "incorrect_loss_per_token": 1.4236843983332317, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2997205257415771, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.2997205257415771, "logits_per_char": -0.6498602628707886, "num_chars": 2}, {"sum_logits": -1.4609808921813965, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4609808921813965, "logits_per_char": -0.7304904460906982, "num_chars": 2}, {"sum_logits": -1.4300463199615479, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4300463199615479, "logits_per_char": -0.7150231599807739, "num_chars": 2}, {"sum_logits": -1.3800259828567505, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3800259828567505, "logits_per_char": -0.6900129914283752, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 685, "native_id": "OHAT_2007_5_15", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4318480491638184, "incorrect_loss_raw": 1.3886754512786865, "correct_loss_per_char": 0.7159240245819092, "incorrect_loss_per_char": 0.6943377256393433, "correct_loss_per_token": 1.4318480491638184, "incorrect_loss_per_token": 1.3886754512786865, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2728588581085205, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.2728588581085205, "logits_per_char": -0.6364294290542603, "num_chars": 2}, {"sum_logits": -1.4318480491638184, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4318480491638184, "logits_per_char": -0.7159240245819092, "num_chars": 2}, {"sum_logits": -1.5873775482177734, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.5873775482177734, "logits_per_char": -0.7936887741088867, "num_chars": 2}, {"sum_logits": -1.3057899475097656, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.3057899475097656, "logits_per_char": -0.6528949737548828, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 686, "native_id": "Mercury_7230073", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3786431550979614, "incorrect_loss_raw": 1.408929983774821, "correct_loss_per_char": 0.6893215775489807, "incorrect_loss_per_char": 0.7044649918874105, "correct_loss_per_token": 1.3786431550979614, "incorrect_loss_per_token": 1.408929983774821, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5177667140960693, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.5177667140960693, "logits_per_char": -0.7588833570480347, "num_chars": 2}, {"sum_logits": -1.3786431550979614, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3786431550979614, "logits_per_char": -0.6893215775489807, "num_chars": 2}, {"sum_logits": -1.5362224578857422, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.5362224578857422, "logits_per_char": -0.7681112289428711, "num_chars": 2}, {"sum_logits": -1.1728007793426514, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.1728007793426514, "logits_per_char": -0.5864003896713257, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 687, "native_id": "Mercury_7245840", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3712149858474731, "incorrect_loss_raw": 1.398871898651123, "correct_loss_per_char": 0.6856074929237366, "incorrect_loss_per_char": 0.6994359493255615, "correct_loss_per_token": 1.3712149858474731, "incorrect_loss_per_token": 1.398871898651123, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3911101818084717, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3911101818084717, "logits_per_char": -0.6955550909042358, "num_chars": 2}, {"sum_logits": -1.3712149858474731, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3712149858474731, "logits_per_char": -0.6856074929237366, "num_chars": 2}, {"sum_logits": -1.448991060256958, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.448991060256958, "logits_per_char": -0.724495530128479, "num_chars": 2}, {"sum_logits": -1.3565144538879395, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.3565144538879395, "logits_per_char": -0.6782572269439697, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 688, "native_id": "Mercury_SC_401788", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.312773585319519, "incorrect_loss_raw": 1.4532562494277954, "correct_loss_per_char": 0.6563867926597595, "incorrect_loss_per_char": 0.7266281247138977, "correct_loss_per_token": 1.312773585319519, "incorrect_loss_per_token": 1.4532562494277954, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1014209985733032, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.1014209985733032, "logits_per_char": -0.5507104992866516, "num_chars": 2}, {"sum_logits": -1.312773585319519, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.312773585319519, "logits_per_char": -0.6563867926597595, "num_chars": 2}, {"sum_logits": -1.669772744178772, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.669772744178772, "logits_per_char": -0.834886372089386, "num_chars": 2}, {"sum_logits": -1.588575005531311, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.588575005531311, "logits_per_char": -0.7942875027656555, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 689, "native_id": "ACTAAP_2014_7_5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5400562286376953, "incorrect_loss_raw": 1.350112517674764, "correct_loss_per_char": 0.7700281143188477, "incorrect_loss_per_char": 0.675056258837382, "correct_loss_per_token": 1.5400562286376953, "incorrect_loss_per_token": 1.350112517674764, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2922570705413818, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.2922570705413818, "logits_per_char": -0.6461285352706909, "num_chars": 2}, {"sum_logits": -1.3349255323410034, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3349255323410034, "logits_per_char": -0.6674627661705017, "num_chars": 2}, {"sum_logits": -1.4231549501419067, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4231549501419067, "logits_per_char": -0.7115774750709534, "num_chars": 2}, {"sum_logits": -1.5400562286376953, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.5400562286376953, "logits_per_char": -0.7700281143188477, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 690, "native_id": "MCAS_2004_5_11", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2575688362121582, "incorrect_loss_raw": 1.4385484059651692, "correct_loss_per_char": 0.6287844181060791, "incorrect_loss_per_char": 0.7192742029825846, "correct_loss_per_token": 1.2575688362121582, "incorrect_loss_per_token": 1.4385484059651692, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3905469179153442, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.3905469179153442, "logits_per_char": -0.6952734589576721, "num_chars": 2}, {"sum_logits": -1.4372872114181519, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.4372872114181519, "logits_per_char": -0.7186436057090759, "num_chars": 2}, {"sum_logits": -1.4878110885620117, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.4878110885620117, "logits_per_char": -0.7439055442810059, "num_chars": 2}, {"sum_logits": -1.2575688362121582, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.2575688362121582, "logits_per_char": -0.6287844181060791, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 691, "native_id": "NCEOGA_2013_8_7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4318106174468994, "incorrect_loss_raw": 1.3813763459523518, "correct_loss_per_char": 0.7159053087234497, "incorrect_loss_per_char": 0.6906881729761759, "correct_loss_per_token": 1.4318106174468994, "incorrect_loss_per_token": 1.3813763459523518, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4318106174468994, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4318106174468994, "logits_per_char": -0.7159053087234497, "num_chars": 2}, {"sum_logits": -1.2952123880386353, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.2952123880386353, "logits_per_char": -0.6476061940193176, "num_chars": 2}, {"sum_logits": -1.5060806274414062, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.5060806274414062, "logits_per_char": -0.7530403137207031, "num_chars": 2}, {"sum_logits": -1.3428360223770142, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3428360223770142, "logits_per_char": -0.6714180111885071, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 692, "native_id": "LEAP__7_10339", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3805958032608032, "incorrect_loss_raw": 1.4078432321548462, "correct_loss_per_char": 0.6902979016304016, "incorrect_loss_per_char": 0.7039216160774231, "correct_loss_per_token": 1.3805958032608032, "incorrect_loss_per_token": 1.4078432321548462, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3805958032608032, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": false, "logits_per_token": -1.3805958032608032, "logits_per_char": -0.6902979016304016, "num_chars": 2}, {"sum_logits": -1.1734923124313354, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": true, "logits_per_token": -1.1734923124313354, "logits_per_char": -0.5867461562156677, "num_chars": 2}, {"sum_logits": -1.5384881496429443, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": false, "logits_per_token": -1.5384881496429443, "logits_per_char": -0.7692440748214722, "num_chars": 2}, {"sum_logits": -1.5115492343902588, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": false, "logits_per_token": -1.5115492343902588, "logits_per_char": -0.7557746171951294, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 693, "native_id": "Mercury_7018270", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4657286405563354, "incorrect_loss_raw": 1.3683054049809773, "correct_loss_per_char": 0.7328643202781677, "incorrect_loss_per_char": 0.6841527024904887, "correct_loss_per_token": 1.4657286405563354, "incorrect_loss_per_token": 1.3683054049809773, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2920385599136353, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.2920385599136353, "logits_per_char": -0.6460192799568176, "num_chars": 2}, {"sum_logits": -1.3608858585357666, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3608858585357666, "logits_per_char": -0.6804429292678833, "num_chars": 2}, {"sum_logits": -1.4657286405563354, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4657286405563354, "logits_per_char": -0.7328643202781677, "num_chars": 2}, {"sum_logits": -1.4519917964935303, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4519917964935303, "logits_per_char": -0.7259958982467651, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 694, "native_id": "Mercury_7034808", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.427156925201416, "incorrect_loss_raw": 1.3784886598587036, "correct_loss_per_char": 0.713578462600708, "incorrect_loss_per_char": 0.6892443299293518, "correct_loss_per_token": 1.427156925201416, "incorrect_loss_per_token": 1.3784886598587036, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.427156925201416, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.427156925201416, "logits_per_char": -0.713578462600708, "num_chars": 2}, {"sum_logits": -1.3291807174682617, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.3291807174682617, "logits_per_char": -0.6645903587341309, "num_chars": 2}, {"sum_logits": -1.3811149597167969, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3811149597167969, "logits_per_char": -0.6905574798583984, "num_chars": 2}, {"sum_logits": -1.4251703023910522, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4251703023910522, "logits_per_char": -0.7125851511955261, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 695, "native_id": "Mercury_7216300", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5285528898239136, "incorrect_loss_raw": 1.36124054590861, "correct_loss_per_char": 0.7642764449119568, "incorrect_loss_per_char": 0.680620272954305, "correct_loss_per_token": 1.5285528898239136, "incorrect_loss_per_token": 1.36124054590861, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1543910503387451, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.1543910503387451, "logits_per_char": -0.5771955251693726, "num_chars": 2}, {"sum_logits": -1.4325411319732666, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4325411319732666, "logits_per_char": -0.7162705659866333, "num_chars": 2}, {"sum_logits": -1.5285528898239136, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.5285528898239136, "logits_per_char": -0.7642764449119568, "num_chars": 2}, {"sum_logits": -1.4967894554138184, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4967894554138184, "logits_per_char": -0.7483947277069092, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 696, "native_id": "Mercury_SC_400985", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4368772506713867, "incorrect_loss_raw": 1.378583510716756, "correct_loss_per_char": 0.7184386253356934, "incorrect_loss_per_char": 0.689291755358378, "correct_loss_per_token": 1.4368772506713867, "incorrect_loss_per_token": 1.378583510716756, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3991385698318481, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.3991385698318481, "logits_per_char": -0.6995692849159241, "num_chars": 2}, {"sum_logits": -1.4435617923736572, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4435617923736572, "logits_per_char": -0.7217808961868286, "num_chars": 2}, {"sum_logits": -1.4368772506713867, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4368772506713867, "logits_per_char": -0.7184386253356934, "num_chars": 2}, {"sum_logits": -1.2930501699447632, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.2930501699447632, "logits_per_char": -0.6465250849723816, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 697, "native_id": "Mercury_7188528", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4697941541671753, "incorrect_loss_raw": 1.3660557667414348, "correct_loss_per_char": 0.7348970770835876, "incorrect_loss_per_char": 0.6830278833707174, "correct_loss_per_token": 1.4697941541671753, "incorrect_loss_per_token": 1.3660557667414348, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4697941541671753, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4697941541671753, "logits_per_char": -0.7348970770835876, "num_chars": 2}, {"sum_logits": -1.3277864456176758, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.3277864456176758, "logits_per_char": -0.6638932228088379, "num_chars": 2}, {"sum_logits": -1.4119160175323486, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4119160175323486, "logits_per_char": -0.7059580087661743, "num_chars": 2}, {"sum_logits": -1.3584648370742798, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.3584648370742798, "logits_per_char": -0.6792324185371399, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 698, "native_id": "TIMSS_1995_8_R2", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4505876302719116, "incorrect_loss_raw": 1.373268763224284, "correct_loss_per_char": 0.7252938151359558, "incorrect_loss_per_char": 0.686634381612142, "correct_loss_per_token": 1.4505876302719116, "incorrect_loss_per_token": 1.373268763224284, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.445378303527832, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.445378303527832, "logits_per_char": -0.722689151763916, "num_chars": 2}, {"sum_logits": -1.4505876302719116, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.4505876302719116, "logits_per_char": -0.7252938151359558, "num_chars": 2}, {"sum_logits": -1.429937481880188, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.429937481880188, "logits_per_char": -0.714968740940094, "num_chars": 2}, {"sum_logits": -1.2444905042648315, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": true, "logits_per_token": -1.2444905042648315, "logits_per_char": -0.6222452521324158, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 699, "native_id": "Mercury_SC_400032", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3468886613845825, "incorrect_loss_raw": 1.4053905804951985, "correct_loss_per_char": 0.6734443306922913, "incorrect_loss_per_char": 0.7026952902475992, "correct_loss_per_token": 1.3468886613845825, "incorrect_loss_per_token": 1.4053905804951985, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3678181171417236, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.3678181171417236, "logits_per_char": -0.6839090585708618, "num_chars": 2}, {"sum_logits": -1.4102805852890015, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4102805852890015, "logits_per_char": -0.7051402926445007, "num_chars": 2}, {"sum_logits": -1.3468886613845825, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": true, "logits_per_token": -1.3468886613845825, "logits_per_char": -0.6734443306922913, "num_chars": 2}, {"sum_logits": -1.4380730390548706, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4380730390548706, "logits_per_char": -0.7190365195274353, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 700, "native_id": "Mercury_7252245", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3303967714309692, "incorrect_loss_raw": 1.4192235072453816, "correct_loss_per_char": 0.6651983857154846, "incorrect_loss_per_char": 0.7096117536226908, "correct_loss_per_token": 1.3303967714309692, "incorrect_loss_per_token": 1.4192235072453816, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5141003131866455, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.5141003131866455, "logits_per_char": -0.7570501565933228, "num_chars": 2}, {"sum_logits": -1.4752197265625, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.4752197265625, "logits_per_char": -0.73760986328125, "num_chars": 2}, {"sum_logits": -1.2683504819869995, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": true, "logits_per_token": -1.2683504819869995, "logits_per_char": -0.6341752409934998, "num_chars": 2}, {"sum_logits": -1.3303967714309692, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.3303967714309692, "logits_per_char": -0.6651983857154846, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 701, "native_id": "MCAS_2002_8_17", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.339517593383789, "incorrect_loss_raw": 1.4087380568186443, "correct_loss_per_char": 0.6697587966918945, "incorrect_loss_per_char": 0.7043690284093221, "correct_loss_per_token": 1.339517593383789, "incorrect_loss_per_token": 1.4087380568186443, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4389830827713013, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4389830827713013, "logits_per_char": -0.7194915413856506, "num_chars": 2}, {"sum_logits": -1.3671449422836304, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.3671449422836304, "logits_per_char": -0.6835724711418152, "num_chars": 2}, {"sum_logits": -1.420086145401001, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.420086145401001, "logits_per_char": -0.7100430727005005, "num_chars": 2}, {"sum_logits": -1.339517593383789, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.339517593383789, "logits_per_char": -0.6697587966918945, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 702, "native_id": "MDSA_2007_8_30", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4562128782272339, "incorrect_loss_raw": 1.3758840958277385, "correct_loss_per_char": 0.7281064391136169, "incorrect_loss_per_char": 0.6879420479138693, "correct_loss_per_token": 1.4562128782272339, "incorrect_loss_per_token": 1.3758840958277385, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4827021360397339, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4827021360397339, "logits_per_char": -0.7413510680198669, "num_chars": 2}, {"sum_logits": -1.4267029762268066, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4267029762268066, "logits_per_char": -0.7133514881134033, "num_chars": 2}, {"sum_logits": -1.4562128782272339, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4562128782272339, "logits_per_char": -0.7281064391136169, "num_chars": 2}, {"sum_logits": -1.2182471752166748, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.2182471752166748, "logits_per_char": -0.6091235876083374, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 703, "native_id": "NCEOGA_2013_5_35", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.286520004272461, "incorrect_loss_raw": 1.4270302454630535, "correct_loss_per_char": 0.6432600021362305, "incorrect_loss_per_char": 0.7135151227315267, "correct_loss_per_token": 1.286520004272461, "incorrect_loss_per_token": 1.4270302454630535, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.459503173828125, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.459503173828125, "logits_per_char": -0.7297515869140625, "num_chars": 2}, {"sum_logits": -1.3910919427871704, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.3910919427871704, "logits_per_char": -0.6955459713935852, "num_chars": 2}, {"sum_logits": -1.4304956197738647, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4304956197738647, "logits_per_char": -0.7152478098869324, "num_chars": 2}, {"sum_logits": -1.286520004272461, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.286520004272461, "logits_per_char": -0.6432600021362305, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 704, "native_id": "Mercury_7082758", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.373469591140747, "incorrect_loss_raw": 1.4032222032546997, "correct_loss_per_char": 0.6867347955703735, "incorrect_loss_per_char": 0.7016111016273499, "correct_loss_per_token": 1.373469591140747, "incorrect_loss_per_token": 1.4032222032546997, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2473390102386475, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.2473390102386475, "logits_per_char": -0.6236695051193237, "num_chars": 2}, {"sum_logits": -1.373469591140747, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.373469591140747, "logits_per_char": -0.6867347955703735, "num_chars": 2}, {"sum_logits": -1.4485301971435547, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4485301971435547, "logits_per_char": -0.7242650985717773, "num_chars": 2}, {"sum_logits": -1.513797402381897, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.513797402381897, "logits_per_char": -0.7568987011909485, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 705, "native_id": "Mercury_7094308", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4912041425704956, "incorrect_loss_raw": 1.361786961555481, "correct_loss_per_char": 0.7456020712852478, "incorrect_loss_per_char": 0.6808934807777405, "correct_loss_per_token": 1.4912041425704956, "incorrect_loss_per_token": 1.361786961555481, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3218655586242676, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": true, "logits_per_token": -1.3218655586242676, "logits_per_char": -0.6609327793121338, "num_chars": 2}, {"sum_logits": -1.4023746252059937, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.4023746252059937, "logits_per_char": -0.7011873126029968, "num_chars": 2}, {"sum_logits": -1.4912041425704956, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.4912041425704956, "logits_per_char": -0.7456020712852478, "num_chars": 2}, {"sum_logits": -1.3611207008361816, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.3611207008361816, "logits_per_char": -0.6805603504180908, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 706, "native_id": "Mercury_7136028", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.468705177307129, "incorrect_loss_raw": 1.3685239950815837, "correct_loss_per_char": 0.7343525886535645, "incorrect_loss_per_char": 0.6842619975407919, "correct_loss_per_token": 1.468705177307129, "incorrect_loss_per_token": 1.3685239950815837, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.468705177307129, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.468705177307129, "logits_per_char": -0.7343525886535645, "num_chars": 2}, {"sum_logits": -1.4511545896530151, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.4511545896530151, "logits_per_char": -0.7255772948265076, "num_chars": 2}, {"sum_logits": -1.4110852479934692, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.4110852479934692, "logits_per_char": -0.7055426239967346, "num_chars": 2}, {"sum_logits": -1.2433321475982666, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.2433321475982666, "logits_per_char": -0.6216660737991333, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 707, "native_id": "Mercury_7159075", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3737752437591553, "incorrect_loss_raw": 1.404400904973348, "correct_loss_per_char": 0.6868876218795776, "incorrect_loss_per_char": 0.702200452486674, "correct_loss_per_token": 1.3737752437591553, "incorrect_loss_per_token": 1.404400904973348, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.508840560913086, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.508840560913086, "logits_per_char": -0.754420280456543, "num_chars": 2}, {"sum_logits": -1.3737752437591553, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.3737752437591553, "logits_per_char": -0.6868876218795776, "num_chars": 2}, {"sum_logits": -1.4594478607177734, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.4594478607177734, "logits_per_char": -0.7297239303588867, "num_chars": 2}, {"sum_logits": -1.2449142932891846, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": true, "logits_per_token": -1.2449142932891846, "logits_per_char": -0.6224571466445923, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 708, "native_id": "MCAS_2015_5_19", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3513621091842651, "incorrect_loss_raw": 1.406162699063619, "correct_loss_per_char": 0.6756810545921326, "incorrect_loss_per_char": 0.7030813495318095, "correct_loss_per_token": 1.3513621091842651, "incorrect_loss_per_token": 1.406162699063619, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4404088258743286, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4404088258743286, "logits_per_char": -0.7202044129371643, "num_chars": 2}, {"sum_logits": -1.3513621091842651, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3513621091842651, "logits_per_char": -0.6756810545921326, "num_chars": 2}, {"sum_logits": -1.4705144166946411, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4705144166946411, "logits_per_char": -0.7352572083473206, "num_chars": 2}, {"sum_logits": -1.3075648546218872, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.3075648546218872, "logits_per_char": -0.6537824273109436, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 709, "native_id": "MSA_2012_5_12", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2624982595443726, "incorrect_loss_raw": 1.4382633368174236, "correct_loss_per_char": 0.6312491297721863, "incorrect_loss_per_char": 0.7191316684087118, "correct_loss_per_token": 1.2624982595443726, "incorrect_loss_per_token": 1.4382633368174236, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2624982595443726, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": true, "logits_per_token": -1.2624982595443726, "logits_per_char": -0.6312491297721863, "num_chars": 2}, {"sum_logits": -1.439914584159851, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.439914584159851, "logits_per_char": -0.7199572920799255, "num_chars": 2}, {"sum_logits": -1.4754959344863892, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.4754959344863892, "logits_per_char": -0.7377479672431946, "num_chars": 2}, {"sum_logits": -1.3993794918060303, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.3993794918060303, "logits_per_char": -0.6996897459030151, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 710, "native_id": "MCAS_2014_5_13", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3841632604599, "incorrect_loss_raw": 1.3939168453216553, "correct_loss_per_char": 0.69208163022995, "incorrect_loss_per_char": 0.6969584226608276, "correct_loss_per_token": 1.3841632604599, "incorrect_loss_per_token": 1.3939168453216553, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4201301336288452, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.4201301336288452, "logits_per_char": -0.7100650668144226, "num_chars": 2}, {"sum_logits": -1.3102915287017822, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": true, "logits_per_token": -1.3102915287017822, "logits_per_char": -0.6551457643508911, "num_chars": 2}, {"sum_logits": -1.4513288736343384, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.4513288736343384, "logits_per_char": -0.7256644368171692, "num_chars": 2}, {"sum_logits": -1.3841632604599, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.3841632604599, "logits_per_char": -0.69208163022995, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 711, "native_id": "Mercury_SC_400392", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6658222675323486, "incorrect_loss_raw": 1.381637175877889, "correct_loss_per_char": 0.8329111337661743, "incorrect_loss_per_char": 0.6908185879389445, "correct_loss_per_token": 1.6658222675323486, "incorrect_loss_per_token": 1.381637175877889, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0614473819732666, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.0614473819732666, "logits_per_char": -0.5307236909866333, "num_chars": 2}, {"sum_logits": -1.3104678392410278, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.3104678392410278, "logits_per_char": -0.6552339196205139, "num_chars": 2}, {"sum_logits": -1.6658222675323486, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.6658222675323486, "logits_per_char": -0.8329111337661743, "num_chars": 2}, {"sum_logits": -1.7729963064193726, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.7729963064193726, "logits_per_char": -0.8864981532096863, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 712, "native_id": "Mercury_7159320", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2612076997756958, "incorrect_loss_raw": 1.4395986795425415, "correct_loss_per_char": 0.6306038498878479, "incorrect_loss_per_char": 0.7197993397712708, "correct_loss_per_token": 1.2612076997756958, "incorrect_loss_per_token": 1.4395986795425415, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2612076997756958, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.2612076997756958, "logits_per_char": -0.6306038498878479, "num_chars": 2}, {"sum_logits": -1.380192518234253, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.380192518234253, "logits_per_char": -0.6900962591171265, "num_chars": 2}, {"sum_logits": -1.5288939476013184, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.5288939476013184, "logits_per_char": -0.7644469738006592, "num_chars": 2}, {"sum_logits": -1.4097095727920532, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4097095727920532, "logits_per_char": -0.7048547863960266, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 713, "native_id": "Mercury_7218365", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3553054332733154, "incorrect_loss_raw": 1.4124233325322468, "correct_loss_per_char": 0.6776527166366577, "incorrect_loss_per_char": 0.7062116662661234, "correct_loss_per_token": 1.3553054332733154, "incorrect_loss_per_token": 1.4124233325322468, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6019285917282104, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.6019285917282104, "logits_per_char": -0.8009642958641052, "num_chars": 2}, {"sum_logits": -1.3553054332733154, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.3553054332733154, "logits_per_char": -0.6776527166366577, "num_chars": 2}, {"sum_logits": -1.336563229560852, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.336563229560852, "logits_per_char": -0.668281614780426, "num_chars": 2}, {"sum_logits": -1.2987781763076782, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.2987781763076782, "logits_per_char": -0.6493890881538391, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 714, "native_id": "MCAS_2004_9_10-v1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3762938976287842, "incorrect_loss_raw": 1.4009676376978557, "correct_loss_per_char": 0.6881469488143921, "incorrect_loss_per_char": 0.7004838188489279, "correct_loss_per_token": 1.3762938976287842, "incorrect_loss_per_token": 1.4009676376978557, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4737099409103394, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": false, "logits_per_token": -1.4737099409103394, "logits_per_char": -0.7368549704551697, "num_chars": 2}, {"sum_logits": -1.436704397201538, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": false, "logits_per_token": -1.436704397201538, "logits_per_char": -0.718352198600769, "num_chars": 2}, {"sum_logits": -1.3762938976287842, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": false, "logits_per_token": -1.3762938976287842, "logits_per_char": -0.6881469488143921, "num_chars": 2}, {"sum_logits": -1.2924885749816895, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": true, "logits_per_token": -1.2924885749816895, "logits_per_char": -0.6462442874908447, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 715, "native_id": "AIMS_2009_4_12", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5739353895187378, "incorrect_loss_raw": 1.368047555287679, "correct_loss_per_char": 0.7869676947593689, "incorrect_loss_per_char": 0.6840237776438395, "correct_loss_per_token": 1.5739353895187378, "incorrect_loss_per_token": 1.368047555287679, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0794639587402344, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -1.0794639587402344, "logits_per_char": -0.5397319793701172, "num_chars": 2}, {"sum_logits": -1.4013545513153076, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.4013545513153076, "logits_per_char": -0.7006772756576538, "num_chars": 2}, {"sum_logits": -1.6233241558074951, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.6233241558074951, "logits_per_char": -0.8116620779037476, "num_chars": 2}, {"sum_logits": -1.5739353895187378, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.5739353895187378, "logits_per_char": -0.7869676947593689, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 716, "native_id": "Mercury_SC_414274", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3448607921600342, "incorrect_loss_raw": 1.4074266751607258, "correct_loss_per_char": 0.6724303960800171, "incorrect_loss_per_char": 0.7037133375803629, "correct_loss_per_token": 1.3448607921600342, "incorrect_loss_per_token": 1.4074266751607258, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4372010231018066, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.4372010231018066, "logits_per_char": -0.7186005115509033, "num_chars": 2}, {"sum_logits": -1.3448607921600342, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -1.3448607921600342, "logits_per_char": -0.6724303960800171, "num_chars": 2}, {"sum_logits": -1.3991025686264038, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.3991025686264038, "logits_per_char": -0.6995512843132019, "num_chars": 2}, {"sum_logits": -1.3859764337539673, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.3859764337539673, "logits_per_char": -0.6929882168769836, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 717, "native_id": "MCAS_2005_9_6", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3933804035186768, "incorrect_loss_raw": 1.3905680179595947, "correct_loss_per_char": 0.6966902017593384, "incorrect_loss_per_char": 0.6952840089797974, "correct_loss_per_token": 1.3933804035186768, "incorrect_loss_per_token": 1.3905680179595947, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3790721893310547, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.3790721893310547, "logits_per_char": -0.6895360946655273, "num_chars": 2}, {"sum_logits": -1.3332428932189941, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.3332428932189941, "logits_per_char": -0.6666214466094971, "num_chars": 2}, {"sum_logits": -1.3933804035186768, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.3933804035186768, "logits_per_char": -0.6966902017593384, "num_chars": 2}, {"sum_logits": -1.4593889713287354, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4593889713287354, "logits_per_char": -0.7296944856643677, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 718, "native_id": "MCAS_1998_4_23", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5171725749969482, "incorrect_loss_raw": 1.366064230600993, "correct_loss_per_char": 0.7585862874984741, "incorrect_loss_per_char": 0.6830321153004965, "correct_loss_per_token": 1.5171725749969482, "incorrect_loss_per_token": 1.366064230600993, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5171725749969482, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.5171725749969482, "logits_per_char": -0.7585862874984741, "num_chars": 2}, {"sum_logits": -1.5416648387908936, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.5416648387908936, "logits_per_char": -0.7708324193954468, "num_chars": 2}, {"sum_logits": -1.4269441366195679, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.4269441366195679, "logits_per_char": -0.7134720683097839, "num_chars": 2}, {"sum_logits": -1.129583716392517, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": true, "logits_per_token": -1.129583716392517, "logits_per_char": -0.5647918581962585, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 719, "native_id": "Mercury_7075023", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5051873922348022, "incorrect_loss_raw": 1.3598272800445557, "correct_loss_per_char": 0.7525936961174011, "incorrect_loss_per_char": 0.6799136400222778, "correct_loss_per_token": 1.5051873922348022, "incorrect_loss_per_token": 1.3598272800445557, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2552390098571777, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.2552390098571777, "logits_per_char": -0.6276195049285889, "num_chars": 2}, {"sum_logits": -1.361759066581726, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.361759066581726, "logits_per_char": -0.680879533290863, "num_chars": 2}, {"sum_logits": -1.4624837636947632, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4624837636947632, "logits_per_char": -0.7312418818473816, "num_chars": 2}, {"sum_logits": -1.5051873922348022, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.5051873922348022, "logits_per_char": -0.7525936961174011, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 720, "native_id": "Mercury_SC_400182", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.422805905342102, "incorrect_loss_raw": 1.3837107419967651, "correct_loss_per_char": 0.711402952671051, "incorrect_loss_per_char": 0.6918553709983826, "correct_loss_per_token": 1.422805905342102, "incorrect_loss_per_token": 1.3837107419967651, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2993067502975464, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.2993067502975464, "logits_per_char": -0.6496533751487732, "num_chars": 2}, {"sum_logits": -1.422805905342102, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.422805905342102, "logits_per_char": -0.711402952671051, "num_chars": 2}, {"sum_logits": -1.385013222694397, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.385013222694397, "logits_per_char": -0.6925066113471985, "num_chars": 2}, {"sum_logits": -1.466812252998352, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.466812252998352, "logits_per_char": -0.733406126499176, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 721, "native_id": "Mercury_SC_400133", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.447709560394287, "incorrect_loss_raw": 1.3751270771026611, "correct_loss_per_char": 0.7238547801971436, "incorrect_loss_per_char": 0.6875635385513306, "correct_loss_per_token": 1.447709560394287, "incorrect_loss_per_token": 1.3751270771026611, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.454757809638977, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.454757809638977, "logits_per_char": -0.7273789048194885, "num_chars": 2}, {"sum_logits": -1.447709560394287, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.447709560394287, "logits_per_char": -0.7238547801971436, "num_chars": 2}, {"sum_logits": -1.4049278497695923, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.4049278497695923, "logits_per_char": -0.7024639248847961, "num_chars": 2}, {"sum_logits": -1.265695571899414, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.265695571899414, "logits_per_char": -0.632847785949707, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 722, "native_id": "MSA_2013_5_11", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.360575795173645, "incorrect_loss_raw": 1.4016040166219075, "correct_loss_per_char": 0.6802878975868225, "incorrect_loss_per_char": 0.7008020083109537, "correct_loss_per_token": 1.360575795173645, "incorrect_loss_per_token": 1.4016040166219075, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4327389001846313, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4327389001846313, "logits_per_char": -0.7163694500923157, "num_chars": 2}, {"sum_logits": -1.3233002424240112, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.3233002424240112, "logits_per_char": -0.6616501212120056, "num_chars": 2}, {"sum_logits": -1.44877290725708, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.44877290725708, "logits_per_char": -0.72438645362854, "num_chars": 2}, {"sum_logits": -1.360575795173645, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.360575795173645, "logits_per_char": -0.6802878975868225, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 723, "native_id": "Mercury_SC_408706", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3765137195587158, "incorrect_loss_raw": 1.4083576202392578, "correct_loss_per_char": 0.6882568597793579, "incorrect_loss_per_char": 0.7041788101196289, "correct_loss_per_token": 1.3765137195587158, "incorrect_loss_per_token": 1.4083576202392578, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.585605263710022, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.585605263710022, "logits_per_char": -0.792802631855011, "num_chars": 2}, {"sum_logits": -1.3765137195587158, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.3765137195587158, "logits_per_char": -0.6882568597793579, "num_chars": 2}, {"sum_logits": -1.40020751953125, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.40020751953125, "logits_per_char": -0.700103759765625, "num_chars": 2}, {"sum_logits": -1.2392600774765015, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": true, "logits_per_token": -1.2392600774765015, "logits_per_char": -0.6196300387382507, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 724, "native_id": "Mercury_7213325", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4119783639907837, "incorrect_loss_raw": 1.3893234332402546, "correct_loss_per_char": 0.7059891819953918, "incorrect_loss_per_char": 0.6946617166201273, "correct_loss_per_token": 1.4119783639907837, "incorrect_loss_per_token": 1.3893234332402546, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2445223331451416, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.2445223331451416, "logits_per_char": -0.6222611665725708, "num_chars": 2}, {"sum_logits": -1.435804843902588, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.435804843902588, "logits_per_char": -0.717902421951294, "num_chars": 2}, {"sum_logits": -1.4876431226730347, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.4876431226730347, "logits_per_char": -0.7438215613365173, "num_chars": 2}, {"sum_logits": -1.4119783639907837, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.4119783639907837, "logits_per_char": -0.7059891819953918, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 725, "native_id": "Mercury_SC_LBS10932", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.454712986946106, "incorrect_loss_raw": 1.375527024269104, "correct_loss_per_char": 0.727356493473053, "incorrect_loss_per_char": 0.687763512134552, "correct_loss_per_token": 1.454712986946106, "incorrect_loss_per_token": 1.375527024269104, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4832756519317627, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4832756519317627, "logits_per_char": -0.7416378259658813, "num_chars": 2}, {"sum_logits": -1.454712986946106, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.454712986946106, "logits_per_char": -0.727356493473053, "num_chars": 2}, {"sum_logits": -1.4086748361587524, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4086748361587524, "logits_per_char": -0.7043374180793762, "num_chars": 2}, {"sum_logits": -1.2346305847167969, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.2346305847167969, "logits_per_char": -0.6173152923583984, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 726, "native_id": "Mercury_192220", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4557949304580688, "incorrect_loss_raw": 1.3754075368245442, "correct_loss_per_char": 0.7278974652290344, "incorrect_loss_per_char": 0.6877037684122721, "correct_loss_per_token": 1.4557949304580688, "incorrect_loss_per_token": 1.3754075368245442, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4054034948349, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4054034948349, "logits_per_char": -0.70270174741745, "num_chars": 2}, {"sum_logits": -1.2233809232711792, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.2233809232711792, "logits_per_char": -0.6116904616355896, "num_chars": 2}, {"sum_logits": -1.4974381923675537, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4974381923675537, "logits_per_char": -0.7487190961837769, "num_chars": 2}, {"sum_logits": -1.4557949304580688, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4557949304580688, "logits_per_char": -0.7278974652290344, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 727, "native_id": "Mercury_SC_407247", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4408884048461914, "incorrect_loss_raw": 1.3827741940816243, "correct_loss_per_char": 0.7204442024230957, "incorrect_loss_per_char": 0.6913870970408121, "correct_loss_per_token": 1.4408884048461914, "incorrect_loss_per_token": 1.3827741940816243, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5670217275619507, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.5670217275619507, "logits_per_char": -0.7835108637809753, "num_chars": 2}, {"sum_logits": -1.354504942893982, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.354504942893982, "logits_per_char": -0.677252471446991, "num_chars": 2}, {"sum_logits": -1.4408884048461914, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.4408884048461914, "logits_per_char": -0.7204442024230957, "num_chars": 2}, {"sum_logits": -1.2267959117889404, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": true, "logits_per_token": -1.2267959117889404, "logits_per_char": -0.6133979558944702, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 728, "native_id": "Mercury_7024798", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1365785598754883, "incorrect_loss_raw": 1.4946902990341187, "correct_loss_per_char": 0.5682892799377441, "incorrect_loss_per_char": 0.7473451495170593, "correct_loss_per_token": 1.1365785598754883, "incorrect_loss_per_token": 1.4946902990341187, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4581823348999023, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4581823348999023, "logits_per_char": -0.7290911674499512, "num_chars": 2}, {"sum_logits": -1.5980297327041626, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.5980297327041626, "logits_per_char": -0.7990148663520813, "num_chars": 2}, {"sum_logits": -1.427858829498291, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.427858829498291, "logits_per_char": -0.7139294147491455, "num_chars": 2}, {"sum_logits": -1.1365785598754883, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.1365785598754883, "logits_per_char": -0.5682892799377441, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 729, "native_id": "Mercury_7180810", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4620401859283447, "incorrect_loss_raw": 1.3684098323186238, "correct_loss_per_char": 0.7310200929641724, "incorrect_loss_per_char": 0.6842049161593119, "correct_loss_per_token": 1.4620401859283447, "incorrect_loss_per_token": 1.3684098323186238, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.421878457069397, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.421878457069397, "logits_per_char": -0.7109392285346985, "num_chars": 2}, {"sum_logits": -1.3832015991210938, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.3832015991210938, "logits_per_char": -0.6916007995605469, "num_chars": 2}, {"sum_logits": -1.4620401859283447, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.4620401859283447, "logits_per_char": -0.7310200929641724, "num_chars": 2}, {"sum_logits": -1.3001494407653809, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": true, "logits_per_token": -1.3001494407653809, "logits_per_char": -0.6500747203826904, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 730, "native_id": "Mercury_412780", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2831532955169678, "incorrect_loss_raw": 1.4306005636850994, "correct_loss_per_char": 0.6415766477584839, "incorrect_loss_per_char": 0.7153002818425497, "correct_loss_per_token": 1.2831532955169678, "incorrect_loss_per_token": 1.4306005636850994, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5061806440353394, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": false, "logits_per_token": -1.5061806440353394, "logits_per_char": -0.7530903220176697, "num_chars": 2}, {"sum_logits": -1.2831532955169678, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": true, "logits_per_token": -1.2831532955169678, "logits_per_char": -0.6415766477584839, "num_chars": 2}, {"sum_logits": -1.3988176584243774, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": false, "logits_per_token": -1.3988176584243774, "logits_per_char": -0.6994088292121887, "num_chars": 2}, {"sum_logits": -1.386803388595581, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": false, "logits_per_token": -1.386803388595581, "logits_per_char": -0.6934016942977905, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 731, "native_id": "LEAP_2011_8_10434", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.416931390762329, "incorrect_loss_raw": 1.3898187478383381, "correct_loss_per_char": 0.7084656953811646, "incorrect_loss_per_char": 0.6949093739191691, "correct_loss_per_token": 1.416931390762329, "incorrect_loss_per_token": 1.3898187478383381, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5104900598526, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.5104900598526, "logits_per_char": -0.7552450299263, "num_chars": 2}, {"sum_logits": -1.416931390762329, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.416931390762329, "logits_per_char": -0.7084656953811646, "num_chars": 2}, {"sum_logits": -1.4452416896820068, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.4452416896820068, "logits_per_char": -0.7226208448410034, "num_chars": 2}, {"sum_logits": -1.2137244939804077, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": true, "logits_per_token": -1.2137244939804077, "logits_per_char": -0.6068622469902039, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 732, "native_id": "Mercury_7200340", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.259087324142456, "incorrect_loss_raw": 1.440625508626302, "correct_loss_per_char": 0.629543662071228, "incorrect_loss_per_char": 0.720312754313151, "correct_loss_per_token": 1.259087324142456, "incorrect_loss_per_token": 1.440625508626302, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3484704494476318, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.3484704494476318, "logits_per_char": -0.6742352247238159, "num_chars": 2}, {"sum_logits": -1.499804973602295, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.499804973602295, "logits_per_char": -0.7499024868011475, "num_chars": 2}, {"sum_logits": -1.4736011028289795, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.4736011028289795, "logits_per_char": -0.7368005514144897, "num_chars": 2}, {"sum_logits": -1.259087324142456, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": true, "logits_per_token": -1.259087324142456, "logits_per_char": -0.629543662071228, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 733, "native_id": "Mercury_7056525", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4332884550094604, "incorrect_loss_raw": 1.3822370767593384, "correct_loss_per_char": 0.7166442275047302, "incorrect_loss_per_char": 0.6911185383796692, "correct_loss_per_token": 1.4332884550094604, "incorrect_loss_per_token": 1.3822370767593384, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4921308755874634, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4921308755874634, "logits_per_char": -0.7460654377937317, "num_chars": 2}, {"sum_logits": -1.4337897300720215, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4337897300720215, "logits_per_char": -0.7168948650360107, "num_chars": 2}, {"sum_logits": -1.4332884550094604, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4332884550094604, "logits_per_char": -0.7166442275047302, "num_chars": 2}, {"sum_logits": -1.2207906246185303, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.2207906246185303, "logits_per_char": -0.6103953123092651, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 734, "native_id": "Mercury_7085278", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4175931215286255, "incorrect_loss_raw": 1.393152912457784, "correct_loss_per_char": 0.7087965607643127, "incorrect_loss_per_char": 0.696576456228892, "correct_loss_per_token": 1.4175931215286255, "incorrect_loss_per_token": 1.393152912457784, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.206365942955017, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.206365942955017, "logits_per_char": -0.6031829714775085, "num_chars": 2}, {"sum_logits": -1.3829078674316406, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.3829078674316406, "logits_per_char": -0.6914539337158203, "num_chars": 2}, {"sum_logits": -1.5901849269866943, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.5901849269866943, "logits_per_char": -0.7950924634933472, "num_chars": 2}, {"sum_logits": -1.4175931215286255, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.4175931215286255, "logits_per_char": -0.7087965607643127, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 735, "native_id": "AKDE&ED_2008_4_35", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2855005264282227, "incorrect_loss_raw": 1.4292181730270386, "correct_loss_per_char": 0.6427502632141113, "incorrect_loss_per_char": 0.7146090865135193, "correct_loss_per_token": 1.2855005264282227, "incorrect_loss_per_token": 1.4292181730270386, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.423308253288269, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.423308253288269, "logits_per_char": -0.7116541266441345, "num_chars": 2}, {"sum_logits": -1.4938573837280273, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4938573837280273, "logits_per_char": -0.7469286918640137, "num_chars": 2}, {"sum_logits": -1.3704888820648193, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3704888820648193, "logits_per_char": -0.6852444410324097, "num_chars": 2}, {"sum_logits": -1.2855005264282227, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.2855005264282227, "logits_per_char": -0.6427502632141113, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 736, "native_id": "MCAS_1999_8_16", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2343336343765259, "incorrect_loss_raw": 1.447237213452657, "correct_loss_per_char": 0.6171668171882629, "incorrect_loss_per_char": 0.7236186067263285, "correct_loss_per_token": 1.2343336343765259, "incorrect_loss_per_token": 1.447237213452657, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.467159628868103, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.467159628868103, "logits_per_char": -0.7335798144340515, "num_chars": 2}, {"sum_logits": -1.4742540121078491, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.4742540121078491, "logits_per_char": -0.7371270060539246, "num_chars": 2}, {"sum_logits": -1.400297999382019, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.400297999382019, "logits_per_char": -0.7001489996910095, "num_chars": 2}, {"sum_logits": -1.2343336343765259, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": true, "logits_per_token": -1.2343336343765259, "logits_per_char": -0.6171668171882629, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 737, "native_id": "Mercury_SC_400063", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.454945683479309, "incorrect_loss_raw": 1.3712256749471028, "correct_loss_per_char": 0.7274728417396545, "incorrect_loss_per_char": 0.6856128374735514, "correct_loss_per_token": 1.454945683479309, "incorrect_loss_per_token": 1.3712256749471028, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3180949687957764, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.3180949687957764, "logits_per_char": -0.6590474843978882, "num_chars": 2}, {"sum_logits": -1.3522695302963257, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3522695302963257, "logits_per_char": -0.6761347651481628, "num_chars": 2}, {"sum_logits": -1.4433125257492065, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4433125257492065, "logits_per_char": -0.7216562628746033, "num_chars": 2}, {"sum_logits": -1.454945683479309, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.454945683479309, "logits_per_char": -0.7274728417396545, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 738, "native_id": "Mercury_SC_401666", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4669007062911987, "incorrect_loss_raw": 1.3820878664652507, "correct_loss_per_char": 0.7334503531455994, "incorrect_loss_per_char": 0.6910439332326254, "correct_loss_per_token": 1.4669007062911987, "incorrect_loss_per_token": 1.3820878664652507, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2019294500350952, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.2019294500350952, "logits_per_char": -0.6009647250175476, "num_chars": 2}, {"sum_logits": -1.3697007894515991, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.3697007894515991, "logits_per_char": -0.6848503947257996, "num_chars": 2}, {"sum_logits": -1.5746333599090576, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.5746333599090576, "logits_per_char": -0.7873166799545288, "num_chars": 2}, {"sum_logits": -1.4669007062911987, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4669007062911987, "logits_per_char": -0.7334503531455994, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 739, "native_id": "TIMSS_2011_8_pg31", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4293298721313477, "incorrect_loss_raw": 1.3813101450602214, "correct_loss_per_char": 0.7146649360656738, "incorrect_loss_per_char": 0.6906550725301107, "correct_loss_per_token": 1.4293298721313477, "incorrect_loss_per_token": 1.3813101450602214, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2659050226211548, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.2659050226211548, "logits_per_char": -0.6329525113105774, "num_chars": 2}, {"sum_logits": -1.4236760139465332, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4236760139465332, "logits_per_char": -0.7118380069732666, "num_chars": 2}, {"sum_logits": -1.4293298721313477, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4293298721313477, "logits_per_char": -0.7146649360656738, "num_chars": 2}, {"sum_logits": -1.454349398612976, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.454349398612976, "logits_per_char": -0.727174699306488, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 740, "native_id": "Mercury_412673", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3972175121307373, "incorrect_loss_raw": 1.4017456769943237, "correct_loss_per_char": 0.6986087560653687, "incorrect_loss_per_char": 0.7008728384971619, "correct_loss_per_token": 1.3972175121307373, "incorrect_loss_per_token": 1.4017456769943237, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.560437798500061, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.560437798500061, "logits_per_char": -0.7802188992500305, "num_chars": 2}, {"sum_logits": -1.3972175121307373, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.3972175121307373, "logits_per_char": -0.6986087560653687, "num_chars": 2}, {"sum_logits": -1.440455436706543, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.440455436706543, "logits_per_char": -0.7202277183532715, "num_chars": 2}, {"sum_logits": -1.2043437957763672, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.2043437957763672, "logits_per_char": -0.6021718978881836, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 741, "native_id": "Mercury_7130655", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2396788597106934, "incorrect_loss_raw": 1.4488985538482666, "correct_loss_per_char": 0.6198394298553467, "incorrect_loss_per_char": 0.7244492769241333, "correct_loss_per_token": 1.2396788597106934, "incorrect_loss_per_token": 1.4488985538482666, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.55183744430542, "num_tokens": 1, "num_tokens_all": 424, "is_greedy": false, "logits_per_token": -1.55183744430542, "logits_per_char": -0.77591872215271, "num_chars": 2}, {"sum_logits": -1.3753540515899658, "num_tokens": 1, "num_tokens_all": 424, "is_greedy": false, "logits_per_token": -1.3753540515899658, "logits_per_char": -0.6876770257949829, "num_chars": 2}, {"sum_logits": -1.419504165649414, "num_tokens": 1, "num_tokens_all": 424, "is_greedy": false, "logits_per_token": -1.419504165649414, "logits_per_char": -0.709752082824707, "num_chars": 2}, {"sum_logits": -1.2396788597106934, "num_tokens": 1, "num_tokens_all": 424, "is_greedy": true, "logits_per_token": -1.2396788597106934, "logits_per_char": -0.6198394298553467, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 742, "native_id": "MCAS_2004_5_7", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3810125589370728, "incorrect_loss_raw": 1.3967033227284749, "correct_loss_per_char": 0.6905062794685364, "incorrect_loss_per_char": 0.6983516613642374, "correct_loss_per_token": 1.3810125589370728, "incorrect_loss_per_token": 1.3967033227284749, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.452818751335144, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.452818751335144, "logits_per_char": -0.726409375667572, "num_chars": 2}, {"sum_logits": -1.3810125589370728, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3810125589370728, "logits_per_char": -0.6905062794685364, "num_chars": 2}, {"sum_logits": -1.4228966236114502, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4228966236114502, "logits_per_char": -0.7114483118057251, "num_chars": 2}, {"sum_logits": -1.3143945932388306, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.3143945932388306, "logits_per_char": -0.6571972966194153, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 743, "native_id": "Mercury_7187373", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1038028001785278, "incorrect_loss_raw": 1.511832316716512, "correct_loss_per_char": 0.5519014000892639, "incorrect_loss_per_char": 0.755916158358256, "correct_loss_per_token": 1.1038028001785278, "incorrect_loss_per_token": 1.511832316716512, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.651925802230835, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.651925802230835, "logits_per_char": -0.8259629011154175, "num_chars": 2}, {"sum_logits": -1.451566457748413, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.451566457748413, "logits_per_char": -0.7257832288742065, "num_chars": 2}, {"sum_logits": -1.432004690170288, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.432004690170288, "logits_per_char": -0.716002345085144, "num_chars": 2}, {"sum_logits": -1.1038028001785278, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": true, "logits_per_token": -1.1038028001785278, "logits_per_char": -0.5519014000892639, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 744, "native_id": "Mercury_SC_401361", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4032962322235107, "incorrect_loss_raw": 1.394319216410319, "correct_loss_per_char": 0.7016481161117554, "incorrect_loss_per_char": 0.6971596082051595, "correct_loss_per_token": 1.4032962322235107, "incorrect_loss_per_token": 1.394319216410319, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2475500106811523, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.2475500106811523, "logits_per_char": -0.6237750053405762, "num_chars": 2}, {"sum_logits": -1.3853332996368408, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3853332996368408, "logits_per_char": -0.6926666498184204, "num_chars": 2}, {"sum_logits": -1.4032962322235107, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4032962322235107, "logits_per_char": -0.7016481161117554, "num_chars": 2}, {"sum_logits": -1.5500743389129639, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.5500743389129639, "logits_per_char": -0.7750371694564819, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 745, "native_id": "MCAS_2006_8_12", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3182913064956665, "incorrect_loss_raw": 1.4181946118672688, "correct_loss_per_char": 0.6591456532478333, "incorrect_loss_per_char": 0.7090973059336344, "correct_loss_per_token": 1.3182913064956665, "incorrect_loss_per_token": 1.4181946118672688, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4083003997802734, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4083003997802734, "logits_per_char": -0.7041501998901367, "num_chars": 2}, {"sum_logits": -1.3640908002853394, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.3640908002853394, "logits_per_char": -0.6820454001426697, "num_chars": 2}, {"sum_logits": -1.4821926355361938, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4821926355361938, "logits_per_char": -0.7410963177680969, "num_chars": 2}, {"sum_logits": -1.3182913064956665, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.3182913064956665, "logits_per_char": -0.6591456532478333, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 746, "native_id": "Mercury_7233765", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4571117162704468, "incorrect_loss_raw": 1.3789423704147339, "correct_loss_per_char": 0.7285558581352234, "incorrect_loss_per_char": 0.6894711852073669, "correct_loss_per_token": 1.4571117162704468, "incorrect_loss_per_token": 1.3789423704147339, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5100445747375488, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.5100445747375488, "logits_per_char": -0.7550222873687744, "num_chars": 2}, {"sum_logits": -1.4571117162704468, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4571117162704468, "logits_per_char": -0.7285558581352234, "num_chars": 2}, {"sum_logits": -1.4312183856964111, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4312183856964111, "logits_per_char": -0.7156091928482056, "num_chars": 2}, {"sum_logits": -1.1955641508102417, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.1955641508102417, "logits_per_char": -0.5977820754051208, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 747, "native_id": "Mercury_SC_407613", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4673399925231934, "incorrect_loss_raw": 1.3751087586085002, "correct_loss_per_char": 0.7336699962615967, "incorrect_loss_per_char": 0.6875543793042501, "correct_loss_per_token": 1.4673399925231934, "incorrect_loss_per_token": 1.3751087586085002, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.210749864578247, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.210749864578247, "logits_per_char": -0.6053749322891235, "num_chars": 2}, {"sum_logits": -1.3818306922912598, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3818306922912598, "logits_per_char": -0.6909153461456299, "num_chars": 2}, {"sum_logits": -1.4673399925231934, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.4673399925231934, "logits_per_char": -0.7336699962615967, "num_chars": 2}, {"sum_logits": -1.5327457189559937, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.5327457189559937, "logits_per_char": -0.7663728594779968, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 748, "native_id": "MCAS_2005_5_24", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.343895435333252, "incorrect_loss_raw": 1.4080408414204915, "correct_loss_per_char": 0.671947717666626, "incorrect_loss_per_char": 0.7040204207102457, "correct_loss_per_token": 1.343895435333252, "incorrect_loss_per_token": 1.4080408414204915, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.343895435333252, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.343895435333252, "logits_per_char": -0.671947717666626, "num_chars": 2}, {"sum_logits": -1.335410475730896, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.335410475730896, "logits_per_char": -0.667705237865448, "num_chars": 2}, {"sum_logits": -1.4688103199005127, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4688103199005127, "logits_per_char": -0.7344051599502563, "num_chars": 2}, {"sum_logits": -1.419901728630066, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.419901728630066, "logits_per_char": -0.709950864315033, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 749, "native_id": "Mercury_405778", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3267505168914795, "incorrect_loss_raw": 1.4386661450068157, "correct_loss_per_char": 0.6633752584457397, "incorrect_loss_per_char": 0.7193330725034078, "correct_loss_per_token": 1.3267505168914795, "incorrect_loss_per_token": 1.4386661450068157, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6518553495407104, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.6518553495407104, "logits_per_char": -0.8259276747703552, "num_chars": 2}, {"sum_logits": -1.529699683189392, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.529699683189392, "logits_per_char": -0.764849841594696, "num_chars": 2}, {"sum_logits": -1.3267505168914795, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3267505168914795, "logits_per_char": -0.6633752584457397, "num_chars": 2}, {"sum_logits": -1.1344434022903442, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.1344434022903442, "logits_per_char": -0.5672217011451721, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 750, "native_id": "Mercury_7263060", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.257491111755371, "incorrect_loss_raw": 1.440460244814555, "correct_loss_per_char": 0.6287455558776855, "incorrect_loss_per_char": 0.7202301224072775, "correct_loss_per_token": 1.257491111755371, "incorrect_loss_per_token": 1.440460244814555, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3831690549850464, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.3831690549850464, "logits_per_char": -0.6915845274925232, "num_chars": 2}, {"sum_logits": -1.5082900524139404, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.5082900524139404, "logits_per_char": -0.7541450262069702, "num_chars": 2}, {"sum_logits": -1.4299216270446777, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4299216270446777, "logits_per_char": -0.7149608135223389, "num_chars": 2}, {"sum_logits": -1.257491111755371, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.257491111755371, "logits_per_char": -0.6287455558776855, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 751, "native_id": "Mercury_SC_401668", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3744251728057861, "incorrect_loss_raw": 1.3957527875900269, "correct_loss_per_char": 0.6872125864028931, "incorrect_loss_per_char": 0.6978763937950134, "correct_loss_per_token": 1.3744251728057861, "incorrect_loss_per_token": 1.3957527875900269, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3950234651565552, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.3950234651565552, "logits_per_char": -0.6975117325782776, "num_chars": 2}, {"sum_logits": -1.3699506521224976, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.3699506521224976, "logits_per_char": -0.6849753260612488, "num_chars": 2}, {"sum_logits": -1.4222842454910278, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4222842454910278, "logits_per_char": -0.7111421227455139, "num_chars": 2}, {"sum_logits": -1.3744251728057861, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.3744251728057861, "logits_per_char": -0.6872125864028931, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 752, "native_id": "Mercury_7230388", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2697656154632568, "incorrect_loss_raw": 1.4349496364593506, "correct_loss_per_char": 0.6348828077316284, "incorrect_loss_per_char": 0.7174748182296753, "correct_loss_per_token": 1.2697656154632568, "incorrect_loss_per_token": 1.4349496364593506, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4369635581970215, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.4369635581970215, "logits_per_char": -0.7184817790985107, "num_chars": 2}, {"sum_logits": -1.3801175355911255, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.3801175355911255, "logits_per_char": -0.6900587677955627, "num_chars": 2}, {"sum_logits": -1.4877678155899048, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.4877678155899048, "logits_per_char": -0.7438839077949524, "num_chars": 2}, {"sum_logits": -1.2697656154632568, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.2697656154632568, "logits_per_char": -0.6348828077316284, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 753, "native_id": "Mercury_7041650", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3649557828903198, "incorrect_loss_raw": 1.4035306374231975, "correct_loss_per_char": 0.6824778914451599, "incorrect_loss_per_char": 0.7017653187115988, "correct_loss_per_token": 1.3649557828903198, "incorrect_loss_per_token": 1.4035306374231975, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3846373558044434, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.3846373558044434, "logits_per_char": -0.6923186779022217, "num_chars": 2}, {"sum_logits": -1.316210150718689, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.316210150718689, "logits_per_char": -0.6581050753593445, "num_chars": 2}, {"sum_logits": -1.50974440574646, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.50974440574646, "logits_per_char": -0.75487220287323, "num_chars": 2}, {"sum_logits": -1.3649557828903198, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.3649557828903198, "logits_per_char": -0.6824778914451599, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 754, "native_id": "Mercury_SC_409009", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.455033540725708, "incorrect_loss_raw": 1.3711382150650024, "correct_loss_per_char": 0.727516770362854, "incorrect_loss_per_char": 0.6855691075325012, "correct_loss_per_token": 1.455033540725708, "incorrect_loss_per_token": 1.3711382150650024, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.455033540725708, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.455033540725708, "logits_per_char": -0.727516770362854, "num_chars": 2}, {"sum_logits": -1.4506933689117432, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4506933689117432, "logits_per_char": -0.7253466844558716, "num_chars": 2}, {"sum_logits": -1.3495227098464966, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.3495227098464966, "logits_per_char": -0.6747613549232483, "num_chars": 2}, {"sum_logits": -1.3131985664367676, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.3131985664367676, "logits_per_char": -0.6565992832183838, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 755, "native_id": "Mercury_7223143", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.373652458190918, "incorrect_loss_raw": 1.4117643435796101, "correct_loss_per_char": 0.686826229095459, "incorrect_loss_per_char": 0.7058821717898051, "correct_loss_per_token": 1.373652458190918, "incorrect_loss_per_token": 1.4117643435796101, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2494906187057495, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.2494906187057495, "logits_per_char": -0.6247453093528748, "num_chars": 2}, {"sum_logits": -1.4146578311920166, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4146578311920166, "logits_per_char": -0.7073289155960083, "num_chars": 2}, {"sum_logits": -1.5711445808410645, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.5711445808410645, "logits_per_char": -0.7855722904205322, "num_chars": 2}, {"sum_logits": -1.373652458190918, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.373652458190918, "logits_per_char": -0.686826229095459, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 756, "native_id": "ACTAAP_2007_7_3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4225465059280396, "incorrect_loss_raw": 1.3804136117299397, "correct_loss_per_char": 0.7112732529640198, "incorrect_loss_per_char": 0.6902068058649699, "correct_loss_per_token": 1.4225465059280396, "incorrect_loss_per_token": 1.3804136117299397, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3345143795013428, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -1.3345143795013428, "logits_per_char": -0.6672571897506714, "num_chars": 2}, {"sum_logits": -1.3873364925384521, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.3873364925384521, "logits_per_char": -0.6936682462692261, "num_chars": 2}, {"sum_logits": -1.4193899631500244, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.4193899631500244, "logits_per_char": -0.7096949815750122, "num_chars": 2}, {"sum_logits": -1.4225465059280396, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.4225465059280396, "logits_per_char": -0.7112732529640198, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 757, "native_id": "Mercury_7215670", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4060250520706177, "incorrect_loss_raw": 1.3870277007420857, "correct_loss_per_char": 0.7030125260353088, "incorrect_loss_per_char": 0.6935138503710429, "correct_loss_per_token": 1.4060250520706177, "incorrect_loss_per_token": 1.3870277007420857, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.382354974746704, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -1.382354974746704, "logits_per_char": -0.691177487373352, "num_chars": 2}, {"sum_logits": -1.4467947483062744, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -1.4467947483062744, "logits_per_char": -0.7233973741531372, "num_chars": 2}, {"sum_logits": -1.4060250520706177, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -1.4060250520706177, "logits_per_char": -0.7030125260353088, "num_chars": 2}, {"sum_logits": -1.3319333791732788, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": true, "logits_per_token": -1.3319333791732788, "logits_per_char": -0.6659666895866394, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 758, "native_id": "MEA_2010_8_15", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3968394994735718, "incorrect_loss_raw": 1.3888884782791138, "correct_loss_per_char": 0.6984197497367859, "incorrect_loss_per_char": 0.6944442391395569, "correct_loss_per_token": 1.3968394994735718, "incorrect_loss_per_token": 1.3888884782791138, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3968394994735718, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3968394994735718, "logits_per_char": -0.6984197497367859, "num_chars": 2}, {"sum_logits": -1.35406494140625, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.35406494140625, "logits_per_char": -0.677032470703125, "num_chars": 2}, {"sum_logits": -1.4746975898742676, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4746975898742676, "logits_per_char": -0.7373487949371338, "num_chars": 2}, {"sum_logits": -1.3379029035568237, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.3379029035568237, "logits_per_char": -0.6689514517784119, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 759, "native_id": "Mercury_7270515", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4099149703979492, "incorrect_loss_raw": 1.413511594136556, "correct_loss_per_char": 0.7049574851989746, "incorrect_loss_per_char": 0.706755797068278, "correct_loss_per_token": 1.4099149703979492, "incorrect_loss_per_token": 1.413511594136556, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1516926288604736, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.1516926288604736, "logits_per_char": -0.5758463144302368, "num_chars": 2}, {"sum_logits": -1.4099149703979492, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4099149703979492, "logits_per_char": -0.7049574851989746, "num_chars": 2}, {"sum_logits": -1.5836799144744873, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.5836799144744873, "logits_per_char": -0.7918399572372437, "num_chars": 2}, {"sum_logits": -1.505162239074707, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.505162239074707, "logits_per_char": -0.7525811195373535, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 760, "native_id": "Mercury_7006160", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.393165946006775, "incorrect_loss_raw": 1.3961416880289714, "correct_loss_per_char": 0.6965829730033875, "incorrect_loss_per_char": 0.6980708440144857, "correct_loss_per_token": 1.393165946006775, "incorrect_loss_per_token": 1.3961416880289714, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4547103643417358, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4547103643417358, "logits_per_char": -0.7273551821708679, "num_chars": 2}, {"sum_logits": -1.495802640914917, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.495802640914917, "logits_per_char": -0.7479013204574585, "num_chars": 2}, {"sum_logits": -1.393165946006775, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.393165946006775, "logits_per_char": -0.6965829730033875, "num_chars": 2}, {"sum_logits": -1.2379120588302612, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.2379120588302612, "logits_per_char": -0.6189560294151306, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 761, "native_id": "Mercury_SC_410630", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5141148567199707, "incorrect_loss_raw": 1.3733763694763184, "correct_loss_per_char": 0.7570574283599854, "incorrect_loss_per_char": 0.6866881847381592, "correct_loss_per_token": 1.5141148567199707, "incorrect_loss_per_token": 1.3733763694763184, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5824565887451172, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.5824565887451172, "logits_per_char": -0.7912282943725586, "num_chars": 2}, {"sum_logits": -1.4456321001052856, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.4456321001052856, "logits_per_char": -0.7228160500526428, "num_chars": 2}, {"sum_logits": -1.5141148567199707, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.5141148567199707, "logits_per_char": -0.7570574283599854, "num_chars": 2}, {"sum_logits": -1.0920404195785522, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": true, "logits_per_token": -1.0920404195785522, "logits_per_char": -0.5460202097892761, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 762, "native_id": "Mercury_7082320", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3452458381652832, "incorrect_loss_raw": 1.407039960225423, "correct_loss_per_char": 0.6726229190826416, "incorrect_loss_per_char": 0.7035199801127116, "correct_loss_per_token": 1.3452458381652832, "incorrect_loss_per_token": 1.407039960225423, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3452458381652832, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.3452458381652832, "logits_per_char": -0.6726229190826416, "num_chars": 2}, {"sum_logits": -1.375649333000183, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.375649333000183, "logits_per_char": -0.6878246665000916, "num_chars": 2}, {"sum_logits": -1.4708619117736816, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4708619117736816, "logits_per_char": -0.7354309558868408, "num_chars": 2}, {"sum_logits": -1.3746086359024048, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.3746086359024048, "logits_per_char": -0.6873043179512024, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 763, "native_id": "MEA_2013_8_1", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4107156991958618, "incorrect_loss_raw": 1.3837064504623413, "correct_loss_per_char": 0.7053578495979309, "incorrect_loss_per_char": 0.6918532252311707, "correct_loss_per_token": 1.4107156991958618, "incorrect_loss_per_token": 1.3837064504623413, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4107156991958618, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4107156991958618, "logits_per_char": -0.7053578495979309, "num_chars": 2}, {"sum_logits": -1.3917338848114014, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3917338848114014, "logits_per_char": -0.6958669424057007, "num_chars": 2}, {"sum_logits": -1.371880292892456, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.371880292892456, "logits_per_char": -0.685940146446228, "num_chars": 2}, {"sum_logits": -1.3875051736831665, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3875051736831665, "logits_per_char": -0.6937525868415833, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 764, "native_id": "Mercury_7033845", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.500130295753479, "incorrect_loss_raw": 1.3593580325444539, "correct_loss_per_char": 0.7500651478767395, "incorrect_loss_per_char": 0.6796790162722269, "correct_loss_per_token": 1.500130295753479, "incorrect_loss_per_token": 1.3593580325444539, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4077742099761963, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4077742099761963, "logits_per_char": -0.7038871049880981, "num_chars": 2}, {"sum_logits": -1.500130295753479, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.500130295753479, "logits_per_char": -0.7500651478767395, "num_chars": 2}, {"sum_logits": -1.4156403541564941, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4156403541564941, "logits_per_char": -0.7078201770782471, "num_chars": 2}, {"sum_logits": -1.2546595335006714, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.2546595335006714, "logits_per_char": -0.6273297667503357, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 765, "native_id": "Mercury_7221620", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.534072995185852, "incorrect_loss_raw": 1.3485976060231526, "correct_loss_per_char": 0.767036497592926, "incorrect_loss_per_char": 0.6742988030115763, "correct_loss_per_token": 1.534072995185852, "incorrect_loss_per_token": 1.3485976060231526, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.534072995185852, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.534072995185852, "logits_per_char": -0.767036497592926, "num_chars": 2}, {"sum_logits": -1.341240406036377, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.341240406036377, "logits_per_char": -0.6706202030181885, "num_chars": 2}, {"sum_logits": -1.4228097200393677, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4228097200393677, "logits_per_char": -0.7114048600196838, "num_chars": 2}, {"sum_logits": -1.2817426919937134, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.2817426919937134, "logits_per_char": -0.6408713459968567, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 766, "native_id": "LEAP__7_10352", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.31740140914917, "incorrect_loss_raw": 1.4171724716822307, "correct_loss_per_char": 0.658700704574585, "incorrect_loss_per_char": 0.7085862358411154, "correct_loss_per_token": 1.31740140914917, "incorrect_loss_per_token": 1.4171724716822307, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.491464376449585, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.491464376449585, "logits_per_char": -0.7457321882247925, "num_chars": 2}, {"sum_logits": -1.3676235675811768, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.3676235675811768, "logits_per_char": -0.6838117837905884, "num_chars": 2}, {"sum_logits": -1.3924294710159302, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.3924294710159302, "logits_per_char": -0.6962147355079651, "num_chars": 2}, {"sum_logits": -1.31740140914917, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": true, "logits_per_token": -1.31740140914917, "logits_per_char": -0.658700704574585, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 767, "native_id": "Mercury_412605", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.353861689567566, "incorrect_loss_raw": 1.4099687735239665, "correct_loss_per_char": 0.676930844783783, "incorrect_loss_per_char": 0.7049843867619833, "correct_loss_per_token": 1.353861689567566, "incorrect_loss_per_token": 1.4099687735239665, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5651301145553589, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.5651301145553589, "logits_per_char": -0.7825650572776794, "num_chars": 2}, {"sum_logits": -1.3918321132659912, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.3918321132659912, "logits_per_char": -0.6959160566329956, "num_chars": 2}, {"sum_logits": -1.353861689567566, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.353861689567566, "logits_per_char": -0.676930844783783, "num_chars": 2}, {"sum_logits": -1.2729440927505493, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": true, "logits_per_token": -1.2729440927505493, "logits_per_char": -0.6364720463752747, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 768, "native_id": "Mercury_416638", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3223966360092163, "incorrect_loss_raw": 1.4192534287770588, "correct_loss_per_char": 0.6611983180046082, "incorrect_loss_per_char": 0.7096267143885294, "correct_loss_per_token": 1.3223966360092163, "incorrect_loss_per_token": 1.4192534287770588, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3223966360092163, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.3223966360092163, "logits_per_char": -0.6611983180046082, "num_chars": 2}, {"sum_logits": -1.4780536890029907, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4780536890029907, "logits_per_char": -0.7390268445014954, "num_chars": 2}, {"sum_logits": -1.3711519241333008, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3711519241333008, "logits_per_char": -0.6855759620666504, "num_chars": 2}, {"sum_logits": -1.4085546731948853, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4085546731948853, "logits_per_char": -0.7042773365974426, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 769, "native_id": "MCAS_2011_8_17694", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.491152286529541, "incorrect_loss_raw": 1.358952482541402, "correct_loss_per_char": 0.7455761432647705, "incorrect_loss_per_char": 0.679476241270701, "correct_loss_per_token": 1.491152286529541, "incorrect_loss_per_token": 1.358952482541402, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.491152286529541, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.491152286529541, "logits_per_char": -0.7455761432647705, "num_chars": 2}, {"sum_logits": -1.3255711793899536, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.3255711793899536, "logits_per_char": -0.6627855896949768, "num_chars": 2}, {"sum_logits": -1.3869554996490479, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.3869554996490479, "logits_per_char": -0.6934777498245239, "num_chars": 2}, {"sum_logits": -1.364330768585205, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.364330768585205, "logits_per_char": -0.6821653842926025, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 770, "native_id": "Mercury_SC_400012", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.364270567893982, "incorrect_loss_raw": 1.3993840217590332, "correct_loss_per_char": 0.682135283946991, "incorrect_loss_per_char": 0.6996920108795166, "correct_loss_per_token": 1.364270567893982, "incorrect_loss_per_token": 1.3993840217590332, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4234862327575684, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.4234862327575684, "logits_per_char": -0.7117431163787842, "num_chars": 2}, {"sum_logits": -1.339028239250183, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.339028239250183, "logits_per_char": -0.6695141196250916, "num_chars": 2}, {"sum_logits": -1.4356375932693481, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.4356375932693481, "logits_per_char": -0.7178187966346741, "num_chars": 2}, {"sum_logits": -1.364270567893982, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.364270567893982, "logits_per_char": -0.682135283946991, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 771, "native_id": "Mercury_SC_413458", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3877217769622803, "incorrect_loss_raw": 1.3929997682571411, "correct_loss_per_char": 0.6938608884811401, "incorrect_loss_per_char": 0.6964998841285706, "correct_loss_per_token": 1.3877217769622803, "incorrect_loss_per_token": 1.3929997682571411, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3488811254501343, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.3488811254501343, "logits_per_char": -0.6744405627250671, "num_chars": 2}, {"sum_logits": -1.3877217769622803, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3877217769622803, "logits_per_char": -0.6938608884811401, "num_chars": 2}, {"sum_logits": -1.4397499561309814, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4397499561309814, "logits_per_char": -0.7198749780654907, "num_chars": 2}, {"sum_logits": -1.3903682231903076, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3903682231903076, "logits_per_char": -0.6951841115951538, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 772, "native_id": "Mercury_7139545", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4108248949050903, "incorrect_loss_raw": 1.3851255973180134, "correct_loss_per_char": 0.7054124474525452, "incorrect_loss_per_char": 0.6925627986590067, "correct_loss_per_token": 1.4108248949050903, "incorrect_loss_per_token": 1.3851255973180134, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.428533673286438, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.428533673286438, "logits_per_char": -0.714266836643219, "num_chars": 2}, {"sum_logits": -1.356434941291809, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.356434941291809, "logits_per_char": -0.6782174706459045, "num_chars": 2}, {"sum_logits": -1.4108248949050903, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.4108248949050903, "logits_per_char": -0.7054124474525452, "num_chars": 2}, {"sum_logits": -1.3704081773757935, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.3704081773757935, "logits_per_char": -0.6852040886878967, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 773, "native_id": "NYSEDREGENTS_2015_4_5", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3394572734832764, "incorrect_loss_raw": 1.4121683835983276, "correct_loss_per_char": 0.6697286367416382, "incorrect_loss_per_char": 0.7060841917991638, "correct_loss_per_token": 1.3394572734832764, "incorrect_loss_per_token": 1.4121683835983276, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5397814512252808, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.5397814512252808, "logits_per_char": -0.7698907256126404, "num_chars": 2}, {"sum_logits": -1.3394572734832764, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.3394572734832764, "logits_per_char": -0.6697286367416382, "num_chars": 2}, {"sum_logits": -1.3281807899475098, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": true, "logits_per_token": -1.3281807899475098, "logits_per_char": -0.6640903949737549, "num_chars": 2}, {"sum_logits": -1.3685429096221924, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.3685429096221924, "logits_per_char": -0.6842714548110962, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 774, "native_id": "TIMSS_2003_8_pg16", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2813040018081665, "incorrect_loss_raw": 1.4326495726903279, "correct_loss_per_char": 0.6406520009040833, "incorrect_loss_per_char": 0.7163247863451639, "correct_loss_per_token": 1.2813040018081665, "incorrect_loss_per_token": 1.4326495726903279, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5078098773956299, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.5078098773956299, "logits_per_char": -0.7539049386978149, "num_chars": 2}, {"sum_logits": -1.4586107730865479, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4586107730865479, "logits_per_char": -0.7293053865432739, "num_chars": 2}, {"sum_logits": -1.2813040018081665, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.2813040018081665, "logits_per_char": -0.6406520009040833, "num_chars": 2}, {"sum_logits": -1.3315280675888062, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.3315280675888062, "logits_per_char": -0.6657640337944031, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 775, "native_id": "Mercury_SC_415073", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4162921905517578, "incorrect_loss_raw": 1.3848272959391277, "correct_loss_per_char": 0.7081460952758789, "incorrect_loss_per_char": 0.6924136479695638, "correct_loss_per_token": 1.4162921905517578, "incorrect_loss_per_token": 1.3848272959391277, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4162921905517578, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4162921905517578, "logits_per_char": -0.7081460952758789, "num_chars": 2}, {"sum_logits": -1.329005241394043, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": true, "logits_per_token": -1.329005241394043, "logits_per_char": -0.6645026206970215, "num_chars": 2}, {"sum_logits": -1.3583648204803467, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.3583648204803467, "logits_per_char": -0.6791824102401733, "num_chars": 2}, {"sum_logits": -1.4671118259429932, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4671118259429932, "logits_per_char": -0.7335559129714966, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 776, "native_id": "Mercury_7012880", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4293785095214844, "incorrect_loss_raw": 1.3793169657389324, "correct_loss_per_char": 0.7146892547607422, "incorrect_loss_per_char": 0.6896584828694662, "correct_loss_per_token": 1.4293785095214844, "incorrect_loss_per_token": 1.3793169657389324, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4293785095214844, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.4293785095214844, "logits_per_char": -0.7146892547607422, "num_chars": 2}, {"sum_logits": -1.3992681503295898, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.3992681503295898, "logits_per_char": -0.6996340751647949, "num_chars": 2}, {"sum_logits": -1.438669204711914, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.438669204711914, "logits_per_char": -0.719334602355957, "num_chars": 2}, {"sum_logits": -1.300013542175293, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": true, "logits_per_token": -1.300013542175293, "logits_per_char": -0.6500067710876465, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 777, "native_id": "Mercury_191625", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3376080989837646, "incorrect_loss_raw": 1.4150376319885254, "correct_loss_per_char": 0.6688040494918823, "incorrect_loss_per_char": 0.7075188159942627, "correct_loss_per_token": 1.3376080989837646, "incorrect_loss_per_token": 1.4150376319885254, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5755542516708374, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.5755542516708374, "logits_per_char": -0.7877771258354187, "num_chars": 2}, {"sum_logits": -1.3034969568252563, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.3034969568252563, "logits_per_char": -0.6517484784126282, "num_chars": 2}, {"sum_logits": -1.3660616874694824, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3660616874694824, "logits_per_char": -0.6830308437347412, "num_chars": 2}, {"sum_logits": -1.3376080989837646, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3376080989837646, "logits_per_char": -0.6688040494918823, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 778, "native_id": "Mercury_SC_402985", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3462977409362793, "incorrect_loss_raw": 1.4060083230336506, "correct_loss_per_char": 0.6731488704681396, "incorrect_loss_per_char": 0.7030041615168253, "correct_loss_per_token": 1.3462977409362793, "incorrect_loss_per_token": 1.4060083230336506, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4540163278579712, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4540163278579712, "logits_per_char": -0.7270081639289856, "num_chars": 2}, {"sum_logits": -1.3462977409362793, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.3462977409362793, "logits_per_char": -0.6731488704681396, "num_chars": 2}, {"sum_logits": -1.3783105611801147, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3783105611801147, "logits_per_char": -0.6891552805900574, "num_chars": 2}, {"sum_logits": -1.3856980800628662, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3856980800628662, "logits_per_char": -0.6928490400314331, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 779, "native_id": "Mercury_7005425", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3873122930526733, "incorrect_loss_raw": 1.395082672437032, "correct_loss_per_char": 0.6936561465263367, "incorrect_loss_per_char": 0.697541336218516, "correct_loss_per_token": 1.3873122930526733, "incorrect_loss_per_token": 1.395082672437032, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3873122930526733, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.3873122930526733, "logits_per_char": -0.6936561465263367, "num_chars": 2}, {"sum_logits": -1.4118984937667847, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.4118984937667847, "logits_per_char": -0.7059492468833923, "num_chars": 2}, {"sum_logits": -1.4826384782791138, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.4826384782791138, "logits_per_char": -0.7413192391395569, "num_chars": 2}, {"sum_logits": -1.2907110452651978, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": true, "logits_per_token": -1.2907110452651978, "logits_per_char": -0.6453555226325989, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 780, "native_id": "MDSA_2013_8_40", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.365457534790039, "incorrect_loss_raw": 1.3989932537078857, "correct_loss_per_char": 0.6827287673950195, "incorrect_loss_per_char": 0.6994966268539429, "correct_loss_per_token": 1.365457534790039, "incorrect_loss_per_token": 1.3989932537078857, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4373618364334106, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.4373618364334106, "logits_per_char": -0.7186809182167053, "num_chars": 2}, {"sum_logits": -1.38245689868927, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.38245689868927, "logits_per_char": -0.691228449344635, "num_chars": 2}, {"sum_logits": -1.365457534790039, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": true, "logits_per_token": -1.365457534790039, "logits_per_char": -0.6827287673950195, "num_chars": 2}, {"sum_logits": -1.3771610260009766, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.3771610260009766, "logits_per_char": -0.6885805130004883, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 781, "native_id": "Mercury_401684", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4452310800552368, "incorrect_loss_raw": 1.3737621704737346, "correct_loss_per_char": 0.7226155400276184, "incorrect_loss_per_char": 0.6868810852368673, "correct_loss_per_token": 1.4452310800552368, "incorrect_loss_per_token": 1.3737621704737346, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.355720043182373, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.355720043182373, "logits_per_char": -0.6778600215911865, "num_chars": 2}, {"sum_logits": -1.4452310800552368, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.4452310800552368, "logits_per_char": -0.7226155400276184, "num_chars": 2}, {"sum_logits": -1.435541033744812, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.435541033744812, "logits_per_char": -0.717770516872406, "num_chars": 2}, {"sum_logits": -1.3300254344940186, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": true, "logits_per_token": -1.3300254344940186, "logits_per_char": -0.6650127172470093, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 782, "native_id": "NCEOGA_2013_5_17", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3895535469055176, "incorrect_loss_raw": 1.3926137288411458, "correct_loss_per_char": 0.6947767734527588, "incorrect_loss_per_char": 0.6963068644205729, "correct_loss_per_token": 1.3895535469055176, "incorrect_loss_per_token": 1.3926137288411458, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3648828268051147, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.3648828268051147, "logits_per_char": -0.6824414134025574, "num_chars": 2}, {"sum_logits": -1.3895535469055176, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.3895535469055176, "logits_per_char": -0.6947767734527588, "num_chars": 2}, {"sum_logits": -1.4753018617630005, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.4753018617630005, "logits_per_char": -0.7376509308815002, "num_chars": 2}, {"sum_logits": -1.3376564979553223, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": true, "logits_per_token": -1.3376564979553223, "logits_per_char": -0.6688282489776611, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 783, "native_id": "Mercury_7116183", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3727327585220337, "incorrect_loss_raw": 1.3977781931559246, "correct_loss_per_char": 0.6863663792610168, "incorrect_loss_per_char": 0.6988890965779623, "correct_loss_per_token": 1.3727327585220337, "incorrect_loss_per_token": 1.3977781931559246, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.43731689453125, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.43731689453125, "logits_per_char": -0.718658447265625, "num_chars": 2}, {"sum_logits": -1.3727327585220337, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3727327585220337, "logits_per_char": -0.6863663792610168, "num_chars": 2}, {"sum_logits": -1.4461475610733032, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4461475610733032, "logits_per_char": -0.7230737805366516, "num_chars": 2}, {"sum_logits": -1.3098701238632202, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.3098701238632202, "logits_per_char": -0.6549350619316101, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 784, "native_id": "Mercury_7106628", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3622914552688599, "incorrect_loss_raw": 1.4023665189743042, "correct_loss_per_char": 0.6811457276344299, "incorrect_loss_per_char": 0.7011832594871521, "correct_loss_per_token": 1.3622914552688599, "incorrect_loss_per_token": 1.4023665189743042, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.314173936843872, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.314173936843872, "logits_per_char": -0.657086968421936, "num_chars": 2}, {"sum_logits": -1.3963080644607544, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3963080644607544, "logits_per_char": -0.6981540322303772, "num_chars": 2}, {"sum_logits": -1.4966175556182861, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4966175556182861, "logits_per_char": -0.7483087778091431, "num_chars": 2}, {"sum_logits": -1.3622914552688599, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3622914552688599, "logits_per_char": -0.6811457276344299, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 785, "native_id": "Mercury_7203473", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3546944856643677, "incorrect_loss_raw": 1.403125246365865, "correct_loss_per_char": 0.6773472428321838, "incorrect_loss_per_char": 0.7015626231829325, "correct_loss_per_token": 1.3546944856643677, "incorrect_loss_per_token": 1.403125246365865, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.473887324333191, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.473887324333191, "logits_per_char": -0.7369436621665955, "num_chars": 2}, {"sum_logits": -1.3664562702178955, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.3664562702178955, "logits_per_char": -0.6832281351089478, "num_chars": 2}, {"sum_logits": -1.3690321445465088, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.3690321445465088, "logits_per_char": -0.6845160722732544, "num_chars": 2}, {"sum_logits": -1.3546944856643677, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -1.3546944856643677, "logits_per_char": -0.6773472428321838, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 786, "native_id": "Mercury_SC_416108", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4424008131027222, "incorrect_loss_raw": 1.3753998279571533, "correct_loss_per_char": 0.7212004065513611, "incorrect_loss_per_char": 0.6876999139785767, "correct_loss_per_token": 1.4424008131027222, "incorrect_loss_per_token": 1.3753998279571533, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.307625651359558, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.307625651359558, "logits_per_char": -0.653812825679779, "num_chars": 2}, {"sum_logits": -1.3550456762313843, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3550456762313843, "logits_per_char": -0.6775228381156921, "num_chars": 2}, {"sum_logits": -1.4424008131027222, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4424008131027222, "logits_per_char": -0.7212004065513611, "num_chars": 2}, {"sum_logits": -1.4635281562805176, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4635281562805176, "logits_per_char": -0.7317640781402588, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 787, "native_id": "LEAP_2007_8_10418", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.46366548538208, "incorrect_loss_raw": 1.3717857201894124, "correct_loss_per_char": 0.73183274269104, "incorrect_loss_per_char": 0.6858928600947062, "correct_loss_per_token": 1.46366548538208, "incorrect_loss_per_token": 1.3717857201894124, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.477602243423462, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.477602243423462, "logits_per_char": -0.738801121711731, "num_chars": 2}, {"sum_logits": -1.391331434249878, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.391331434249878, "logits_per_char": -0.695665717124939, "num_chars": 2}, {"sum_logits": -1.46366548538208, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.46366548538208, "logits_per_char": -0.73183274269104, "num_chars": 2}, {"sum_logits": -1.2464234828948975, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": true, "logits_per_token": -1.2464234828948975, "logits_per_char": -0.6232117414474487, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 788, "native_id": "Mercury_7111178", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4724321365356445, "incorrect_loss_raw": 1.3660208384195964, "correct_loss_per_char": 0.7362160682678223, "incorrect_loss_per_char": 0.6830104192097982, "correct_loss_per_token": 1.4724321365356445, "incorrect_loss_per_token": 1.3660208384195964, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3899803161621094, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.3899803161621094, "logits_per_char": -0.6949901580810547, "num_chars": 2}, {"sum_logits": -1.419395089149475, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.419395089149475, "logits_per_char": -0.7096975445747375, "num_chars": 2}, {"sum_logits": -1.4724321365356445, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": false, "logits_per_token": -1.4724321365356445, "logits_per_char": -0.7362160682678223, "num_chars": 2}, {"sum_logits": -1.2886871099472046, "num_tokens": 1, "num_tokens_all": 453, "is_greedy": true, "logits_per_token": -1.2886871099472046, "logits_per_char": -0.6443435549736023, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 789, "native_id": "Mercury_7203560", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4503988027572632, "incorrect_loss_raw": 1.373633901278178, "correct_loss_per_char": 0.7251994013786316, "incorrect_loss_per_char": 0.686816950639089, "correct_loss_per_token": 1.4503988027572632, "incorrect_loss_per_token": 1.373633901278178, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4503988027572632, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.4503988027572632, "logits_per_char": -0.7251994013786316, "num_chars": 2}, {"sum_logits": -1.4013999700546265, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.4013999700546265, "logits_per_char": -0.7006999850273132, "num_chars": 2}, {"sum_logits": -1.449514389038086, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.449514389038086, "logits_per_char": -0.724757194519043, "num_chars": 2}, {"sum_logits": -1.2699873447418213, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": true, "logits_per_token": -1.2699873447418213, "logits_per_char": -0.6349936723709106, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 790, "native_id": "ACTAAP_2013_7_2", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4080902338027954, "incorrect_loss_raw": 1.3851693471272786, "correct_loss_per_char": 0.7040451169013977, "incorrect_loss_per_char": 0.6925846735636393, "correct_loss_per_token": 1.4080902338027954, "incorrect_loss_per_token": 1.3851693471272786, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4080902338027954, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.4080902338027954, "logits_per_char": -0.7040451169013977, "num_chars": 2}, {"sum_logits": -1.4306021928787231, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.4306021928787231, "logits_per_char": -0.7153010964393616, "num_chars": 2}, {"sum_logits": -1.3423634767532349, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": true, "logits_per_token": -1.3423634767532349, "logits_per_char": -0.6711817383766174, "num_chars": 2}, {"sum_logits": -1.382542371749878, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.382542371749878, "logits_per_char": -0.691271185874939, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 791, "native_id": "MCAS_2012_8_23640", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3898581266403198, "incorrect_loss_raw": 1.395727515220642, "correct_loss_per_char": 0.6949290633201599, "incorrect_loss_per_char": 0.697863757610321, "correct_loss_per_token": 1.3898581266403198, "incorrect_loss_per_token": 1.395727515220642, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3220744132995605, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.3220744132995605, "logits_per_char": -0.6610372066497803, "num_chars": 2}, {"sum_logits": -1.3611948490142822, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3611948490142822, "logits_per_char": -0.6805974245071411, "num_chars": 2}, {"sum_logits": -1.5039132833480835, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.5039132833480835, "logits_per_char": -0.7519566416740417, "num_chars": 2}, {"sum_logits": -1.3898581266403198, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3898581266403198, "logits_per_char": -0.6949290633201599, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 792, "native_id": "Mercury_404272", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.314857840538025, "incorrect_loss_raw": 1.4210952520370483, "correct_loss_per_char": 0.6574289202690125, "incorrect_loss_per_char": 0.7105476260185242, "correct_loss_per_token": 1.314857840538025, "incorrect_loss_per_token": 1.4210952520370483, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3479636907577515, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.3479636907577515, "logits_per_char": -0.6739818453788757, "num_chars": 2}, {"sum_logits": -1.314857840538025, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.314857840538025, "logits_per_char": -0.6574289202690125, "num_chars": 2}, {"sum_logits": -1.4940714836120605, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.4940714836120605, "logits_per_char": -0.7470357418060303, "num_chars": 2}, {"sum_logits": -1.421250581741333, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.421250581741333, "logits_per_char": -0.7106252908706665, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 793, "native_id": "MCAS_2009_8_17", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.285130500793457, "incorrect_loss_raw": 1.4350287119547527, "correct_loss_per_char": 0.6425652503967285, "incorrect_loss_per_char": 0.7175143559773763, "correct_loss_per_token": 1.285130500793457, "incorrect_loss_per_token": 1.4350287119547527, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2775704860687256, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.2775704860687256, "logits_per_char": -0.6387852430343628, "num_chars": 2}, {"sum_logits": -1.285130500793457, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.285130500793457, "logits_per_char": -0.6425652503967285, "num_chars": 2}, {"sum_logits": -1.5069905519485474, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.5069905519485474, "logits_per_char": -0.7534952759742737, "num_chars": 2}, {"sum_logits": -1.5205250978469849, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.5205250978469849, "logits_per_char": -0.7602625489234924, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 794, "native_id": "AIMS_2008_4_5", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3991641998291016, "incorrect_loss_raw": 1.3904008467992146, "correct_loss_per_char": 0.6995820999145508, "incorrect_loss_per_char": 0.6952004233996073, "correct_loss_per_token": 1.3991641998291016, "incorrect_loss_per_token": 1.3904008467992146, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3502551317214966, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.3502551317214966, "logits_per_char": -0.6751275658607483, "num_chars": 2}, {"sum_logits": -1.477957010269165, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.477957010269165, "logits_per_char": -0.7389785051345825, "num_chars": 2}, {"sum_logits": -1.3991641998291016, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.3991641998291016, "logits_per_char": -0.6995820999145508, "num_chars": 2}, {"sum_logits": -1.3429903984069824, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.3429903984069824, "logits_per_char": -0.6714951992034912, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 795, "native_id": "Mercury_7236513", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3855321407318115, "incorrect_loss_raw": 1.392350673675537, "correct_loss_per_char": 0.6927660703659058, "incorrect_loss_per_char": 0.6961753368377686, "correct_loss_per_token": 1.3855321407318115, "incorrect_loss_per_token": 1.392350673675537, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.369491457939148, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.369491457939148, "logits_per_char": -0.684745728969574, "num_chars": 2}, {"sum_logits": -1.3830350637435913, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3830350637435913, "logits_per_char": -0.6915175318717957, "num_chars": 2}, {"sum_logits": -1.424525499343872, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.424525499343872, "logits_per_char": -0.712262749671936, "num_chars": 2}, {"sum_logits": -1.3855321407318115, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3855321407318115, "logits_per_char": -0.6927660703659058, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 796, "native_id": "Mercury_SC_LBS10027", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3880373239517212, "incorrect_loss_raw": 1.3906916379928589, "correct_loss_per_char": 0.6940186619758606, "incorrect_loss_per_char": 0.6953458189964294, "correct_loss_per_token": 1.3880373239517212, "incorrect_loss_per_token": 1.3906916379928589, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4035420417785645, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.4035420417785645, "logits_per_char": -0.7017710208892822, "num_chars": 2}, {"sum_logits": -1.3880373239517212, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.3880373239517212, "logits_per_char": -0.6940186619758606, "num_chars": 2}, {"sum_logits": -1.354691505432129, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": true, "logits_per_token": -1.354691505432129, "logits_per_char": -0.6773457527160645, "num_chars": 2}, {"sum_logits": -1.4138413667678833, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.4138413667678833, "logits_per_char": -0.7069206833839417, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 797, "native_id": "Mercury_189053", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3584980964660645, "incorrect_loss_raw": 1.4020331303278606, "correct_loss_per_char": 0.6792490482330322, "incorrect_loss_per_char": 0.7010165651639303, "correct_loss_per_token": 1.3584980964660645, "incorrect_loss_per_token": 1.4020331303278606, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3642457723617554, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.3642457723617554, "logits_per_char": -0.6821228861808777, "num_chars": 2}, {"sum_logits": -1.392820119857788, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.392820119857788, "logits_per_char": -0.696410059928894, "num_chars": 2}, {"sum_logits": -1.449033498764038, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.449033498764038, "logits_per_char": -0.724516749382019, "num_chars": 2}, {"sum_logits": -1.3584980964660645, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.3584980964660645, "logits_per_char": -0.6792490482330322, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 798, "native_id": "Mercury_SC_414271", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3939422369003296, "incorrect_loss_raw": 1.3898944854736328, "correct_loss_per_char": 0.6969711184501648, "incorrect_loss_per_char": 0.6949472427368164, "correct_loss_per_token": 1.3939422369003296, "incorrect_loss_per_token": 1.3898944854736328, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.417881965637207, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.417881965637207, "logits_per_char": -0.7089409828186035, "num_chars": 2}, {"sum_logits": -1.3558456897735596, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.3558456897735596, "logits_per_char": -0.6779228448867798, "num_chars": 2}, {"sum_logits": -1.3939422369003296, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.3939422369003296, "logits_per_char": -0.6969711184501648, "num_chars": 2}, {"sum_logits": -1.3959558010101318, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.3959558010101318, "logits_per_char": -0.6979779005050659, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 799, "native_id": "Mercury_408922", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3459553718566895, "incorrect_loss_raw": 1.4103200435638428, "correct_loss_per_char": 0.6729776859283447, "incorrect_loss_per_char": 0.7051600217819214, "correct_loss_per_token": 1.3459553718566895, "incorrect_loss_per_token": 1.4103200435638428, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5107969045639038, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": false, "logits_per_token": -1.5107969045639038, "logits_per_char": -0.7553984522819519, "num_chars": 2}, {"sum_logits": -1.4488409757614136, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": false, "logits_per_token": -1.4488409757614136, "logits_per_char": -0.7244204878807068, "num_chars": 2}, {"sum_logits": -1.3459553718566895, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": false, "logits_per_token": -1.3459553718566895, "logits_per_char": -0.6729776859283447, "num_chars": 2}, {"sum_logits": -1.271322250366211, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": true, "logits_per_token": -1.271322250366211, "logits_per_char": -0.6356611251831055, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 800, "native_id": "Mercury_7264093", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2996692657470703, "incorrect_loss_raw": 1.4359068473180134, "correct_loss_per_char": 0.6498346328735352, "incorrect_loss_per_char": 0.7179534236590067, "correct_loss_per_token": 1.2996692657470703, "incorrect_loss_per_token": 1.4359068473180134, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6735384464263916, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.6735384464263916, "logits_per_char": -0.8367692232131958, "num_chars": 2}, {"sum_logits": -1.3438186645507812, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3438186645507812, "logits_per_char": -0.6719093322753906, "num_chars": 2}, {"sum_logits": -1.2996692657470703, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.2996692657470703, "logits_per_char": -0.6498346328735352, "num_chars": 2}, {"sum_logits": -1.2903634309768677, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.2903634309768677, "logits_per_char": -0.6451817154884338, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 801, "native_id": "Mercury_SC_LBS11009", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6376628875732422, "incorrect_loss_raw": 1.3727209170659382, "correct_loss_per_char": 0.8188314437866211, "incorrect_loss_per_char": 0.6863604585329691, "correct_loss_per_token": 1.6376628875732422, "incorrect_loss_per_token": 1.3727209170659382, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0053824186325073, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.0053824186325073, "logits_per_char": -0.5026912093162537, "num_chars": 2}, {"sum_logits": -1.3643584251403809, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3643584251403809, "logits_per_char": -0.6821792125701904, "num_chars": 2}, {"sum_logits": -1.6376628875732422, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.6376628875732422, "logits_per_char": -0.8188314437866211, "num_chars": 2}, {"sum_logits": -1.7484219074249268, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.7484219074249268, "logits_per_char": -0.8742109537124634, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 802, "native_id": "Mercury_7191433", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.531895399093628, "incorrect_loss_raw": 1.3515352408091228, "correct_loss_per_char": 0.765947699546814, "incorrect_loss_per_char": 0.6757676204045614, "correct_loss_per_token": 1.531895399093628, "incorrect_loss_per_token": 1.3515352408091228, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4056916236877441, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4056916236877441, "logits_per_char": -0.7028458118438721, "num_chars": 2}, {"sum_logits": -1.4337913990020752, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4337913990020752, "logits_per_char": -0.7168956995010376, "num_chars": 2}, {"sum_logits": -1.531895399093628, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.531895399093628, "logits_per_char": -0.765947699546814, "num_chars": 2}, {"sum_logits": -1.2151226997375488, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.2151226997375488, "logits_per_char": -0.6075613498687744, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 803, "native_id": "MEAP_2005_5_14", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3784160614013672, "incorrect_loss_raw": 1.402154008547465, "correct_loss_per_char": 0.6892080307006836, "incorrect_loss_per_char": 0.7010770042737325, "correct_loss_per_token": 1.3784160614013672, "incorrect_loss_per_token": 1.402154008547465, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.569746971130371, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.569746971130371, "logits_per_char": -0.7848734855651855, "num_chars": 2}, {"sum_logits": -1.320438265800476, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.320438265800476, "logits_per_char": -0.660219132900238, "num_chars": 2}, {"sum_logits": -1.3162767887115479, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.3162767887115479, "logits_per_char": -0.6581383943557739, "num_chars": 2}, {"sum_logits": -1.3784160614013672, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.3784160614013672, "logits_per_char": -0.6892080307006836, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 804, "native_id": "Mercury_416683", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.51972496509552, "incorrect_loss_raw": 1.3513926267623901, "correct_loss_per_char": 0.75986248254776, "incorrect_loss_per_char": 0.6756963133811951, "correct_loss_per_token": 1.51972496509552, "incorrect_loss_per_token": 1.3513926267623901, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3529072999954224, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.3529072999954224, "logits_per_char": -0.6764536499977112, "num_chars": 2}, {"sum_logits": -1.3384021520614624, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": true, "logits_per_token": -1.3384021520614624, "logits_per_char": -0.6692010760307312, "num_chars": 2}, {"sum_logits": -1.51972496509552, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.51972496509552, "logits_per_char": -0.75986248254776, "num_chars": 2}, {"sum_logits": -1.3628684282302856, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.3628684282302856, "logits_per_char": -0.6814342141151428, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 805, "native_id": "Mercury_7040775", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3032373189926147, "incorrect_loss_raw": 1.4209250609079997, "correct_loss_per_char": 0.6516186594963074, "incorrect_loss_per_char": 0.7104625304539999, "correct_loss_per_token": 1.3032373189926147, "incorrect_loss_per_token": 1.4209250609079997, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3898565769195557, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3898565769195557, "logits_per_char": -0.6949282884597778, "num_chars": 2}, {"sum_logits": -1.3032373189926147, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.3032373189926147, "logits_per_char": -0.6516186594963074, "num_chars": 2}, {"sum_logits": -1.444165587425232, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.444165587425232, "logits_per_char": -0.722082793712616, "num_chars": 2}, {"sum_logits": -1.4287530183792114, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4287530183792114, "logits_per_char": -0.7143765091896057, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 806, "native_id": "Mercury_7222600", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.470831036567688, "incorrect_loss_raw": 1.3658371766408284, "correct_loss_per_char": 0.735415518283844, "incorrect_loss_per_char": 0.6829185883204142, "correct_loss_per_token": 1.470831036567688, "incorrect_loss_per_token": 1.3658371766408284, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3041105270385742, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.3041105270385742, "logits_per_char": -0.6520552635192871, "num_chars": 2}, {"sum_logits": -1.399179458618164, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.399179458618164, "logits_per_char": -0.699589729309082, "num_chars": 2}, {"sum_logits": -1.470831036567688, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.470831036567688, "logits_per_char": -0.735415518283844, "num_chars": 2}, {"sum_logits": -1.394221544265747, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.394221544265747, "logits_per_char": -0.6971107721328735, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 807, "native_id": "MCAS_2001_5_3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4035061597824097, "incorrect_loss_raw": 1.3883891900380452, "correct_loss_per_char": 0.7017530798912048, "incorrect_loss_per_char": 0.6941945950190226, "correct_loss_per_token": 1.4035061597824097, "incorrect_loss_per_token": 1.3883891900380452, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4485642910003662, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4485642910003662, "logits_per_char": -0.7242821455001831, "num_chars": 2}, {"sum_logits": -1.345942497253418, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.345942497253418, "logits_per_char": -0.672971248626709, "num_chars": 2}, {"sum_logits": -1.3706607818603516, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3706607818603516, "logits_per_char": -0.6853303909301758, "num_chars": 2}, {"sum_logits": -1.4035061597824097, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4035061597824097, "logits_per_char": -0.7017530798912048, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 808, "native_id": "MCAS_2004_8_7", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3225818872451782, "incorrect_loss_raw": 1.4158575932184856, "correct_loss_per_char": 0.6612909436225891, "incorrect_loss_per_char": 0.7079287966092428, "correct_loss_per_token": 1.3225818872451782, "incorrect_loss_per_token": 1.4158575932184856, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4386667013168335, "num_tokens": 1, "num_tokens_all": 435, "is_greedy": false, "logits_per_token": -1.4386667013168335, "logits_per_char": -0.7193333506584167, "num_chars": 2}, {"sum_logits": -1.4171128273010254, "num_tokens": 1, "num_tokens_all": 435, "is_greedy": false, "logits_per_token": -1.4171128273010254, "logits_per_char": -0.7085564136505127, "num_chars": 2}, {"sum_logits": -1.3917932510375977, "num_tokens": 1, "num_tokens_all": 435, "is_greedy": false, "logits_per_token": -1.3917932510375977, "logits_per_char": -0.6958966255187988, "num_chars": 2}, {"sum_logits": -1.3225818872451782, "num_tokens": 1, "num_tokens_all": 435, "is_greedy": true, "logits_per_token": -1.3225818872451782, "logits_per_char": -0.6612909436225891, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 809, "native_id": "Mercury_415268", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3379188776016235, "incorrect_loss_raw": 1.4135302702585857, "correct_loss_per_char": 0.6689594388008118, "incorrect_loss_per_char": 0.7067651351292928, "correct_loss_per_token": 1.3379188776016235, "incorrect_loss_per_token": 1.4135302702585857, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3181262016296387, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": true, "logits_per_token": -1.3181262016296387, "logits_per_char": -0.6590631008148193, "num_chars": 2}, {"sum_logits": -1.4571478366851807, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.4571478366851807, "logits_per_char": -0.7285739183425903, "num_chars": 2}, {"sum_logits": -1.4653167724609375, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.4653167724609375, "logits_per_char": -0.7326583862304688, "num_chars": 2}, {"sum_logits": -1.3379188776016235, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.3379188776016235, "logits_per_char": -0.6689594388008118, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 810, "native_id": "Mercury_7017710", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3675979375839233, "incorrect_loss_raw": 1.402260184288025, "correct_loss_per_char": 0.6837989687919617, "incorrect_loss_per_char": 0.7011300921440125, "correct_loss_per_token": 1.3675979375839233, "incorrect_loss_per_token": 1.402260184288025, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3718990087509155, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3718990087509155, "logits_per_char": -0.6859495043754578, "num_chars": 2}, {"sum_logits": -1.5253682136535645, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.5253682136535645, "logits_per_char": -0.7626841068267822, "num_chars": 2}, {"sum_logits": -1.3675979375839233, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3675979375839233, "logits_per_char": -0.6837989687919617, "num_chars": 2}, {"sum_logits": -1.3095133304595947, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.3095133304595947, "logits_per_char": -0.6547566652297974, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 811, "native_id": "Mercury_7210123", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.544014811515808, "incorrect_loss_raw": 1.3464802900950115, "correct_loss_per_char": 0.772007405757904, "incorrect_loss_per_char": 0.6732401450475057, "correct_loss_per_token": 1.544014811515808, "incorrect_loss_per_token": 1.3464802900950115, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.544014811515808, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.544014811515808, "logits_per_char": -0.772007405757904, "num_chars": 2}, {"sum_logits": -1.3406926393508911, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3406926393508911, "logits_per_char": -0.6703463196754456, "num_chars": 2}, {"sum_logits": -1.4257580041885376, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4257580041885376, "logits_per_char": -0.7128790020942688, "num_chars": 2}, {"sum_logits": -1.2729902267456055, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.2729902267456055, "logits_per_char": -0.6364951133728027, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 812, "native_id": "MCAS_2009_5_6519", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4074128866195679, "incorrect_loss_raw": 1.386293609937032, "correct_loss_per_char": 0.7037064433097839, "incorrect_loss_per_char": 0.693146804968516, "correct_loss_per_token": 1.4074128866195679, "incorrect_loss_per_token": 1.386293609937032, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.421985149383545, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.421985149383545, "logits_per_char": -0.7109925746917725, "num_chars": 2}, {"sum_logits": -1.413708209991455, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.413708209991455, "logits_per_char": -0.7068541049957275, "num_chars": 2}, {"sum_logits": -1.4074128866195679, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4074128866195679, "logits_per_char": -0.7037064433097839, "num_chars": 2}, {"sum_logits": -1.3231874704360962, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.3231874704360962, "logits_per_char": -0.6615937352180481, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 813, "native_id": "Mercury_401502", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1787779331207275, "incorrect_loss_raw": 1.4814838965733845, "correct_loss_per_char": 0.5893889665603638, "incorrect_loss_per_char": 0.7407419482866923, "correct_loss_per_token": 1.1787779331207275, "incorrect_loss_per_token": 1.4814838965733845, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.452343463897705, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.452343463897705, "logits_per_char": -0.7261717319488525, "num_chars": 2}, {"sum_logits": -1.5591262578964233, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.5591262578964233, "logits_per_char": -0.7795631289482117, "num_chars": 2}, {"sum_logits": -1.4329819679260254, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.4329819679260254, "logits_per_char": -0.7164909839630127, "num_chars": 2}, {"sum_logits": -1.1787779331207275, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": true, "logits_per_token": -1.1787779331207275, "logits_per_char": -0.5893889665603638, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 814, "native_id": "Mercury_7109498", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3254282474517822, "incorrect_loss_raw": 1.4272603193918865, "correct_loss_per_char": 0.6627141237258911, "incorrect_loss_per_char": 0.7136301596959432, "correct_loss_per_token": 1.3254282474517822, "incorrect_loss_per_token": 1.4272603193918865, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6613770723342896, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.6613770723342896, "logits_per_char": -0.8306885361671448, "num_chars": 2}, {"sum_logits": -1.3254282474517822, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.3254282474517822, "logits_per_char": -0.6627141237258911, "num_chars": 2}, {"sum_logits": -1.3740202188491821, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.3740202188491821, "logits_per_char": -0.6870101094245911, "num_chars": 2}, {"sum_logits": -1.2463836669921875, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": true, "logits_per_token": -1.2463836669921875, "logits_per_char": -0.6231918334960938, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 815, "native_id": "VASoL_2008_5_10", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4013988971710205, "incorrect_loss_raw": 1.3894363244374592, "correct_loss_per_char": 0.7006994485855103, "incorrect_loss_per_char": 0.6947181622187296, "correct_loss_per_token": 1.4013988971710205, "incorrect_loss_per_token": 1.3894363244374592, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4013988971710205, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4013988971710205, "logits_per_char": -0.7006994485855103, "num_chars": 2}, {"sum_logits": -1.3901780843734741, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3901780843734741, "logits_per_char": -0.6950890421867371, "num_chars": 2}, {"sum_logits": -1.3804980516433716, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.3804980516433716, "logits_per_char": -0.6902490258216858, "num_chars": 2}, {"sum_logits": -1.3976328372955322, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3976328372955322, "logits_per_char": -0.6988164186477661, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 816, "native_id": "MCAS_2006_9_4", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.331146240234375, "incorrect_loss_raw": 1.4169498284657795, "correct_loss_per_char": 0.6655731201171875, "incorrect_loss_per_char": 0.7084749142328898, "correct_loss_per_token": 1.331146240234375, "incorrect_loss_per_token": 1.4169498284657795, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.331146240234375, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": false, "logits_per_token": -1.331146240234375, "logits_per_char": -0.6655731201171875, "num_chars": 2}, {"sum_logits": -1.4832435846328735, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": false, "logits_per_token": -1.4832435846328735, "logits_per_char": -0.7416217923164368, "num_chars": 2}, {"sum_logits": -1.4843621253967285, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": false, "logits_per_token": -1.4843621253967285, "logits_per_char": -0.7421810626983643, "num_chars": 2}, {"sum_logits": -1.2832437753677368, "num_tokens": 1, "num_tokens_all": 464, "is_greedy": true, "logits_per_token": -1.2832437753677368, "logits_per_char": -0.6416218876838684, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 817, "native_id": "Mercury_402341", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.229331374168396, "incorrect_loss_raw": 1.4548184474309285, "correct_loss_per_char": 0.614665687084198, "incorrect_loss_per_char": 0.7274092237154642, "correct_loss_per_token": 1.229331374168396, "incorrect_loss_per_token": 1.4548184474309285, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4594717025756836, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4594717025756836, "logits_per_char": -0.7297358512878418, "num_chars": 2}, {"sum_logits": -1.5609756708145142, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.5609756708145142, "logits_per_char": -0.7804878354072571, "num_chars": 2}, {"sum_logits": -1.344007968902588, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.344007968902588, "logits_per_char": -0.672003984451294, "num_chars": 2}, {"sum_logits": -1.229331374168396, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.229331374168396, "logits_per_char": -0.614665687084198, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 818, "native_id": "MCAS_2006_9_34", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4865847826004028, "incorrect_loss_raw": 1.3601483503977458, "correct_loss_per_char": 0.7432923913002014, "incorrect_loss_per_char": 0.6800741751988729, "correct_loss_per_token": 1.4865847826004028, "incorrect_loss_per_token": 1.3601483503977458, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3351142406463623, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.3351142406463623, "logits_per_char": -0.6675571203231812, "num_chars": 2}, {"sum_logits": -1.4013049602508545, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4013049602508545, "logits_per_char": -0.7006524801254272, "num_chars": 2}, {"sum_logits": -1.3440258502960205, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3440258502960205, "logits_per_char": -0.6720129251480103, "num_chars": 2}, {"sum_logits": -1.4865847826004028, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4865847826004028, "logits_per_char": -0.7432923913002014, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 819, "native_id": "Mercury_7267715", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.423479437828064, "incorrect_loss_raw": 1.382911245028178, "correct_loss_per_char": 0.711739718914032, "incorrect_loss_per_char": 0.691455622514089, "correct_loss_per_token": 1.423479437828064, "incorrect_loss_per_token": 1.382911245028178, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4149574041366577, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.4149574041366577, "logits_per_char": -0.7074787020683289, "num_chars": 2}, {"sum_logits": -1.4553719758987427, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.4553719758987427, "logits_per_char": -0.7276859879493713, "num_chars": 2}, {"sum_logits": -1.423479437828064, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.423479437828064, "logits_per_char": -0.711739718914032, "num_chars": 2}, {"sum_logits": -1.2784043550491333, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": true, "logits_per_token": -1.2784043550491333, "logits_per_char": -0.6392021775245667, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 820, "native_id": "Mercury_SC_413089", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3212213516235352, "incorrect_loss_raw": 1.416411280632019, "correct_loss_per_char": 0.6606106758117676, "incorrect_loss_per_char": 0.7082056403160095, "correct_loss_per_token": 1.3212213516235352, "incorrect_loss_per_token": 1.416411280632019, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3651199340820312, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.3651199340820312, "logits_per_char": -0.6825599670410156, "num_chars": 2}, {"sum_logits": -1.4533777236938477, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4533777236938477, "logits_per_char": -0.7266888618469238, "num_chars": 2}, {"sum_logits": -1.4307361841201782, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4307361841201782, "logits_per_char": -0.7153680920600891, "num_chars": 2}, {"sum_logits": -1.3212213516235352, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.3212213516235352, "logits_per_char": -0.6606106758117676, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 821, "native_id": "Mercury_SC_401656", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1799499988555908, "incorrect_loss_raw": 1.4746123552322388, "correct_loss_per_char": 0.5899749994277954, "incorrect_loss_per_char": 0.7373061776161194, "correct_loss_per_token": 1.1799499988555908, "incorrect_loss_per_token": 1.4746123552322388, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1799499988555908, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.1799499988555908, "logits_per_char": -0.5899749994277954, "num_chars": 2}, {"sum_logits": -1.4103386402130127, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4103386402130127, "logits_per_char": -0.7051693201065063, "num_chars": 2}, {"sum_logits": -1.5454336404800415, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.5454336404800415, "logits_per_char": -0.7727168202400208, "num_chars": 2}, {"sum_logits": -1.468064785003662, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.468064785003662, "logits_per_char": -0.734032392501831, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 822, "native_id": "Mercury_407019", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2940982580184937, "incorrect_loss_raw": 1.4252185821533203, "correct_loss_per_char": 0.6470491290092468, "incorrect_loss_per_char": 0.7126092910766602, "correct_loss_per_token": 1.2940982580184937, "incorrect_loss_per_token": 1.4252185821533203, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4708839654922485, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4708839654922485, "logits_per_char": -0.7354419827461243, "num_chars": 2}, {"sum_logits": -1.4090461730957031, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4090461730957031, "logits_per_char": -0.7045230865478516, "num_chars": 2}, {"sum_logits": -1.3957256078720093, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.3957256078720093, "logits_per_char": -0.6978628039360046, "num_chars": 2}, {"sum_logits": -1.2940982580184937, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.2940982580184937, "logits_per_char": -0.6470491290092468, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 823, "native_id": "Mercury_417128", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3535066843032837, "incorrect_loss_raw": 1.4063631296157837, "correct_loss_per_char": 0.6767533421516418, "incorrect_loss_per_char": 0.7031815648078918, "correct_loss_per_token": 1.3535066843032837, "incorrect_loss_per_token": 1.4063631296157837, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.482754111289978, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.482754111289978, "logits_per_char": -0.741377055644989, "num_chars": 2}, {"sum_logits": -1.3535066843032837, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.3535066843032837, "logits_per_char": -0.6767533421516418, "num_chars": 2}, {"sum_logits": -1.4398109912872314, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": false, "logits_per_token": -1.4398109912872314, "logits_per_char": -0.7199054956436157, "num_chars": 2}, {"sum_logits": -1.2965242862701416, "num_tokens": 1, "num_tokens_all": 389, "is_greedy": true, "logits_per_token": -1.2965242862701416, "logits_per_char": -0.6482621431350708, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 824, "native_id": "Mercury_7081305", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4472360610961914, "incorrect_loss_raw": 1.372973084449768, "correct_loss_per_char": 0.7236180305480957, "incorrect_loss_per_char": 0.686486542224884, "correct_loss_per_token": 1.4472360610961914, "incorrect_loss_per_token": 1.372973084449768, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3139866590499878, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.3139866590499878, "logits_per_char": -0.6569933295249939, "num_chars": 2}, {"sum_logits": -1.4299176931381226, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4299176931381226, "logits_per_char": -0.7149588465690613, "num_chars": 2}, {"sum_logits": -1.3750149011611938, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3750149011611938, "logits_per_char": -0.6875074505805969, "num_chars": 2}, {"sum_logits": -1.4472360610961914, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4472360610961914, "logits_per_char": -0.7236180305480957, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 825, "native_id": "NYSEDREGENTS_2015_8_3", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4711263179779053, "incorrect_loss_raw": 1.3652077913284302, "correct_loss_per_char": 0.7355631589889526, "incorrect_loss_per_char": 0.6826038956642151, "correct_loss_per_token": 1.4711263179779053, "incorrect_loss_per_token": 1.3652077913284302, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4089634418487549, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4089634418487549, "logits_per_char": -0.7044817209243774, "num_chars": 2}, {"sum_logits": -1.4711263179779053, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4711263179779053, "logits_per_char": -0.7355631589889526, "num_chars": 2}, {"sum_logits": -1.3823946714401245, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3823946714401245, "logits_per_char": -0.6911973357200623, "num_chars": 2}, {"sum_logits": -1.3042652606964111, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.3042652606964111, "logits_per_char": -0.6521326303482056, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 826, "native_id": "MEA_2016_8_15", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5176376104354858, "incorrect_loss_raw": 1.3535643418629963, "correct_loss_per_char": 0.7588188052177429, "incorrect_loss_per_char": 0.6767821709314982, "correct_loss_per_token": 1.5176376104354858, "incorrect_loss_per_token": 1.3535643418629963, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5176376104354858, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.5176376104354858, "logits_per_char": -0.7588188052177429, "num_chars": 2}, {"sum_logits": -1.3663843870162964, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.3663843870162964, "logits_per_char": -0.6831921935081482, "num_chars": 2}, {"sum_logits": -1.4045194387435913, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.4045194387435913, "logits_per_char": -0.7022597193717957, "num_chars": 2}, {"sum_logits": -1.2897891998291016, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": true, "logits_per_token": -1.2897891998291016, "logits_per_char": -0.6448945999145508, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 827, "native_id": "ACTAAP_2015_7_9", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3741486072540283, "incorrect_loss_raw": 1.405986984570821, "correct_loss_per_char": 0.6870743036270142, "incorrect_loss_per_char": 0.7029934922854105, "correct_loss_per_token": 1.3741486072540283, "incorrect_loss_per_token": 1.405986984570821, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4822944402694702, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4822944402694702, "logits_per_char": -0.7411472201347351, "num_chars": 2}, {"sum_logits": -1.4828094244003296, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4828094244003296, "logits_per_char": -0.7414047122001648, "num_chars": 2}, {"sum_logits": -1.2528570890426636, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.2528570890426636, "logits_per_char": -0.6264285445213318, "num_chars": 2}, {"sum_logits": -1.3741486072540283, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.3741486072540283, "logits_per_char": -0.6870743036270142, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 828, "native_id": "Mercury_7216423", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4099757671356201, "incorrect_loss_raw": 1.3869221607844036, "correct_loss_per_char": 0.7049878835678101, "incorrect_loss_per_char": 0.6934610803922018, "correct_loss_per_token": 1.4099757671356201, "incorrect_loss_per_token": 1.3869221607844036, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.445304274559021, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.445304274559021, "logits_per_char": -0.7226521372795105, "num_chars": 2}, {"sum_logits": -1.4099757671356201, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4099757671356201, "logits_per_char": -0.7049878835678101, "num_chars": 2}, {"sum_logits": -1.415061593055725, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.415061593055725, "logits_per_char": -0.7075307965278625, "num_chars": 2}, {"sum_logits": -1.3004006147384644, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.3004006147384644, "logits_per_char": -0.6502003073692322, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 829, "native_id": "Mercury_416633", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2813892364501953, "incorrect_loss_raw": 1.4366843700408936, "correct_loss_per_char": 0.6406946182250977, "incorrect_loss_per_char": 0.7183421850204468, "correct_loss_per_token": 1.2813892364501953, "incorrect_loss_per_token": 1.4366843700408936, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2813892364501953, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.2813892364501953, "logits_per_char": -0.6406946182250977, "num_chars": 2}, {"sum_logits": -1.409446358680725, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.409446358680725, "logits_per_char": -0.7047231793403625, "num_chars": 2}, {"sum_logits": -1.5563145875930786, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.5563145875930786, "logits_per_char": -0.7781572937965393, "num_chars": 2}, {"sum_logits": -1.344292163848877, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.344292163848877, "logits_per_char": -0.6721460819244385, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 830, "native_id": "Mercury_7038518", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4896577596664429, "incorrect_loss_raw": 1.3788752555847168, "correct_loss_per_char": 0.7448288798332214, "incorrect_loss_per_char": 0.6894376277923584, "correct_loss_per_token": 1.4896577596664429, "incorrect_loss_per_token": 1.3788752555847168, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1525349617004395, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.1525349617004395, "logits_per_char": -0.5762674808502197, "num_chars": 2}, {"sum_logits": -1.4130610227584839, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4130610227584839, "logits_per_char": -0.7065305113792419, "num_chars": 2}, {"sum_logits": -1.571029782295227, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.571029782295227, "logits_per_char": -0.7855148911476135, "num_chars": 2}, {"sum_logits": -1.4896577596664429, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4896577596664429, "logits_per_char": -0.7448288798332214, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 831, "native_id": "Mercury_7085225", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.25620436668396, "incorrect_loss_raw": 1.4439818461736043, "correct_loss_per_char": 0.62810218334198, "incorrect_loss_per_char": 0.7219909230868021, "correct_loss_per_token": 1.25620436668396, "incorrect_loss_per_token": 1.4439818461736043, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.345990538597107, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.345990538597107, "logits_per_char": -0.6729952692985535, "num_chars": 2}, {"sum_logits": -1.25620436668396, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.25620436668396, "logits_per_char": -0.62810218334198, "num_chars": 2}, {"sum_logits": -1.420986533164978, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.420986533164978, "logits_per_char": -0.710493266582489, "num_chars": 2}, {"sum_logits": -1.564968466758728, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.564968466758728, "logits_per_char": -0.782484233379364, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 832, "native_id": "LEAP__4_10225", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4730178117752075, "incorrect_loss_raw": 1.3692597945531209, "correct_loss_per_char": 0.7365089058876038, "incorrect_loss_per_char": 0.6846298972765604, "correct_loss_per_token": 1.4730178117752075, "incorrect_loss_per_token": 1.3692597945531209, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3012018203735352, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.3012018203735352, "logits_per_char": -0.6506009101867676, "num_chars": 2}, {"sum_logits": -1.4730178117752075, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.4730178117752075, "logits_per_char": -0.7365089058876038, "num_chars": 2}, {"sum_logits": -1.5063976049423218, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.5063976049423218, "logits_per_char": -0.7531988024711609, "num_chars": 2}, {"sum_logits": -1.3001799583435059, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": true, "logits_per_token": -1.3001799583435059, "logits_per_char": -0.6500899791717529, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 833, "native_id": "Mercury_SC_401661", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3279730081558228, "incorrect_loss_raw": 1.416178027788798, "correct_loss_per_char": 0.6639865040779114, "incorrect_loss_per_char": 0.708089013894399, "correct_loss_per_token": 1.3279730081558228, "incorrect_loss_per_token": 1.416178027788798, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.34976327419281, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.34976327419281, "logits_per_char": -0.674881637096405, "num_chars": 2}, {"sum_logits": -1.3927360773086548, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3927360773086548, "logits_per_char": -0.6963680386543274, "num_chars": 2}, {"sum_logits": -1.5060347318649292, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.5060347318649292, "logits_per_char": -0.7530173659324646, "num_chars": 2}, {"sum_logits": -1.3279730081558228, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.3279730081558228, "logits_per_char": -0.6639865040779114, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 834, "native_id": "TIMSS_1995_8_Q15", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3687057495117188, "incorrect_loss_raw": 1.4006990591684978, "correct_loss_per_char": 0.6843528747558594, "incorrect_loss_per_char": 0.7003495295842489, "correct_loss_per_token": 1.3687057495117188, "incorrect_loss_per_token": 1.4006990591684978, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3687057495117188, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3687057495117188, "logits_per_char": -0.6843528747558594, "num_chars": 2}, {"sum_logits": -1.4118461608886719, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4118461608886719, "logits_per_char": -0.7059230804443359, "num_chars": 2}, {"sum_logits": -1.319529414176941, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.319529414176941, "logits_per_char": -0.6597647070884705, "num_chars": 2}, {"sum_logits": -1.4707216024398804, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4707216024398804, "logits_per_char": -0.7353608012199402, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 835, "native_id": "MCAS_1999_4_23", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4367040395736694, "incorrect_loss_raw": 1.3814266920089722, "correct_loss_per_char": 0.7183520197868347, "incorrect_loss_per_char": 0.6907133460044861, "correct_loss_per_token": 1.4367040395736694, "incorrect_loss_per_token": 1.3814266920089722, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4367040395736694, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.4367040395736694, "logits_per_char": -0.7183520197868347, "num_chars": 2}, {"sum_logits": -1.5246789455413818, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.5246789455413818, "logits_per_char": -0.7623394727706909, "num_chars": 2}, {"sum_logits": -1.3765755891799927, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.3765755891799927, "logits_per_char": -0.6882877945899963, "num_chars": 2}, {"sum_logits": -1.243025541305542, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": true, "logits_per_token": -1.243025541305542, "logits_per_char": -0.621512770652771, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 836, "native_id": "TIMSS_1995_8_J7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7589308023452759, "incorrect_loss_raw": 1.5874653458595276, "correct_loss_per_char": 0.8794654011726379, "incorrect_loss_per_char": 0.7937326729297638, "correct_loss_per_token": 1.7589308023452759, "incorrect_loss_per_token": 1.5874653458595276, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5724300146102905, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -1.5724300146102905, "logits_per_char": -0.7862150073051453, "num_chars": 2}, {"sum_logits": -1.5446749925613403, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": true, "logits_per_token": -1.5446749925613403, "logits_per_char": -0.7723374962806702, "num_chars": 2}, {"sum_logits": -1.583896517753601, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -1.583896517753601, "logits_per_char": -0.7919482588768005, "num_chars": 2}, {"sum_logits": -1.7589308023452759, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -1.7589308023452759, "logits_per_char": -0.8794654011726379, "num_chars": 2}, {"sum_logits": -1.6488598585128784, "num_tokens": 1, "num_tokens_all": 411, "is_greedy": false, "logits_per_token": -1.6488598585128784, "logits_per_char": -0.8244299292564392, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 837, "native_id": "Mercury_SC_LBS10018", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1361948251724243, "incorrect_loss_raw": 1.4955709377924602, "correct_loss_per_char": 0.5680974125862122, "incorrect_loss_per_char": 0.7477854688962301, "correct_loss_per_token": 1.1361948251724243, "incorrect_loss_per_token": 1.4955709377924602, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1361948251724243, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.1361948251724243, "logits_per_char": -0.5680974125862122, "num_chars": 2}, {"sum_logits": -1.4382352828979492, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4382352828979492, "logits_per_char": -0.7191176414489746, "num_chars": 2}, {"sum_logits": -1.5833253860473633, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.5833253860473633, "logits_per_char": -0.7916626930236816, "num_chars": 2}, {"sum_logits": -1.4651521444320679, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4651521444320679, "logits_per_char": -0.7325760722160339, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 838, "native_id": "Mercury_SC_406855", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2462904453277588, "incorrect_loss_raw": 1.4447522163391113, "correct_loss_per_char": 0.6231452226638794, "incorrect_loss_per_char": 0.7223761081695557, "correct_loss_per_token": 1.2462904453277588, "incorrect_loss_per_token": 1.4447522163391113, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5038899183273315, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.5038899183273315, "logits_per_char": -0.7519449591636658, "num_chars": 2}, {"sum_logits": -1.4754469394683838, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4754469394683838, "logits_per_char": -0.7377234697341919, "num_chars": 2}, {"sum_logits": -1.3549197912216187, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.3549197912216187, "logits_per_char": -0.6774598956108093, "num_chars": 2}, {"sum_logits": -1.2462904453277588, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.2462904453277588, "logits_per_char": -0.6231452226638794, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 839, "native_id": "Mercury_SC_415457", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1978228092193604, "incorrect_loss_raw": 1.4730151096979778, "correct_loss_per_char": 0.5989114046096802, "incorrect_loss_per_char": 0.7365075548489889, "correct_loss_per_token": 1.1978228092193604, "incorrect_loss_per_token": 1.4730151096979778, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5771880149841309, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.5771880149841309, "logits_per_char": -0.7885940074920654, "num_chars": 2}, {"sum_logits": -1.544144868850708, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.544144868850708, "logits_per_char": -0.772072434425354, "num_chars": 2}, {"sum_logits": -1.2977124452590942, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.2977124452590942, "logits_per_char": -0.6488562226295471, "num_chars": 2}, {"sum_logits": -1.1978228092193604, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.1978228092193604, "logits_per_char": -0.5989114046096802, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 840, "native_id": "NYSEDREGENTS_2015_4_25", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4159166812896729, "incorrect_loss_raw": 1.3838448127110798, "correct_loss_per_char": 0.7079583406448364, "incorrect_loss_per_char": 0.6919224063555399, "correct_loss_per_token": 1.4159166812896729, "incorrect_loss_per_token": 1.3838448127110798, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4980477094650269, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4980477094650269, "logits_per_char": -0.7490238547325134, "num_chars": 2}, {"sum_logits": -1.290711522102356, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.290711522102356, "logits_per_char": -0.645355761051178, "num_chars": 2}, {"sum_logits": -1.4159166812896729, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4159166812896729, "logits_per_char": -0.7079583406448364, "num_chars": 2}, {"sum_logits": -1.362775206565857, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.362775206565857, "logits_per_char": -0.6813876032829285, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 841, "native_id": "Mercury_7058135", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4892079830169678, "incorrect_loss_raw": 1.3621836105982463, "correct_loss_per_char": 0.7446039915084839, "incorrect_loss_per_char": 0.6810918052991232, "correct_loss_per_token": 1.4892079830169678, "incorrect_loss_per_token": 1.3621836105982463, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3061621189117432, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.3061621189117432, "logits_per_char": -0.6530810594558716, "num_chars": 2}, {"sum_logits": -1.4892079830169678, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4892079830169678, "logits_per_char": -0.7446039915084839, "num_chars": 2}, {"sum_logits": -1.4620068073272705, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4620068073272705, "logits_per_char": -0.7310034036636353, "num_chars": 2}, {"sum_logits": -1.318381905555725, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.318381905555725, "logits_per_char": -0.6591909527778625, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 842, "native_id": "MDSA_2008_4_19", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3862204551696777, "incorrect_loss_raw": 1.3931161165237427, "correct_loss_per_char": 0.6931102275848389, "incorrect_loss_per_char": 0.6965580582618713, "correct_loss_per_token": 1.3862204551696777, "incorrect_loss_per_token": 1.3931161165237427, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.325559377670288, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -1.325559377670288, "logits_per_char": -0.662779688835144, "num_chars": 2}, {"sum_logits": -1.3862204551696777, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.3862204551696777, "logits_per_char": -0.6931102275848389, "num_chars": 2}, {"sum_logits": -1.4486286640167236, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.4486286640167236, "logits_per_char": -0.7243143320083618, "num_chars": 2}, {"sum_logits": -1.4051603078842163, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.4051603078842163, "logits_per_char": -0.7025801539421082, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 843, "native_id": "AKDE&ED_2008_8_45", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2192729711532593, "incorrect_loss_raw": 1.4580661455790203, "correct_loss_per_char": 0.6096364855766296, "incorrect_loss_per_char": 0.7290330727895101, "correct_loss_per_token": 1.2192729711532593, "incorrect_loss_per_token": 1.4580661455790203, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6020139455795288, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.6020139455795288, "logits_per_char": -0.8010069727897644, "num_chars": 2}, {"sum_logits": -1.401732325553894, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.401732325553894, "logits_per_char": -0.700866162776947, "num_chars": 2}, {"sum_logits": -1.3704521656036377, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.3704521656036377, "logits_per_char": -0.6852260828018188, "num_chars": 2}, {"sum_logits": -1.2192729711532593, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.2192729711532593, "logits_per_char": -0.6096364855766296, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 844, "native_id": "Mercury_7131758", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4495049715042114, "incorrect_loss_raw": 1.375787337621053, "correct_loss_per_char": 0.7247524857521057, "incorrect_loss_per_char": 0.6878936688105265, "correct_loss_per_token": 1.4495049715042114, "incorrect_loss_per_token": 1.375787337621053, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4495049715042114, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4495049715042114, "logits_per_char": -0.7247524857521057, "num_chars": 2}, {"sum_logits": -1.3712419271469116, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.3712419271469116, "logits_per_char": -0.6856209635734558, "num_chars": 2}, {"sum_logits": -1.480562448501587, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.480562448501587, "logits_per_char": -0.7402812242507935, "num_chars": 2}, {"sum_logits": -1.2755576372146606, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.2755576372146606, "logits_per_char": -0.6377788186073303, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 845, "native_id": "NYSEDREGENTS_2013_8_10", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3566782474517822, "incorrect_loss_raw": 1.4071188767751057, "correct_loss_per_char": 0.6783391237258911, "incorrect_loss_per_char": 0.7035594383875529, "correct_loss_per_token": 1.3566782474517822, "incorrect_loss_per_token": 1.4071188767751057, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5316134691238403, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.5316134691238403, "logits_per_char": -0.7658067345619202, "num_chars": 2}, {"sum_logits": -1.3566782474517822, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3566782474517822, "logits_per_char": -0.6783391237258911, "num_chars": 2}, {"sum_logits": -1.3785899877548218, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3785899877548218, "logits_per_char": -0.6892949938774109, "num_chars": 2}, {"sum_logits": -1.3111531734466553, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.3111531734466553, "logits_per_char": -0.6555765867233276, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 846, "native_id": "Mercury_SC_401783", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3466503620147705, "incorrect_loss_raw": 1.4101175864537556, "correct_loss_per_char": 0.6733251810073853, "incorrect_loss_per_char": 0.7050587932268778, "correct_loss_per_token": 1.3466503620147705, "incorrect_loss_per_token": 1.4101175864537556, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3140430450439453, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.3140430450439453, "logits_per_char": -0.6570215225219727, "num_chars": 2}, {"sum_logits": -1.5408577919006348, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.5408577919006348, "logits_per_char": -0.7704288959503174, "num_chars": 2}, {"sum_logits": -1.375451922416687, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.375451922416687, "logits_per_char": -0.6877259612083435, "num_chars": 2}, {"sum_logits": -1.3466503620147705, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3466503620147705, "logits_per_char": -0.6733251810073853, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 847, "native_id": "Mercury_7190120", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3826627731323242, "incorrect_loss_raw": 1.3978275855382283, "correct_loss_per_char": 0.6913313865661621, "incorrect_loss_per_char": 0.6989137927691141, "correct_loss_per_token": 1.3826627731323242, "incorrect_loss_per_token": 1.3978275855382283, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4150866270065308, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.4150866270065308, "logits_per_char": -0.7075433135032654, "num_chars": 2}, {"sum_logits": -1.3826627731323242, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.3826627731323242, "logits_per_char": -0.6913313865661621, "num_chars": 2}, {"sum_logits": -1.5163698196411133, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.5163698196411133, "logits_per_char": -0.7581849098205566, "num_chars": 2}, {"sum_logits": -1.262026309967041, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": true, "logits_per_token": -1.262026309967041, "logits_per_char": -0.6310131549835205, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 848, "native_id": "Mercury_409317", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3661834001541138, "incorrect_loss_raw": 1.3989376227060955, "correct_loss_per_char": 0.6830917000770569, "incorrect_loss_per_char": 0.6994688113530477, "correct_loss_per_token": 1.3661834001541138, "incorrect_loss_per_token": 1.3989376227060955, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.416712760925293, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.416712760925293, "logits_per_char": -0.7083563804626465, "num_chars": 2}, {"sum_logits": -1.3661834001541138, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": true, "logits_per_token": -1.3661834001541138, "logits_per_char": -0.6830917000770569, "num_chars": 2}, {"sum_logits": -1.397059679031372, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.397059679031372, "logits_per_char": -0.698529839515686, "num_chars": 2}, {"sum_logits": -1.383040428161621, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.383040428161621, "logits_per_char": -0.6915202140808105, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 849, "native_id": "Mercury_7268240", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2448077201843262, "incorrect_loss_raw": 1.4479318459828694, "correct_loss_per_char": 0.6224038600921631, "incorrect_loss_per_char": 0.7239659229914347, "correct_loss_per_token": 1.2448077201843262, "incorrect_loss_per_token": 1.4479318459828694, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5488723516464233, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.5488723516464233, "logits_per_char": -0.7744361758232117, "num_chars": 2}, {"sum_logits": -1.3642841577529907, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.3642841577529907, "logits_per_char": -0.6821420788764954, "num_chars": 2}, {"sum_logits": -1.4306390285491943, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.4306390285491943, "logits_per_char": -0.7153195142745972, "num_chars": 2}, {"sum_logits": -1.2448077201843262, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.2448077201843262, "logits_per_char": -0.6224038600921631, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 850, "native_id": "Mercury_7228358", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1701403856277466, "incorrect_loss_raw": 1.4766827821731567, "correct_loss_per_char": 0.5850701928138733, "incorrect_loss_per_char": 0.7383413910865784, "correct_loss_per_token": 1.1701403856277466, "incorrect_loss_per_token": 1.4766827821731567, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5274720191955566, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.5274720191955566, "logits_per_char": -0.7637360095977783, "num_chars": 2}, {"sum_logits": -1.4413576126098633, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.4413576126098633, "logits_per_char": -0.7206788063049316, "num_chars": 2}, {"sum_logits": -1.4612187147140503, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.4612187147140503, "logits_per_char": -0.7306093573570251, "num_chars": 2}, {"sum_logits": -1.1701403856277466, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": true, "logits_per_token": -1.1701403856277466, "logits_per_char": -0.5850701928138733, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 851, "native_id": "MCAS_2004_5_33", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3634490966796875, "incorrect_loss_raw": 1.4096215565999348, "correct_loss_per_char": 0.6817245483398438, "incorrect_loss_per_char": 0.7048107782999674, "correct_loss_per_token": 1.3634490966796875, "incorrect_loss_per_token": 1.4096215565999348, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.593458890914917, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -1.593458890914917, "logits_per_char": -0.7967294454574585, "num_chars": 2}, {"sum_logits": -1.3916738033294678, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -1.3916738033294678, "logits_per_char": -0.6958369016647339, "num_chars": 2}, {"sum_logits": -1.3634490966796875, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": false, "logits_per_token": -1.3634490966796875, "logits_per_char": -0.6817245483398438, "num_chars": 2}, {"sum_logits": -1.24373197555542, "num_tokens": 1, "num_tokens_all": 404, "is_greedy": true, "logits_per_token": -1.24373197555542, "logits_per_char": -0.62186598777771, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 852, "native_id": "Mercury_7008855", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.383129358291626, "incorrect_loss_raw": 1.3962695598602295, "correct_loss_per_char": 0.691564679145813, "incorrect_loss_per_char": 0.6981347799301147, "correct_loss_per_token": 1.383129358291626, "incorrect_loss_per_token": 1.3962695598602295, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3479053974151611, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.3479053974151611, "logits_per_char": -0.6739526987075806, "num_chars": 2}, {"sum_logits": -1.383129358291626, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.383129358291626, "logits_per_char": -0.691564679145813, "num_chars": 2}, {"sum_logits": -1.4044190645217896, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.4044190645217896, "logits_per_char": -0.7022095322608948, "num_chars": 2}, {"sum_logits": -1.4364842176437378, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.4364842176437378, "logits_per_char": -0.7182421088218689, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 853, "native_id": "Mercury_7057085", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4077911376953125, "incorrect_loss_raw": 1.384860912958781, "correct_loss_per_char": 0.7038955688476562, "incorrect_loss_per_char": 0.6924304564793905, "correct_loss_per_token": 1.4077911376953125, "incorrect_loss_per_token": 1.384860912958781, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3696686029434204, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.3696686029434204, "logits_per_char": -0.6848343014717102, "num_chars": 2}, {"sum_logits": -1.4077911376953125, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4077911376953125, "logits_per_char": -0.7038955688476562, "num_chars": 2}, {"sum_logits": -1.4112988710403442, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4112988710403442, "logits_per_char": -0.7056494355201721, "num_chars": 2}, {"sum_logits": -1.3736152648925781, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3736152648925781, "logits_per_char": -0.6868076324462891, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 854, "native_id": "Mercury_7171728", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.444560170173645, "incorrect_loss_raw": 1.3770908912022908, "correct_loss_per_char": 0.7222800850868225, "incorrect_loss_per_char": 0.6885454456011454, "correct_loss_per_token": 1.444560170173645, "incorrect_loss_per_token": 1.3770908912022908, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.464246392250061, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.464246392250061, "logits_per_char": -0.7321231961250305, "num_chars": 2}, {"sum_logits": -1.444560170173645, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.444560170173645, "logits_per_char": -0.7222800850868225, "num_chars": 2}, {"sum_logits": -1.380219578742981, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.380219578742981, "logits_per_char": -0.6901097893714905, "num_chars": 2}, {"sum_logits": -1.2868067026138306, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.2868067026138306, "logits_per_char": -0.6434033513069153, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 855, "native_id": "NAEP_2005_4_S14+3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5359641313552856, "incorrect_loss_raw": 1.3583892981211345, "correct_loss_per_char": 0.7679820656776428, "incorrect_loss_per_char": 0.6791946490605673, "correct_loss_per_token": 1.5359641313552856, "incorrect_loss_per_token": 1.3583892981211345, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4584358930587769, "num_tokens": 1, "num_tokens_all": 499, "is_greedy": false, "logits_per_token": -1.4584358930587769, "logits_per_char": -0.7292179465293884, "num_chars": 2}, {"sum_logits": -1.1515940427780151, "num_tokens": 1, "num_tokens_all": 499, "is_greedy": true, "logits_per_token": -1.1515940427780151, "logits_per_char": -0.5757970213890076, "num_chars": 2}, {"sum_logits": -1.5359641313552856, "num_tokens": 1, "num_tokens_all": 499, "is_greedy": false, "logits_per_token": -1.5359641313552856, "logits_per_char": -0.7679820656776428, "num_chars": 2}, {"sum_logits": -1.4651379585266113, "num_tokens": 1, "num_tokens_all": 499, "is_greedy": false, "logits_per_token": -1.4651379585266113, "logits_per_char": -0.7325689792633057, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 856, "native_id": "Mercury_7024395", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4889570474624634, "incorrect_loss_raw": 1.3669980367024739, "correct_loss_per_char": 0.7444785237312317, "incorrect_loss_per_char": 0.6834990183512369, "correct_loss_per_token": 1.4889570474624634, "incorrect_loss_per_token": 1.3669980367024739, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4889570474624634, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": false, "logits_per_token": -1.4889570474624634, "logits_per_char": -0.7444785237312317, "num_chars": 2}, {"sum_logits": -1.4177188873291016, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": false, "logits_per_token": -1.4177188873291016, "logits_per_char": -0.7088594436645508, "num_chars": 2}, {"sum_logits": -1.4747575521469116, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": false, "logits_per_token": -1.4747575521469116, "logits_per_char": -0.7373787760734558, "num_chars": 2}, {"sum_logits": -1.2085176706314087, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": true, "logits_per_token": -1.2085176706314087, "logits_per_char": -0.6042588353157043, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 857, "native_id": "NYSEDREGENTS_2012_8_28", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4324190616607666, "incorrect_loss_raw": 1.3774433930714924, "correct_loss_per_char": 0.7162095308303833, "incorrect_loss_per_char": 0.6887216965357462, "correct_loss_per_token": 1.4324190616607666, "incorrect_loss_per_token": 1.3774433930714924, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2986379861831665, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": true, "logits_per_token": -1.2986379861831665, "logits_per_char": -0.6493189930915833, "num_chars": 2}, {"sum_logits": -1.4148787260055542, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.4148787260055542, "logits_per_char": -0.7074393630027771, "num_chars": 2}, {"sum_logits": -1.4324190616607666, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.4324190616607666, "logits_per_char": -0.7162095308303833, "num_chars": 2}, {"sum_logits": -1.4188134670257568, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.4188134670257568, "logits_per_char": -0.7094067335128784, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 858, "native_id": "Mercury_7090790", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3231600522994995, "incorrect_loss_raw": 1.4144107103347778, "correct_loss_per_char": 0.6615800261497498, "incorrect_loss_per_char": 0.7072053551673889, "correct_loss_per_token": 1.3231600522994995, "incorrect_loss_per_token": 1.4144107103347778, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3231600522994995, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.3231600522994995, "logits_per_char": -0.6615800261497498, "num_chars": 2}, {"sum_logits": -1.3789066076278687, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3789066076278687, "logits_per_char": -0.6894533038139343, "num_chars": 2}, {"sum_logits": -1.4436242580413818, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4436242580413818, "logits_per_char": -0.7218121290206909, "num_chars": 2}, {"sum_logits": -1.420701265335083, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.420701265335083, "logits_per_char": -0.7103506326675415, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 859, "native_id": "TIMSS_2003_8_pg87", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3797094821929932, "incorrect_loss_raw": 1.3934680620829265, "correct_loss_per_char": 0.6898547410964966, "incorrect_loss_per_char": 0.6967340310414633, "correct_loss_per_token": 1.3797094821929932, "incorrect_loss_per_token": 1.3934680620829265, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.400918960571289, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.400918960571289, "logits_per_char": -0.7004594802856445, "num_chars": 2}, {"sum_logits": -1.3797094821929932, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.3797094821929932, "logits_per_char": -0.6898547410964966, "num_chars": 2}, {"sum_logits": -1.4154515266418457, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4154515266418457, "logits_per_char": -0.7077257633209229, "num_chars": 2}, {"sum_logits": -1.3640336990356445, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.3640336990356445, "logits_per_char": -0.6820168495178223, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 860, "native_id": "Mercury_SC_407382", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3858680725097656, "incorrect_loss_raw": 1.399903655052185, "correct_loss_per_char": 0.6929340362548828, "incorrect_loss_per_char": 0.6999518275260925, "correct_loss_per_token": 1.3858680725097656, "incorrect_loss_per_token": 1.399903655052185, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.442636489868164, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.442636489868164, "logits_per_char": -0.721318244934082, "num_chars": 2}, {"sum_logits": -1.5228652954101562, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.5228652954101562, "logits_per_char": -0.7614326477050781, "num_chars": 2}, {"sum_logits": -1.3858680725097656, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.3858680725097656, "logits_per_char": -0.6929340362548828, "num_chars": 2}, {"sum_logits": -1.2342091798782349, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.2342091798782349, "logits_per_char": -0.6171045899391174, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 861, "native_id": "MDSA_2010_4_20", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3948612213134766, "incorrect_loss_raw": 1.3912601073582966, "correct_loss_per_char": 0.6974306106567383, "incorrect_loss_per_char": 0.6956300536791483, "correct_loss_per_token": 1.3948612213134766, "incorrect_loss_per_token": 1.3912601073582966, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.30635666847229, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.30635666847229, "logits_per_char": -0.653178334236145, "num_chars": 2}, {"sum_logits": -1.3948612213134766, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3948612213134766, "logits_per_char": -0.6974306106567383, "num_chars": 2}, {"sum_logits": -1.4205310344696045, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4205310344696045, "logits_per_char": -0.7102655172348022, "num_chars": 2}, {"sum_logits": -1.4468926191329956, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4468926191329956, "logits_per_char": -0.7234463095664978, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 862, "native_id": "Mercury_SC_405019", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4198780059814453, "incorrect_loss_raw": 1.3828675349553425, "correct_loss_per_char": 0.7099390029907227, "incorrect_loss_per_char": 0.6914337674776713, "correct_loss_per_token": 1.4198780059814453, "incorrect_loss_per_token": 1.3828675349553425, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.395261287689209, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": false, "logits_per_token": -1.395261287689209, "logits_per_char": -0.6976306438446045, "num_chars": 2}, {"sum_logits": -1.3124784231185913, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": true, "logits_per_token": -1.3124784231185913, "logits_per_char": -0.6562392115592957, "num_chars": 2}, {"sum_logits": -1.4408628940582275, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": false, "logits_per_token": -1.4408628940582275, "logits_per_char": -0.7204314470291138, "num_chars": 2}, {"sum_logits": -1.4198780059814453, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": false, "logits_per_token": -1.4198780059814453, "logits_per_char": -0.7099390029907227, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 863, "native_id": "Mercury_7123078", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.39339280128479, "incorrect_loss_raw": 1.3959096272786458, "correct_loss_per_char": 0.696696400642395, "incorrect_loss_per_char": 0.6979548136393229, "correct_loss_per_token": 1.39339280128479, "incorrect_loss_per_token": 1.3959096272786458, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2512714862823486, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -1.2512714862823486, "logits_per_char": -0.6256357431411743, "num_chars": 2}, {"sum_logits": -1.4038643836975098, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.4038643836975098, "logits_per_char": -0.7019321918487549, "num_chars": 2}, {"sum_logits": -1.532593011856079, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.532593011856079, "logits_per_char": -0.7662965059280396, "num_chars": 2}, {"sum_logits": -1.39339280128479, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.39339280128479, "logits_per_char": -0.696696400642395, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 864, "native_id": "Mercury_400084", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2974703311920166, "incorrect_loss_raw": 1.4319719076156616, "correct_loss_per_char": 0.6487351655960083, "incorrect_loss_per_char": 0.7159859538078308, "correct_loss_per_token": 1.2974703311920166, "incorrect_loss_per_token": 1.4319719076156616, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6098564863204956, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.6098564863204956, "logits_per_char": -0.8049282431602478, "num_chars": 2}, {"sum_logits": -1.307577133178711, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.307577133178711, "logits_per_char": -0.6537885665893555, "num_chars": 2}, {"sum_logits": -1.3784821033477783, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.3784821033477783, "logits_per_char": -0.6892410516738892, "num_chars": 2}, {"sum_logits": -1.2974703311920166, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -1.2974703311920166, "logits_per_char": -0.6487351655960083, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 865, "native_id": "Mercury_7139650", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3733116388320923, "incorrect_loss_raw": 1.4117855628331502, "correct_loss_per_char": 0.6866558194160461, "incorrect_loss_per_char": 0.7058927814165751, "correct_loss_per_token": 1.3733116388320923, "incorrect_loss_per_token": 1.4117855628331502, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5504660606384277, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.5504660606384277, "logits_per_char": -0.7752330303192139, "num_chars": 2}, {"sum_logits": -1.516472578048706, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.516472578048706, "logits_per_char": -0.758236289024353, "num_chars": 2}, {"sum_logits": -1.3733116388320923, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.3733116388320923, "logits_per_char": -0.6866558194160461, "num_chars": 2}, {"sum_logits": -1.168418049812317, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.168418049812317, "logits_per_char": -0.5842090249061584, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 866, "native_id": "Mercury_417150", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.344307541847229, "incorrect_loss_raw": 1.580588936805725, "correct_loss_per_char": 0.6721537709236145, "incorrect_loss_per_char": 0.7902944684028625, "correct_loss_per_token": 1.344307541847229, "incorrect_loss_per_token": 1.580588936805725, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0374960899353027, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": true, "logits_per_token": -1.0374960899353027, "logits_per_char": -0.5187480449676514, "num_chars": 2}, {"sum_logits": -1.344307541847229, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.344307541847229, "logits_per_char": -0.6721537709236145, "num_chars": 2}, {"sum_logits": -1.5880011320114136, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.5880011320114136, "logits_per_char": -0.7940005660057068, "num_chars": 2}, {"sum_logits": -2.116269588470459, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -2.116269588470459, "logits_per_char": -1.0581347942352295, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 867, "native_id": "Mercury_SC_402256", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3186959028244019, "incorrect_loss_raw": 1.4327892065048218, "correct_loss_per_char": 0.6593479514122009, "incorrect_loss_per_char": 0.7163946032524109, "correct_loss_per_token": 1.3186959028244019, "incorrect_loss_per_token": 1.4327892065048218, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6277828216552734, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.6277828216552734, "logits_per_char": -0.8138914108276367, "num_chars": 2}, {"sum_logits": -1.4757602214813232, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.4757602214813232, "logits_per_char": -0.7378801107406616, "num_chars": 2}, {"sum_logits": -1.3186959028244019, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.3186959028244019, "logits_per_char": -0.6593479514122009, "num_chars": 2}, {"sum_logits": -1.1948245763778687, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.1948245763778687, "logits_per_char": -0.5974122881889343, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 868, "native_id": "TIMSS_2007_8_pg53", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7449527978897095, "incorrect_loss_raw": 1.590519279241562, "correct_loss_per_char": 0.8724763989448547, "incorrect_loss_per_char": 0.795259639620781, "correct_loss_per_token": 1.7449527978897095, "incorrect_loss_per_token": 1.590519279241562, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7449527978897095, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.7449527978897095, "logits_per_char": -0.8724763989448547, "num_chars": 2}, {"sum_logits": -1.6775654554367065, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.6775654554367065, "logits_per_char": -0.8387827277183533, "num_chars": 2}, {"sum_logits": -1.5869580507278442, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.5869580507278442, "logits_per_char": -0.7934790253639221, "num_chars": 2}, {"sum_logits": -1.672367811203003, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.672367811203003, "logits_per_char": -0.8361839056015015, "num_chars": 2}, {"sum_logits": -1.4251857995986938, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.4251857995986938, "logits_per_char": -0.7125928997993469, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 869, "native_id": "MCAS_2006_9_17-v1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3705565929412842, "incorrect_loss_raw": 1.3990706205368042, "correct_loss_per_char": 0.6852782964706421, "incorrect_loss_per_char": 0.6995353102684021, "correct_loss_per_token": 1.3705565929412842, "incorrect_loss_per_token": 1.3990706205368042, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3913195133209229, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": false, "logits_per_token": -1.3913195133209229, "logits_per_char": -0.6956597566604614, "num_chars": 2}, {"sum_logits": -1.3412400484085083, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": true, "logits_per_token": -1.3412400484085083, "logits_per_char": -0.6706200242042542, "num_chars": 2}, {"sum_logits": -1.4646522998809814, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": false, "logits_per_token": -1.4646522998809814, "logits_per_char": -0.7323261499404907, "num_chars": 2}, {"sum_logits": -1.3705565929412842, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": false, "logits_per_token": -1.3705565929412842, "logits_per_char": -0.6852782964706421, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 870, "native_id": "Mercury_401728", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3812578916549683, "incorrect_loss_raw": 1.3950998783111572, "correct_loss_per_char": 0.6906289458274841, "incorrect_loss_per_char": 0.6975499391555786, "correct_loss_per_token": 1.3812578916549683, "incorrect_loss_per_token": 1.3950998783111572, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4322113990783691, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4322113990783691, "logits_per_char": -0.7161056995391846, "num_chars": 2}, {"sum_logits": -1.3241801261901855, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.3241801261901855, "logits_per_char": -0.6620900630950928, "num_chars": 2}, {"sum_logits": -1.428908109664917, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.428908109664917, "logits_per_char": -0.7144540548324585, "num_chars": 2}, {"sum_logits": -1.3812578916549683, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3812578916549683, "logits_per_char": -0.6906289458274841, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 871, "native_id": "Mercury_7192798", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4356645345687866, "incorrect_loss_raw": 1.3884629408518474, "correct_loss_per_char": 0.7178322672843933, "incorrect_loss_per_char": 0.6942314704259237, "correct_loss_per_token": 1.4356645345687866, "incorrect_loss_per_token": 1.3884629408518474, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5786447525024414, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.5786447525024414, "logits_per_char": -0.7893223762512207, "num_chars": 2}, {"sum_logits": -1.3959566354751587, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3959566354751587, "logits_per_char": -0.6979783177375793, "num_chars": 2}, {"sum_logits": -1.4356645345687866, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4356645345687866, "logits_per_char": -0.7178322672843933, "num_chars": 2}, {"sum_logits": -1.190787434577942, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.190787434577942, "logits_per_char": -0.595393717288971, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 872, "native_id": "Mercury_7221078", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3833403587341309, "incorrect_loss_raw": 1.4057297309239705, "correct_loss_per_char": 0.6916701793670654, "incorrect_loss_per_char": 0.7028648654619852, "correct_loss_per_token": 1.3833403587341309, "incorrect_loss_per_token": 1.4057297309239705, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6269402503967285, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.6269402503967285, "logits_per_char": -0.8134701251983643, "num_chars": 2}, {"sum_logits": -1.3727105855941772, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.3727105855941772, "logits_per_char": -0.6863552927970886, "num_chars": 2}, {"sum_logits": -1.3833403587341309, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.3833403587341309, "logits_per_char": -0.6916701793670654, "num_chars": 2}, {"sum_logits": -1.2175383567810059, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.2175383567810059, "logits_per_char": -0.6087691783905029, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 873, "native_id": "Mercury_7004953", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2988027334213257, "incorrect_loss_raw": 1.425455649693807, "correct_loss_per_char": 0.6494013667106628, "incorrect_loss_per_char": 0.7127278248469034, "correct_loss_per_token": 1.2988027334213257, "incorrect_loss_per_token": 1.425455649693807, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4278970956802368, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.4278970956802368, "logits_per_char": -0.7139485478401184, "num_chars": 2}, {"sum_logits": -1.2988027334213257, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": true, "logits_per_token": -1.2988027334213257, "logits_per_char": -0.6494013667106628, "num_chars": 2}, {"sum_logits": -1.5248619318008423, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.5248619318008423, "logits_per_char": -0.7624309659004211, "num_chars": 2}, {"sum_logits": -1.3236079216003418, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.3236079216003418, "logits_per_char": -0.6618039608001709, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 874, "native_id": "TIMSS_2003_8_pg94", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6206697225570679, "incorrect_loss_raw": 1.3277319272359211, "correct_loss_per_char": 0.8103348612785339, "incorrect_loss_per_char": 0.6638659636179606, "correct_loss_per_token": 1.6206697225570679, "incorrect_loss_per_token": 1.3277319272359211, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.398884654045105, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.398884654045105, "logits_per_char": -0.6994423270225525, "num_chars": 2}, {"sum_logits": -1.347540020942688, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.347540020942688, "logits_per_char": -0.673770010471344, "num_chars": 2}, {"sum_logits": -1.2367711067199707, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": true, "logits_per_token": -1.2367711067199707, "logits_per_char": -0.6183855533599854, "num_chars": 2}, {"sum_logits": -1.6206697225570679, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.6206697225570679, "logits_per_char": -0.8103348612785339, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 875, "native_id": "Mercury_7095060", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4399741888046265, "incorrect_loss_raw": 1.3785872062047322, "correct_loss_per_char": 0.7199870944023132, "incorrect_loss_per_char": 0.6892936031023661, "correct_loss_per_token": 1.4399741888046265, "incorrect_loss_per_token": 1.3785872062047322, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4399741888046265, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.4399741888046265, "logits_per_char": -0.7199870944023132, "num_chars": 2}, {"sum_logits": -1.376124382019043, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.376124382019043, "logits_per_char": -0.6880621910095215, "num_chars": 2}, {"sum_logits": -1.5092824697494507, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.5092824697494507, "logits_per_char": -0.7546412348747253, "num_chars": 2}, {"sum_logits": -1.2503547668457031, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": true, "logits_per_token": -1.2503547668457031, "logits_per_char": -0.6251773834228516, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 876, "native_id": "Mercury_7123358", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2366071939468384, "incorrect_loss_raw": 1.4484415849049885, "correct_loss_per_char": 0.6183035969734192, "incorrect_loss_per_char": 0.7242207924524943, "correct_loss_per_token": 1.2366071939468384, "incorrect_loss_per_token": 1.4484415849049885, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2366071939468384, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.2366071939468384, "logits_per_char": -0.6183035969734192, "num_chars": 2}, {"sum_logits": -1.3692402839660645, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3692402839660645, "logits_per_char": -0.6846201419830322, "num_chars": 2}, {"sum_logits": -1.5001760721206665, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.5001760721206665, "logits_per_char": -0.7500880360603333, "num_chars": 2}, {"sum_logits": -1.4759083986282349, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4759083986282349, "logits_per_char": -0.7379541993141174, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 877, "native_id": "Mercury_7069020", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3824070692062378, "incorrect_loss_raw": 1.393584132194519, "correct_loss_per_char": 0.6912035346031189, "incorrect_loss_per_char": 0.6967920660972595, "correct_loss_per_token": 1.3824070692062378, "incorrect_loss_per_token": 1.393584132194519, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3824070692062378, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.3824070692062378, "logits_per_char": -0.6912035346031189, "num_chars": 2}, {"sum_logits": -1.3766560554504395, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.3766560554504395, "logits_per_char": -0.6883280277252197, "num_chars": 2}, {"sum_logits": -1.4503400325775146, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.4503400325775146, "logits_per_char": -0.7251700162887573, "num_chars": 2}, {"sum_logits": -1.353756308555603, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.353756308555603, "logits_per_char": -0.6768781542778015, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 878, "native_id": "TIMSS_2003_8_pg117", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5097302198410034, "incorrect_loss_raw": 1.3592281341552734, "correct_loss_per_char": 0.7548651099205017, "incorrect_loss_per_char": 0.6796140670776367, "correct_loss_per_token": 1.5097302198410034, "incorrect_loss_per_token": 1.3592281341552734, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3890587091445923, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3890587091445923, "logits_per_char": -0.6945293545722961, "num_chars": 2}, {"sum_logits": -1.5097302198410034, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.5097302198410034, "logits_per_char": -0.7548651099205017, "num_chars": 2}, {"sum_logits": -1.209805965423584, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.209805965423584, "logits_per_char": -0.604902982711792, "num_chars": 2}, {"sum_logits": -1.478819727897644, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.478819727897644, "logits_per_char": -0.739409863948822, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 879, "native_id": "VASoL_2008_3_32", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5121592283248901, "incorrect_loss_raw": 1.3563943306605022, "correct_loss_per_char": 0.7560796141624451, "incorrect_loss_per_char": 0.6781971653302511, "correct_loss_per_token": 1.5121592283248901, "incorrect_loss_per_token": 1.3563943306605022, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3718317747116089, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3718317747116089, "logits_per_char": -0.6859158873558044, "num_chars": 2}, {"sum_logits": -1.5121592283248901, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.5121592283248901, "logits_per_char": -0.7560796141624451, "num_chars": 2}, {"sum_logits": -1.4211792945861816, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4211792945861816, "logits_per_char": -0.7105896472930908, "num_chars": 2}, {"sum_logits": -1.2761719226837158, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.2761719226837158, "logits_per_char": -0.6380859613418579, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 880, "native_id": "Mercury_SC_400142", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3100571632385254, "incorrect_loss_raw": 1.4198458194732666, "correct_loss_per_char": 0.6550285816192627, "incorrect_loss_per_char": 0.7099229097366333, "correct_loss_per_token": 1.3100571632385254, "incorrect_loss_per_token": 1.4198458194732666, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3100571632385254, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.3100571632385254, "logits_per_char": -0.6550285816192627, "num_chars": 2}, {"sum_logits": -1.3904218673706055, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3904218673706055, "logits_per_char": -0.6952109336853027, "num_chars": 2}, {"sum_logits": -1.4893196821212769, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4893196821212769, "logits_per_char": -0.7446598410606384, "num_chars": 2}, {"sum_logits": -1.3797959089279175, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3797959089279175, "logits_per_char": -0.6898979544639587, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 881, "native_id": "Mercury_7163818", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3043782711029053, "incorrect_loss_raw": 1.4203721284866333, "correct_loss_per_char": 0.6521891355514526, "incorrect_loss_per_char": 0.7101860642433167, "correct_loss_per_token": 1.3043782711029053, "incorrect_loss_per_token": 1.4203721284866333, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4243470430374146, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4243470430374146, "logits_per_char": -0.7121735215187073, "num_chars": 2}, {"sum_logits": -1.3985016345977783, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.3985016345977783, "logits_per_char": -0.6992508172988892, "num_chars": 2}, {"sum_logits": -1.438267707824707, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.438267707824707, "logits_per_char": -0.7191338539123535, "num_chars": 2}, {"sum_logits": -1.3043782711029053, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.3043782711029053, "logits_per_char": -0.6521891355514526, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 882, "native_id": "Mercury_402502", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2702049016952515, "incorrect_loss_raw": 1.441535194714864, "correct_loss_per_char": 0.6351024508476257, "incorrect_loss_per_char": 0.720767597357432, "correct_loss_per_token": 1.2702049016952515, "incorrect_loss_per_token": 1.441535194714864, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4253787994384766, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4253787994384766, "logits_per_char": -0.7126893997192383, "num_chars": 2}, {"sum_logits": -1.5047272443771362, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.5047272443771362, "logits_per_char": -0.7523636221885681, "num_chars": 2}, {"sum_logits": -1.3944995403289795, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3944995403289795, "logits_per_char": -0.6972497701644897, "num_chars": 2}, {"sum_logits": -1.2702049016952515, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.2702049016952515, "logits_per_char": -0.6351024508476257, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 883, "native_id": "Mercury_7130778", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7669247388839722, "incorrect_loss_raw": 1.4022723038991292, "correct_loss_per_char": 0.8834623694419861, "incorrect_loss_per_char": 0.7011361519495646, "correct_loss_per_token": 1.7669247388839722, "incorrect_loss_per_token": 1.4022723038991292, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0942025184631348, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.0942025184631348, "logits_per_char": -0.5471012592315674, "num_chars": 2}, {"sum_logits": -1.108483076095581, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.108483076095581, "logits_per_char": -0.5542415380477905, "num_chars": 2}, {"sum_logits": -1.7669247388839722, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.7669247388839722, "logits_per_char": -0.8834623694419861, "num_chars": 2}, {"sum_logits": -2.004131317138672, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -2.004131317138672, "logits_per_char": -1.002065658569336, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 884, "native_id": "MEA_2010_8_18", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3790147304534912, "incorrect_loss_raw": 1.3964050610860188, "correct_loss_per_char": 0.6895073652267456, "incorrect_loss_per_char": 0.6982025305430094, "correct_loss_per_token": 1.3790147304534912, "incorrect_loss_per_token": 1.3964050610860188, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.429213523864746, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.429213523864746, "logits_per_char": -0.714606761932373, "num_chars": 2}, {"sum_logits": -1.407631516456604, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.407631516456604, "logits_per_char": -0.703815758228302, "num_chars": 2}, {"sum_logits": -1.3790147304534912, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3790147304534912, "logits_per_char": -0.6895073652267456, "num_chars": 2}, {"sum_logits": -1.3523701429367065, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.3523701429367065, "logits_per_char": -0.6761850714683533, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 885, "native_id": "Mercury_7211033", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.320326328277588, "incorrect_loss_raw": 1.4154708782831829, "correct_loss_per_char": 0.660163164138794, "incorrect_loss_per_char": 0.7077354391415914, "correct_loss_per_token": 1.320326328277588, "incorrect_loss_per_token": 1.4154708782831829, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4646936655044556, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4646936655044556, "logits_per_char": -0.7323468327522278, "num_chars": 2}, {"sum_logits": -1.320326328277588, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.320326328277588, "logits_per_char": -0.660163164138794, "num_chars": 2}, {"sum_logits": -1.3846921920776367, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3846921920776367, "logits_per_char": -0.6923460960388184, "num_chars": 2}, {"sum_logits": -1.397026777267456, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.397026777267456, "logits_per_char": -0.698513388633728, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 886, "native_id": "NYSEDREGENTS_2008_8_17", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3432985544204712, "incorrect_loss_raw": 1.4074195226033528, "correct_loss_per_char": 0.6716492772102356, "incorrect_loss_per_char": 0.7037097613016764, "correct_loss_per_token": 1.3432985544204712, "incorrect_loss_per_token": 1.4074195226033528, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3432985544204712, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.3432985544204712, "logits_per_char": -0.6716492772102356, "num_chars": 2}, {"sum_logits": -1.4399663209915161, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4399663209915161, "logits_per_char": -0.7199831604957581, "num_chars": 2}, {"sum_logits": -1.4296687841415405, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4296687841415405, "logits_per_char": -0.7148343920707703, "num_chars": 2}, {"sum_logits": -1.352623462677002, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.352623462677002, "logits_per_char": -0.676311731338501, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 887, "native_id": "NAEP_2005_8_S11+1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.354575753211975, "incorrect_loss_raw": 1.4032071034113567, "correct_loss_per_char": 0.6772878766059875, "incorrect_loss_per_char": 0.7016035517056783, "correct_loss_per_token": 1.354575753211975, "incorrect_loss_per_token": 1.4032071034113567, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3537046909332275, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.3537046909332275, "logits_per_char": -0.6768523454666138, "num_chars": 2}, {"sum_logits": -1.4574483633041382, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4574483633041382, "logits_per_char": -0.7287241816520691, "num_chars": 2}, {"sum_logits": -1.354575753211975, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.354575753211975, "logits_per_char": -0.6772878766059875, "num_chars": 2}, {"sum_logits": -1.398468255996704, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.398468255996704, "logits_per_char": -0.699234127998352, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 888, "native_id": "Mercury_412774", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4082450866699219, "incorrect_loss_raw": 1.387061635653178, "correct_loss_per_char": 0.7041225433349609, "incorrect_loss_per_char": 0.693530817826589, "correct_loss_per_token": 1.4082450866699219, "incorrect_loss_per_token": 1.387061635653178, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4962114095687866, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": false, "logits_per_token": -1.4962114095687866, "logits_per_char": -0.7481057047843933, "num_chars": 2}, {"sum_logits": -1.3693299293518066, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": false, "logits_per_token": -1.3693299293518066, "logits_per_char": -0.6846649646759033, "num_chars": 2}, {"sum_logits": -1.4082450866699219, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": false, "logits_per_token": -1.4082450866699219, "logits_per_char": -0.7041225433349609, "num_chars": 2}, {"sum_logits": -1.2956435680389404, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": true, "logits_per_token": -1.2956435680389404, "logits_per_char": -0.6478217840194702, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 889, "native_id": "MEA_2013_5_12", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4554178714752197, "incorrect_loss_raw": 1.3691781361897786, "correct_loss_per_char": 0.7277089357376099, "incorrect_loss_per_char": 0.6845890680948893, "correct_loss_per_token": 1.4554178714752197, "incorrect_loss_per_token": 1.3691781361897786, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.363173484802246, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": false, "logits_per_token": -1.363173484802246, "logits_per_char": -0.681586742401123, "num_chars": 2}, {"sum_logits": -1.3449370861053467, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": true, "logits_per_token": -1.3449370861053467, "logits_per_char": -0.6724685430526733, "num_chars": 2}, {"sum_logits": -1.4554178714752197, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": false, "logits_per_token": -1.4554178714752197, "logits_per_char": -0.7277089357376099, "num_chars": 2}, {"sum_logits": -1.3994238376617432, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": false, "logits_per_token": -1.3994238376617432, "logits_per_char": -0.6997119188308716, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 890, "native_id": "Mercury_7098473", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.424005150794983, "incorrect_loss_raw": 1.379625717798869, "correct_loss_per_char": 0.7120025753974915, "incorrect_loss_per_char": 0.6898128588994344, "correct_loss_per_token": 1.424005150794983, "incorrect_loss_per_token": 1.379625717798869, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3962359428405762, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.3962359428405762, "logits_per_char": -0.6981179714202881, "num_chars": 2}, {"sum_logits": -1.424005150794983, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.424005150794983, "logits_per_char": -0.7120025753974915, "num_chars": 2}, {"sum_logits": -1.387491226196289, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.387491226196289, "logits_per_char": -0.6937456130981445, "num_chars": 2}, {"sum_logits": -1.3551499843597412, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.3551499843597412, "logits_per_char": -0.6775749921798706, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 891, "native_id": "Mercury_417593", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4753795862197876, "incorrect_loss_raw": 1.3638012806574504, "correct_loss_per_char": 0.7376897931098938, "incorrect_loss_per_char": 0.6819006403287252, "correct_loss_per_token": 1.4753795862197876, "incorrect_loss_per_token": 1.3638012806574504, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4753795862197876, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.4753795862197876, "logits_per_char": -0.7376897931098938, "num_chars": 2}, {"sum_logits": -1.346354603767395, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.346354603767395, "logits_per_char": -0.6731773018836975, "num_chars": 2}, {"sum_logits": -1.4326010942459106, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.4326010942459106, "logits_per_char": -0.7163005471229553, "num_chars": 2}, {"sum_logits": -1.3124481439590454, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": true, "logits_per_token": -1.3124481439590454, "logits_per_char": -0.6562240719795227, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 892, "native_id": "Mercury_7081743", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3275620937347412, "incorrect_loss_raw": 1.4134206374486287, "correct_loss_per_char": 0.6637810468673706, "incorrect_loss_per_char": 0.7067103187243143, "correct_loss_per_token": 1.3275620937347412, "incorrect_loss_per_token": 1.4134206374486287, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3741354942321777, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.3741354942321777, "logits_per_char": -0.6870677471160889, "num_chars": 2}, {"sum_logits": -1.3275620937347412, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": true, "logits_per_token": -1.3275620937347412, "logits_per_char": -0.6637810468673706, "num_chars": 2}, {"sum_logits": -1.4886056184768677, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.4886056184768677, "logits_per_char": -0.7443028092384338, "num_chars": 2}, {"sum_logits": -1.3775207996368408, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.3775207996368408, "logits_per_char": -0.6887603998184204, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 893, "native_id": "Mercury_7018410", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.317273497581482, "incorrect_loss_raw": 1.4186482429504395, "correct_loss_per_char": 0.658636748790741, "incorrect_loss_per_char": 0.7093241214752197, "correct_loss_per_token": 1.317273497581482, "incorrect_loss_per_token": 1.4186482429504395, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.365954041481018, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.365954041481018, "logits_per_char": -0.682977020740509, "num_chars": 2}, {"sum_logits": -1.5071263313293457, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.5071263313293457, "logits_per_char": -0.7535631656646729, "num_chars": 2}, {"sum_logits": -1.317273497581482, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.317273497581482, "logits_per_char": -0.658636748790741, "num_chars": 2}, {"sum_logits": -1.3828643560409546, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.3828643560409546, "logits_per_char": -0.6914321780204773, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 894, "native_id": "Mercury_402563", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5230326652526855, "incorrect_loss_raw": 1.3530374765396118, "correct_loss_per_char": 0.7615163326263428, "incorrect_loss_per_char": 0.6765187382698059, "correct_loss_per_token": 1.5230326652526855, "incorrect_loss_per_token": 1.3530374765396118, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3482680320739746, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3482680320739746, "logits_per_char": -0.6741340160369873, "num_chars": 2}, {"sum_logits": -1.5230326652526855, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.5230326652526855, "logits_per_char": -0.7615163326263428, "num_chars": 2}, {"sum_logits": -1.2812858819961548, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.2812858819961548, "logits_per_char": -0.6406429409980774, "num_chars": 2}, {"sum_logits": -1.429558515548706, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.429558515548706, "logits_per_char": -0.714779257774353, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 895, "native_id": "Mercury_416407", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4516273736953735, "incorrect_loss_raw": 1.3754003047943115, "correct_loss_per_char": 0.7258136868476868, "incorrect_loss_per_char": 0.6877001523971558, "correct_loss_per_token": 1.4516273736953735, "incorrect_loss_per_token": 1.3754003047943115, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3984602689743042, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3984602689743042, "logits_per_char": -0.6992301344871521, "num_chars": 2}, {"sum_logits": -1.2713322639465332, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.2713322639465332, "logits_per_char": -0.6356661319732666, "num_chars": 2}, {"sum_logits": -1.4516273736953735, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4516273736953735, "logits_per_char": -0.7258136868476868, "num_chars": 2}, {"sum_logits": -1.4564083814620972, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4564083814620972, "logits_per_char": -0.7282041907310486, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 896, "native_id": "Mercury_SC_400400", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4693268537521362, "incorrect_loss_raw": 1.3683663209279378, "correct_loss_per_char": 0.7346634268760681, "incorrect_loss_per_char": 0.6841831604639689, "correct_loss_per_token": 1.4693268537521362, "incorrect_loss_per_token": 1.3683663209279378, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4693268537521362, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4693268537521362, "logits_per_char": -0.7346634268760681, "num_chars": 2}, {"sum_logits": -1.3478305339813232, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3478305339813232, "logits_per_char": -0.6739152669906616, "num_chars": 2}, {"sum_logits": -1.4510775804519653, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4510775804519653, "logits_per_char": -0.7255387902259827, "num_chars": 2}, {"sum_logits": -1.306190848350525, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.306190848350525, "logits_per_char": -0.6530954241752625, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 897, "native_id": "MCAS_2000_8_22", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4609757661819458, "incorrect_loss_raw": 1.369160493214925, "correct_loss_per_char": 0.7304878830909729, "incorrect_loss_per_char": 0.6845802466074625, "correct_loss_per_token": 1.4609757661819458, "incorrect_loss_per_token": 1.369160493214925, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3882569074630737, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.3882569074630737, "logits_per_char": -0.6941284537315369, "num_chars": 2}, {"sum_logits": -1.3371217250823975, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": true, "logits_per_token": -1.3371217250823975, "logits_per_char": -0.6685608625411987, "num_chars": 2}, {"sum_logits": -1.3821028470993042, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.3821028470993042, "logits_per_char": -0.6910514235496521, "num_chars": 2}, {"sum_logits": -1.4609757661819458, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.4609757661819458, "logits_per_char": -0.7304878830909729, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 898, "native_id": "MCAS_8_2014_8", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4659554958343506, "incorrect_loss_raw": 1.373645822207133, "correct_loss_per_char": 0.7329777479171753, "incorrect_loss_per_char": 0.6868229111035665, "correct_loss_per_token": 1.4659554958343506, "incorrect_loss_per_token": 1.373645822207133, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4659554958343506, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4659554958343506, "logits_per_char": -0.7329777479171753, "num_chars": 2}, {"sum_logits": -1.457865834236145, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.457865834236145, "logits_per_char": -0.7289329171180725, "num_chars": 2}, {"sum_logits": -1.4514806270599365, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4514806270599365, "logits_per_char": -0.7257403135299683, "num_chars": 2}, {"sum_logits": -1.2115910053253174, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.2115910053253174, "logits_per_char": -0.6057955026626587, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 899, "native_id": "Mercury_7206430", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6050710678100586, "incorrect_loss_raw": 1.3326958815256755, "correct_loss_per_char": 0.8025355339050293, "incorrect_loss_per_char": 0.6663479407628378, "correct_loss_per_token": 1.6050710678100586, "incorrect_loss_per_token": 1.3326958815256755, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6050710678100586, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.6050710678100586, "logits_per_char": -0.8025355339050293, "num_chars": 2}, {"sum_logits": -1.3696589469909668, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.3696589469909668, "logits_per_char": -0.6848294734954834, "num_chars": 2}, {"sum_logits": -1.4291316270828247, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.4291316270828247, "logits_per_char": -0.7145658135414124, "num_chars": 2}, {"sum_logits": -1.1992970705032349, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": true, "logits_per_token": -1.1992970705032349, "logits_per_char": -0.5996485352516174, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 900, "native_id": "Mercury_7185343", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1332589387893677, "incorrect_loss_raw": 1.4953954617182414, "correct_loss_per_char": 0.5666294693946838, "incorrect_loss_per_char": 0.7476977308591207, "correct_loss_per_token": 1.1332589387893677, "incorrect_loss_per_token": 1.4953954617182414, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5406227111816406, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": false, "logits_per_token": -1.5406227111816406, "logits_per_char": -0.7703113555908203, "num_chars": 2}, {"sum_logits": -1.3985371589660645, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": false, "logits_per_token": -1.3985371589660645, "logits_per_char": -0.6992685794830322, "num_chars": 2}, {"sum_logits": -1.547026515007019, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": false, "logits_per_token": -1.547026515007019, "logits_per_char": -0.7735132575035095, "num_chars": 2}, {"sum_logits": -1.1332589387893677, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": true, "logits_per_token": -1.1332589387893677, "logits_per_char": -0.5666294693946838, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 901, "native_id": "OHAT_2010_8_8", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4029957056045532, "incorrect_loss_raw": 1.3907510042190552, "correct_loss_per_char": 0.7014978528022766, "incorrect_loss_per_char": 0.6953755021095276, "correct_loss_per_token": 1.4029957056045532, "incorrect_loss_per_token": 1.3907510042190552, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4737250804901123, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4737250804901123, "logits_per_char": -0.7368625402450562, "num_chars": 2}, {"sum_logits": -1.4029957056045532, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4029957056045532, "logits_per_char": -0.7014978528022766, "num_chars": 2}, {"sum_logits": -1.4449135065078735, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4449135065078735, "logits_per_char": -0.7224567532539368, "num_chars": 2}, {"sum_logits": -1.2536144256591797, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.2536144256591797, "logits_per_char": -0.6268072128295898, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 902, "native_id": "Mercury_405462", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3856531381607056, "incorrect_loss_raw": 1.3924534718195598, "correct_loss_per_char": 0.6928265690803528, "incorrect_loss_per_char": 0.6962267359097799, "correct_loss_per_token": 1.3856531381607056, "incorrect_loss_per_token": 1.3924534718195598, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4144861698150635, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.4144861698150635, "logits_per_char": -0.7072430849075317, "num_chars": 2}, {"sum_logits": -1.3856531381607056, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.3856531381607056, "logits_per_char": -0.6928265690803528, "num_chars": 2}, {"sum_logits": -1.4083892107009888, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.4083892107009888, "logits_per_char": -0.7041946053504944, "num_chars": 2}, {"sum_logits": -1.354485034942627, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": true, "logits_per_token": -1.354485034942627, "logits_per_char": -0.6772425174713135, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 903, "native_id": "Mercury_SC_LBS10337", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3093798160552979, "incorrect_loss_raw": 1.4190928141276042, "correct_loss_per_char": 0.6546899080276489, "incorrect_loss_per_char": 0.7095464070638021, "correct_loss_per_token": 1.3093798160552979, "incorrect_loss_per_token": 1.4190928141276042, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3093798160552979, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.3093798160552979, "logits_per_char": -0.6546899080276489, "num_chars": 2}, {"sum_logits": -1.430220127105713, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.430220127105713, "logits_per_char": -0.7151100635528564, "num_chars": 2}, {"sum_logits": -1.4005519151687622, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4005519151687622, "logits_per_char": -0.7002759575843811, "num_chars": 2}, {"sum_logits": -1.4265064001083374, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4265064001083374, "logits_per_char": -0.7132532000541687, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 904, "native_id": "Mercury_7142520", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4577646255493164, "incorrect_loss_raw": 1.3698412577311199, "correct_loss_per_char": 0.7288823127746582, "incorrect_loss_per_char": 0.6849206288655599, "correct_loss_per_token": 1.4577646255493164, "incorrect_loss_per_token": 1.3698412577311199, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.341288447380066, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.341288447380066, "logits_per_char": -0.670644223690033, "num_chars": 2}, {"sum_logits": -1.3984239101409912, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3984239101409912, "logits_per_char": -0.6992119550704956, "num_chars": 2}, {"sum_logits": -1.4577646255493164, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4577646255493164, "logits_per_char": -0.7288823127746582, "num_chars": 2}, {"sum_logits": -1.3698114156723022, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3698114156723022, "logits_per_char": -0.6849057078361511, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 905, "native_id": "Mercury_SC_405501", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0815765857696533, "incorrect_loss_raw": 1.5204633871714275, "correct_loss_per_char": 0.5407882928848267, "incorrect_loss_per_char": 0.7602316935857137, "correct_loss_per_token": 1.0815765857696533, "incorrect_loss_per_token": 1.5204633871714275, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6029303073883057, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.6029303073883057, "logits_per_char": -0.8014651536941528, "num_chars": 2}, {"sum_logits": -1.4633543491363525, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.4633543491363525, "logits_per_char": -0.7316771745681763, "num_chars": 2}, {"sum_logits": -1.495105504989624, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.495105504989624, "logits_per_char": -0.747552752494812, "num_chars": 2}, {"sum_logits": -1.0815765857696533, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": true, "logits_per_token": -1.0815765857696533, "logits_per_char": -0.5407882928848267, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 906, "native_id": "Mercury_7009555", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4106059074401855, "incorrect_loss_raw": 1.383439540863037, "correct_loss_per_char": 0.7053029537200928, "incorrect_loss_per_char": 0.6917197704315186, "correct_loss_per_token": 1.4106059074401855, "incorrect_loss_per_token": 1.383439540863037, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3989697694778442, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.3989697694778442, "logits_per_char": -0.6994848847389221, "num_chars": 2}, {"sum_logits": -1.4050662517547607, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4050662517547607, "logits_per_char": -0.7025331258773804, "num_chars": 2}, {"sum_logits": -1.4106059074401855, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4106059074401855, "logits_per_char": -0.7053029537200928, "num_chars": 2}, {"sum_logits": -1.3462826013565063, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.3462826013565063, "logits_per_char": -0.6731413006782532, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 907, "native_id": "Mercury_409085", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.648350477218628, "incorrect_loss_raw": 1.3194908301035564, "correct_loss_per_char": 0.824175238609314, "incorrect_loss_per_char": 0.6597454150517782, "correct_loss_per_token": 1.648350477218628, "incorrect_loss_per_token": 1.3194908301035564, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.648350477218628, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.648350477218628, "logits_per_char": -0.824175238609314, "num_chars": 2}, {"sum_logits": -1.402616024017334, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.402616024017334, "logits_per_char": -0.701308012008667, "num_chars": 2}, {"sum_logits": -1.328796625137329, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.328796625137329, "logits_per_char": -0.6643983125686646, "num_chars": 2}, {"sum_logits": -1.2270598411560059, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.2270598411560059, "logits_per_char": -0.6135299205780029, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 908, "native_id": "NYSEDREGENTS_2012_4_2", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3599488735198975, "incorrect_loss_raw": 1.4122915267944336, "correct_loss_per_char": 0.6799744367599487, "incorrect_loss_per_char": 0.7061457633972168, "correct_loss_per_token": 1.3599488735198975, "incorrect_loss_per_token": 1.4122915267944336, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3599488735198975, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3599488735198975, "logits_per_char": -0.6799744367599487, "num_chars": 2}, {"sum_logits": -1.6039286851882935, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.6039286851882935, "logits_per_char": -0.8019643425941467, "num_chars": 2}, {"sum_logits": -1.415000081062317, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.415000081062317, "logits_per_char": -0.7075000405311584, "num_chars": 2}, {"sum_logits": -1.2179458141326904, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.2179458141326904, "logits_per_char": -0.6089729070663452, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 909, "native_id": "Mercury_407539", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3485145568847656, "incorrect_loss_raw": 1.4057797988255818, "correct_loss_per_char": 0.6742572784423828, "incorrect_loss_per_char": 0.7028898994127909, "correct_loss_per_token": 1.3485145568847656, "incorrect_loss_per_token": 1.4057797988255818, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.491335391998291, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.491335391998291, "logits_per_char": -0.7456676959991455, "num_chars": 2}, {"sum_logits": -1.3413726091384888, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": true, "logits_per_token": -1.3413726091384888, "logits_per_char": -0.6706863045692444, "num_chars": 2}, {"sum_logits": -1.3846313953399658, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.3846313953399658, "logits_per_char": -0.6923156976699829, "num_chars": 2}, {"sum_logits": -1.3485145568847656, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.3485145568847656, "logits_per_char": -0.6742572784423828, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 910, "native_id": "ACTAAP_2013_7_16", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3867028951644897, "incorrect_loss_raw": 1.3932685454686482, "correct_loss_per_char": 0.6933514475822449, "incorrect_loss_per_char": 0.6966342727343241, "correct_loss_per_token": 1.3867028951644897, "incorrect_loss_per_token": 1.3932685454686482, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4702738523483276, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.4702738523483276, "logits_per_char": -0.7351369261741638, "num_chars": 2}, {"sum_logits": -1.3760182857513428, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.3760182857513428, "logits_per_char": -0.6880091428756714, "num_chars": 2}, {"sum_logits": -1.3335134983062744, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": true, "logits_per_token": -1.3335134983062744, "logits_per_char": -0.6667567491531372, "num_chars": 2}, {"sum_logits": -1.3867028951644897, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.3867028951644897, "logits_per_char": -0.6933514475822449, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 911, "native_id": "AKDE&ED_2008_8_34", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.403304100036621, "incorrect_loss_raw": 1.510545015335083, "correct_loss_per_char": 1.2016520500183105, "incorrect_loss_per_char": 0.7552725076675415, "correct_loss_per_token": 2.403304100036621, "incorrect_loss_per_token": 1.510545015335083, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.009028434753418, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.009028434753418, "logits_per_char": -0.504514217376709, "num_chars": 2}, {"sum_logits": -1.5030803680419922, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.5030803680419922, "logits_per_char": -0.7515401840209961, "num_chars": 2}, {"sum_logits": -2.019526243209839, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -2.019526243209839, "logits_per_char": -1.0097631216049194, "num_chars": 2}, {"sum_logits": -2.403304100036621, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -2.403304100036621, "logits_per_char": -1.2016520500183105, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 912, "native_id": "MCAS_2004_8_3", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4860843420028687, "incorrect_loss_raw": 1.3682002623875935, "correct_loss_per_char": 0.7430421710014343, "incorrect_loss_per_char": 0.6841001311937968, "correct_loss_per_token": 1.4860843420028687, "incorrect_loss_per_token": 1.3682002623875935, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4860843420028687, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4860843420028687, "logits_per_char": -0.7430421710014343, "num_chars": 2}, {"sum_logits": -1.465208649635315, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.465208649635315, "logits_per_char": -0.7326043248176575, "num_chars": 2}, {"sum_logits": -1.442084550857544, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.442084550857544, "logits_per_char": -0.721042275428772, "num_chars": 2}, {"sum_logits": -1.1973075866699219, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.1973075866699219, "logits_per_char": -0.5986537933349609, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 913, "native_id": "Mercury_415272", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3086169958114624, "incorrect_loss_raw": 1.4240411122639973, "correct_loss_per_char": 0.6543084979057312, "incorrect_loss_per_char": 0.7120205561319987, "correct_loss_per_token": 1.3086169958114624, "incorrect_loss_per_token": 1.4240411122639973, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.326319694519043, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.326319694519043, "logits_per_char": -0.6631598472595215, "num_chars": 2}, {"sum_logits": -1.3086169958114624, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.3086169958114624, "logits_per_char": -0.6543084979057312, "num_chars": 2}, {"sum_logits": -1.45634126663208, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.45634126663208, "logits_per_char": -0.72817063331604, "num_chars": 2}, {"sum_logits": -1.4894623756408691, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4894623756408691, "logits_per_char": -0.7447311878204346, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 914, "native_id": "Mercury_405387", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.402511477470398, "incorrect_loss_raw": 1.3899091879526775, "correct_loss_per_char": 0.701255738735199, "incorrect_loss_per_char": 0.6949545939763387, "correct_loss_per_token": 1.402511477470398, "incorrect_loss_per_token": 1.3899091879526775, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.285409927368164, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": true, "logits_per_token": -1.285409927368164, "logits_per_char": -0.642704963684082, "num_chars": 2}, {"sum_logits": -1.402511477470398, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.402511477470398, "logits_per_char": -0.701255738735199, "num_chars": 2}, {"sum_logits": -1.4819014072418213, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.4819014072418213, "logits_per_char": -0.7409507036209106, "num_chars": 2}, {"sum_logits": -1.4024162292480469, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.4024162292480469, "logits_per_char": -0.7012081146240234, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 915, "native_id": "Mercury_7116323", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3561902046203613, "incorrect_loss_raw": 1.4020081361134846, "correct_loss_per_char": 0.6780951023101807, "incorrect_loss_per_char": 0.7010040680567423, "correct_loss_per_token": 1.3561902046203613, "incorrect_loss_per_token": 1.4020081361134846, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4331034421920776, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4331034421920776, "logits_per_char": -0.7165517210960388, "num_chars": 2}, {"sum_logits": -1.382725715637207, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.382725715637207, "logits_per_char": -0.6913628578186035, "num_chars": 2}, {"sum_logits": -1.3901952505111694, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3901952505111694, "logits_per_char": -0.6950976252555847, "num_chars": 2}, {"sum_logits": -1.3561902046203613, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.3561902046203613, "logits_per_char": -0.6780951023101807, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 916, "native_id": "Mercury_7213430", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3862618207931519, "incorrect_loss_raw": 1.3920446634292603, "correct_loss_per_char": 0.6931309103965759, "incorrect_loss_per_char": 0.6960223317146301, "correct_loss_per_token": 1.3862618207931519, "incorrect_loss_per_token": 1.3920446634292603, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.389061450958252, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.389061450958252, "logits_per_char": -0.694530725479126, "num_chars": 2}, {"sum_logits": -1.3862618207931519, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.3862618207931519, "logits_per_char": -0.6931309103965759, "num_chars": 2}, {"sum_logits": -1.4477789402008057, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4477789402008057, "logits_per_char": -0.7238894701004028, "num_chars": 2}, {"sum_logits": -1.3392935991287231, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.3392935991287231, "logits_per_char": -0.6696467995643616, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 917, "native_id": "Mercury_7234360", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3325257301330566, "incorrect_loss_raw": 1.4121299187342327, "correct_loss_per_char": 0.6662628650665283, "incorrect_loss_per_char": 0.7060649593671163, "correct_loss_per_token": 1.3325257301330566, "incorrect_loss_per_token": 1.4121299187342327, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3325257301330566, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.3325257301330566, "logits_per_char": -0.6662628650665283, "num_chars": 2}, {"sum_logits": -1.4131814241409302, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4131814241409302, "logits_per_char": -0.7065907120704651, "num_chars": 2}, {"sum_logits": -1.3874199390411377, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3874199390411377, "logits_per_char": -0.6937099695205688, "num_chars": 2}, {"sum_logits": -1.4357883930206299, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4357883930206299, "logits_per_char": -0.7178941965103149, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 918, "native_id": "Mercury_405685", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.387253999710083, "incorrect_loss_raw": 1.3930650154749553, "correct_loss_per_char": 0.6936269998550415, "incorrect_loss_per_char": 0.6965325077374777, "correct_loss_per_token": 1.387253999710083, "incorrect_loss_per_token": 1.3930650154749553, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4212738275527954, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4212738275527954, "logits_per_char": -0.7106369137763977, "num_chars": 2}, {"sum_logits": -1.3726414442062378, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.3726414442062378, "logits_per_char": -0.6863207221031189, "num_chars": 2}, {"sum_logits": -1.3852797746658325, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3852797746658325, "logits_per_char": -0.6926398873329163, "num_chars": 2}, {"sum_logits": -1.387253999710083, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.387253999710083, "logits_per_char": -0.6936269998550415, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 919, "native_id": "Mercury_7236740", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.303600788116455, "incorrect_loss_raw": 1.4222009181976318, "correct_loss_per_char": 0.6518003940582275, "incorrect_loss_per_char": 0.7111004590988159, "correct_loss_per_token": 1.303600788116455, "incorrect_loss_per_token": 1.4222009181976318, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4270843267440796, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4270843267440796, "logits_per_char": -0.7135421633720398, "num_chars": 2}, {"sum_logits": -1.303600788116455, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.303600788116455, "logits_per_char": -0.6518003940582275, "num_chars": 2}, {"sum_logits": -1.4287779331207275, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4287779331207275, "logits_per_char": -0.7143889665603638, "num_chars": 2}, {"sum_logits": -1.4107404947280884, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4107404947280884, "logits_per_char": -0.7053702473640442, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 920, "native_id": "Mercury_7116235", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2591603994369507, "incorrect_loss_raw": 1.440590778986613, "correct_loss_per_char": 0.6295801997184753, "incorrect_loss_per_char": 0.7202953894933065, "correct_loss_per_token": 1.2591603994369507, "incorrect_loss_per_token": 1.440590778986613, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2591603994369507, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.2591603994369507, "logits_per_char": -0.6295801997184753, "num_chars": 2}, {"sum_logits": -1.4237313270568848, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4237313270568848, "logits_per_char": -0.7118656635284424, "num_chars": 2}, {"sum_logits": -1.3913874626159668, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3913874626159668, "logits_per_char": -0.6956937313079834, "num_chars": 2}, {"sum_logits": -1.5066535472869873, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.5066535472869873, "logits_per_char": -0.7533267736434937, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 921, "native_id": "Mercury_SC_405357", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.563607931137085, "incorrect_loss_raw": 1.3409379323323567, "correct_loss_per_char": 0.7818039655685425, "incorrect_loss_per_char": 0.6704689661661783, "correct_loss_per_token": 1.563607931137085, "incorrect_loss_per_token": 1.3409379323323567, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.404506802558899, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.404506802558899, "logits_per_char": -0.7022534012794495, "num_chars": 2}, {"sum_logits": -1.383005142211914, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.383005142211914, "logits_per_char": -0.691502571105957, "num_chars": 2}, {"sum_logits": -1.563607931137085, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.563607931137085, "logits_per_char": -0.7818039655685425, "num_chars": 2}, {"sum_logits": -1.2353018522262573, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.2353018522262573, "logits_per_char": -0.6176509261131287, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 922, "native_id": "Mercury_7042945", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4355343580245972, "incorrect_loss_raw": 1.3815285762151082, "correct_loss_per_char": 0.7177671790122986, "incorrect_loss_per_char": 0.6907642881075541, "correct_loss_per_token": 1.4355343580245972, "incorrect_loss_per_token": 1.3815285762151082, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5144946575164795, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.5144946575164795, "logits_per_char": -0.7572473287582397, "num_chars": 2}, {"sum_logits": -1.4355343580245972, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4355343580245972, "logits_per_char": -0.7177671790122986, "num_chars": 2}, {"sum_logits": -1.365046739578247, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.365046739578247, "logits_per_char": -0.6825233697891235, "num_chars": 2}, {"sum_logits": -1.2650443315505981, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.2650443315505981, "logits_per_char": -0.6325221657752991, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 923, "native_id": "Mercury_7106750", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3573416471481323, "incorrect_loss_raw": 1.4049431085586548, "correct_loss_per_char": 0.6786708235740662, "incorrect_loss_per_char": 0.7024715542793274, "correct_loss_per_token": 1.3573416471481323, "incorrect_loss_per_token": 1.4049431085586548, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.360700249671936, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.360700249671936, "logits_per_char": -0.680350124835968, "num_chars": 2}, {"sum_logits": -1.3335574865341187, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": true, "logits_per_token": -1.3335574865341187, "logits_per_char": -0.6667787432670593, "num_chars": 2}, {"sum_logits": -1.5205715894699097, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.5205715894699097, "logits_per_char": -0.7602857947349548, "num_chars": 2}, {"sum_logits": -1.3573416471481323, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.3573416471481323, "logits_per_char": -0.6786708235740662, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 924, "native_id": "MDSA_2009_4_34", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4125667810440063, "incorrect_loss_raw": 1.3893882830937703, "correct_loss_per_char": 0.7062833905220032, "incorrect_loss_per_char": 0.6946941415468851, "correct_loss_per_token": 1.4125667810440063, "incorrect_loss_per_token": 1.3893882830937703, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4785224199295044, "num_tokens": 1, "num_tokens_all": 481, "is_greedy": false, "logits_per_token": -1.4785224199295044, "logits_per_char": -0.7392612099647522, "num_chars": 2}, {"sum_logits": -1.4125667810440063, "num_tokens": 1, "num_tokens_all": 481, "is_greedy": false, "logits_per_token": -1.4125667810440063, "logits_per_char": -0.7062833905220032, "num_chars": 2}, {"sum_logits": -1.4520595073699951, "num_tokens": 1, "num_tokens_all": 481, "is_greedy": false, "logits_per_token": -1.4520595073699951, "logits_per_char": -0.7260297536849976, "num_chars": 2}, {"sum_logits": -1.2375829219818115, "num_tokens": 1, "num_tokens_all": 481, "is_greedy": true, "logits_per_token": -1.2375829219818115, "logits_per_char": -0.6187914609909058, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 925, "native_id": "Mercury_7016310", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1382368803024292, "incorrect_loss_raw": 1.494331161181132, "correct_loss_per_char": 0.5691184401512146, "incorrect_loss_per_char": 0.747165580590566, "correct_loss_per_token": 1.1382368803024292, "incorrect_loss_per_token": 1.494331161181132, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.462339162826538, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.462339162826538, "logits_per_char": -0.731169581413269, "num_chars": 2}, {"sum_logits": -1.549009084701538, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.549009084701538, "logits_per_char": -0.774504542350769, "num_chars": 2}, {"sum_logits": -1.1382368803024292, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": true, "logits_per_token": -1.1382368803024292, "logits_per_char": -0.5691184401512146, "num_chars": 2}, {"sum_logits": -1.4716452360153198, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4716452360153198, "logits_per_char": -0.7358226180076599, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 926, "native_id": "VASoL_2007_3_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2860368490219116, "incorrect_loss_raw": 1.432202974955241, "correct_loss_per_char": 0.6430184245109558, "incorrect_loss_per_char": 0.7161014874776205, "correct_loss_per_token": 1.2860368490219116, "incorrect_loss_per_token": 1.432202974955241, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2860368490219116, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.2860368490219116, "logits_per_char": -0.6430184245109558, "num_chars": 2}, {"sum_logits": -1.3519752025604248, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.3519752025604248, "logits_per_char": -0.6759876012802124, "num_chars": 2}, {"sum_logits": -1.4483673572540283, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.4483673572540283, "logits_per_char": -0.7241836786270142, "num_chars": 2}, {"sum_logits": -1.4962663650512695, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.4962663650512695, "logits_per_char": -0.7481331825256348, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 927, "native_id": "Mercury_7030468", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.254062294960022, "incorrect_loss_raw": 1.4522191286087036, "correct_loss_per_char": 0.627031147480011, "incorrect_loss_per_char": 0.7261095643043518, "correct_loss_per_token": 1.254062294960022, "incorrect_loss_per_token": 1.4522191286087036, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.254062294960022, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": true, "logits_per_token": -1.254062294960022, "logits_per_char": -0.627031147480011, "num_chars": 2}, {"sum_logits": -1.373698353767395, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": false, "logits_per_token": -1.373698353767395, "logits_per_char": -0.6868491768836975, "num_chars": 2}, {"sum_logits": -1.6182059049606323, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": false, "logits_per_token": -1.6182059049606323, "logits_per_char": -0.8091029524803162, "num_chars": 2}, {"sum_logits": -1.3647531270980835, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": false, "logits_per_token": -1.3647531270980835, "logits_per_char": -0.6823765635490417, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 928, "native_id": "Mercury_SC_402616", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.344680666923523, "incorrect_loss_raw": 1.4054077863693237, "correct_loss_per_char": 0.6723403334617615, "incorrect_loss_per_char": 0.7027038931846619, "correct_loss_per_token": 1.344680666923523, "incorrect_loss_per_token": 1.4054077863693237, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3950992822647095, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3950992822647095, "logits_per_char": -0.6975496411323547, "num_chars": 2}, {"sum_logits": -1.4390170574188232, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4390170574188232, "logits_per_char": -0.7195085287094116, "num_chars": 2}, {"sum_logits": -1.3821070194244385, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3821070194244385, "logits_per_char": -0.6910535097122192, "num_chars": 2}, {"sum_logits": -1.344680666923523, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.344680666923523, "logits_per_char": -0.6723403334617615, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 929, "native_id": "Mercury_405464", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.314326286315918, "incorrect_loss_raw": 1.4224484761555989, "correct_loss_per_char": 0.657163143157959, "incorrect_loss_per_char": 0.7112242380777994, "correct_loss_per_token": 1.314326286315918, "incorrect_loss_per_token": 1.4224484761555989, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.314326286315918, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": true, "logits_per_token": -1.314326286315918, "logits_per_char": -0.657163143157959, "num_chars": 2}, {"sum_logits": -1.3724420070648193, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.3724420070648193, "logits_per_char": -0.6862210035324097, "num_chars": 2}, {"sum_logits": -1.5462268590927124, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.5462268590927124, "logits_per_char": -0.7731134295463562, "num_chars": 2}, {"sum_logits": -1.3486765623092651, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.3486765623092651, "logits_per_char": -0.6743382811546326, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 930, "native_id": "Mercury_7205608", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2816165685653687, "incorrect_loss_raw": 1.4290301005045574, "correct_loss_per_char": 0.6408082842826843, "incorrect_loss_per_char": 0.7145150502522787, "correct_loss_per_token": 1.2816165685653687, "incorrect_loss_per_token": 1.4290301005045574, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4598811864852905, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4598811864852905, "logits_per_char": -0.7299405932426453, "num_chars": 2}, {"sum_logits": -1.2816165685653687, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.2816165685653687, "logits_per_char": -0.6408082842826843, "num_chars": 2}, {"sum_logits": -1.4199979305267334, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4199979305267334, "logits_per_char": -0.7099989652633667, "num_chars": 2}, {"sum_logits": -1.407211184501648, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.407211184501648, "logits_per_char": -0.703605592250824, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 931, "native_id": "Mercury_7015208", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3846406936645508, "incorrect_loss_raw": 1.400747537612915, "correct_loss_per_char": 0.6923203468322754, "incorrect_loss_per_char": 0.7003737688064575, "correct_loss_per_token": 1.3846406936645508, "incorrect_loss_per_token": 1.400747537612915, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5683714151382446, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.5683714151382446, "logits_per_char": -0.7841857075691223, "num_chars": 2}, {"sum_logits": -1.3846406936645508, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3846406936645508, "logits_per_char": -0.6923203468322754, "num_chars": 2}, {"sum_logits": -1.3964911699295044, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.3964911699295044, "logits_per_char": -0.6982455849647522, "num_chars": 2}, {"sum_logits": -1.237380027770996, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.237380027770996, "logits_per_char": -0.618690013885498, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 932, "native_id": "Mercury_SC_409666", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.296513319015503, "incorrect_loss_raw": 1.4248075882593791, "correct_loss_per_char": 0.6482566595077515, "incorrect_loss_per_char": 0.7124037941296896, "correct_loss_per_token": 1.296513319015503, "incorrect_loss_per_token": 1.4248075882593791, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5001436471939087, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": false, "logits_per_token": -1.5001436471939087, "logits_per_char": -0.7500718235969543, "num_chars": 2}, {"sum_logits": -1.346488356590271, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": false, "logits_per_token": -1.346488356590271, "logits_per_char": -0.6732441782951355, "num_chars": 2}, {"sum_logits": -1.4277907609939575, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": false, "logits_per_token": -1.4277907609939575, "logits_per_char": -0.7138953804969788, "num_chars": 2}, {"sum_logits": -1.296513319015503, "num_tokens": 1, "num_tokens_all": 406, "is_greedy": true, "logits_per_token": -1.296513319015503, "logits_per_char": -0.6482566595077515, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 933, "native_id": "Mercury_7230353", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.278248906135559, "incorrect_loss_raw": 1.4329156080881755, "correct_loss_per_char": 0.6391244530677795, "incorrect_loss_per_char": 0.7164578040440878, "correct_loss_per_token": 1.278248906135559, "incorrect_loss_per_token": 1.4329156080881755, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4206066131591797, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4206066131591797, "logits_per_char": -0.7103033065795898, "num_chars": 2}, {"sum_logits": -1.278248906135559, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.278248906135559, "logits_per_char": -0.6391244530677795, "num_chars": 2}, {"sum_logits": -1.4646978378295898, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4646978378295898, "logits_per_char": -0.7323489189147949, "num_chars": 2}, {"sum_logits": -1.4134423732757568, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4134423732757568, "logits_per_char": -0.7067211866378784, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 934, "native_id": "Mercury_7150343", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3653008937835693, "incorrect_loss_raw": 1.399928092956543, "correct_loss_per_char": 0.6826504468917847, "incorrect_loss_per_char": 0.6999640464782715, "correct_loss_per_token": 1.3653008937835693, "incorrect_loss_per_token": 1.399928092956543, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3822730779647827, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": false, "logits_per_token": -1.3822730779647827, "logits_per_char": -0.6911365389823914, "num_chars": 2}, {"sum_logits": -1.3653008937835693, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": false, "logits_per_token": -1.3653008937835693, "logits_per_char": -0.6826504468917847, "num_chars": 2}, {"sum_logits": -1.4605419635772705, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": false, "logits_per_token": -1.4605419635772705, "logits_per_char": -0.7302709817886353, "num_chars": 2}, {"sum_logits": -1.3569692373275757, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": true, "logits_per_token": -1.3569692373275757, "logits_per_char": -0.6784846186637878, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 935, "native_id": "Mercury_7026723", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3643739223480225, "incorrect_loss_raw": 1.4001519282658894, "correct_loss_per_char": 0.6821869611740112, "incorrect_loss_per_char": 0.7000759641329447, "correct_loss_per_token": 1.3643739223480225, "incorrect_loss_per_token": 1.4001519282658894, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3669463396072388, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3669463396072388, "logits_per_char": -0.6834731698036194, "num_chars": 2}, {"sum_logits": -1.3788566589355469, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3788566589355469, "logits_per_char": -0.6894283294677734, "num_chars": 2}, {"sum_logits": -1.4546527862548828, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4546527862548828, "logits_per_char": -0.7273263931274414, "num_chars": 2}, {"sum_logits": -1.3643739223480225, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.3643739223480225, "logits_per_char": -0.6821869611740112, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 936, "native_id": "Mercury_7024273", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3703166246414185, "incorrect_loss_raw": 1.397351622581482, "correct_loss_per_char": 0.6851583123207092, "incorrect_loss_per_char": 0.698675811290741, "correct_loss_per_token": 1.3703166246414185, "incorrect_loss_per_token": 1.397351622581482, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.338247299194336, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.338247299194336, "logits_per_char": -0.669123649597168, "num_chars": 2}, {"sum_logits": -1.423172116279602, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.423172116279602, "logits_per_char": -0.711586058139801, "num_chars": 2}, {"sum_logits": -1.3703166246414185, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3703166246414185, "logits_per_char": -0.6851583123207092, "num_chars": 2}, {"sum_logits": -1.4306354522705078, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4306354522705078, "logits_per_char": -0.7153177261352539, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 937, "native_id": "AKDE&ED_2008_8_40", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.353562831878662, "incorrect_loss_raw": 1.4138068755467732, "correct_loss_per_char": 0.676781415939331, "incorrect_loss_per_char": 0.7069034377733866, "correct_loss_per_token": 1.353562831878662, "incorrect_loss_per_token": 1.4138068755467732, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5309910774230957, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.5309910774230957, "logits_per_char": -0.7654955387115479, "num_chars": 2}, {"sum_logits": -1.501658320426941, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.501658320426941, "logits_per_char": -0.7508291602134705, "num_chars": 2}, {"sum_logits": -1.353562831878662, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.353562831878662, "logits_per_char": -0.676781415939331, "num_chars": 2}, {"sum_logits": -1.2087712287902832, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -1.2087712287902832, "logits_per_char": -0.6043856143951416, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 938, "native_id": "Mercury_183033", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.328300952911377, "incorrect_loss_raw": 1.4156295855840046, "correct_loss_per_char": 0.6641504764556885, "incorrect_loss_per_char": 0.7078147927920023, "correct_loss_per_token": 1.328300952911377, "incorrect_loss_per_token": 1.4156295855840046, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2968037128448486, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.2968037128448486, "logits_per_char": -0.6484018564224243, "num_chars": 2}, {"sum_logits": -1.328300952911377, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.328300952911377, "logits_per_char": -0.6641504764556885, "num_chars": 2}, {"sum_logits": -1.4414252042770386, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4414252042770386, "logits_per_char": -0.7207126021385193, "num_chars": 2}, {"sum_logits": -1.508659839630127, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.508659839630127, "logits_per_char": -0.7543299198150635, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 939, "native_id": "Mercury_402364", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2167818546295166, "incorrect_loss_raw": 1.4572486480077107, "correct_loss_per_char": 0.6083909273147583, "incorrect_loss_per_char": 0.7286243240038554, "correct_loss_per_token": 1.2167818546295166, "incorrect_loss_per_token": 1.4572486480077107, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4336227178573608, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4336227178573608, "logits_per_char": -0.7168113589286804, "num_chars": 2}, {"sum_logits": -1.4329708814620972, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4329708814620972, "logits_per_char": -0.7164854407310486, "num_chars": 2}, {"sum_logits": -1.5051523447036743, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.5051523447036743, "logits_per_char": -0.7525761723518372, "num_chars": 2}, {"sum_logits": -1.2167818546295166, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.2167818546295166, "logits_per_char": -0.6083909273147583, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 940, "native_id": "Mercury_7263183", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3849451541900635, "incorrect_loss_raw": 1.3967535495758057, "correct_loss_per_char": 0.6924725770950317, "incorrect_loss_per_char": 0.6983767747879028, "correct_loss_per_token": 1.3849451541900635, "incorrect_loss_per_token": 1.3967535495758057, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5044207572937012, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.5044207572937012, "logits_per_char": -0.7522103786468506, "num_chars": 2}, {"sum_logits": -1.3849451541900635, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.3849451541900635, "logits_per_char": -0.6924725770950317, "num_chars": 2}, {"sum_logits": -1.3754328489303589, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.3754328489303589, "logits_per_char": -0.6877164244651794, "num_chars": 2}, {"sum_logits": -1.310407042503357, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.310407042503357, "logits_per_char": -0.6552035212516785, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 941, "native_id": "Mercury_7222530", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3337676525115967, "incorrect_loss_raw": 1.4131937424341838, "correct_loss_per_char": 0.6668838262557983, "incorrect_loss_per_char": 0.7065968712170919, "correct_loss_per_token": 1.3337676525115967, "incorrect_loss_per_token": 1.4131937424341838, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5221107006072998, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.5221107006072998, "logits_per_char": -0.7610553503036499, "num_chars": 2}, {"sum_logits": -1.3337676525115967, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.3337676525115967, "logits_per_char": -0.6668838262557983, "num_chars": 2}, {"sum_logits": -1.401483416557312, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.401483416557312, "logits_per_char": -0.700741708278656, "num_chars": 2}, {"sum_logits": -1.3159871101379395, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": true, "logits_per_token": -1.3159871101379395, "logits_per_char": -0.6579935550689697, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 942, "native_id": "OHAT_2009_8_36", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3457971811294556, "incorrect_loss_raw": 1.4092412392298381, "correct_loss_per_char": 0.6728985905647278, "incorrect_loss_per_char": 0.7046206196149191, "correct_loss_per_token": 1.3457971811294556, "incorrect_loss_per_token": 1.4092412392298381, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.398611307144165, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.398611307144165, "logits_per_char": -0.6993056535720825, "num_chars": 2}, {"sum_logits": -1.4918497800827026, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4918497800827026, "logits_per_char": -0.7459248900413513, "num_chars": 2}, {"sum_logits": -1.3457971811294556, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.3457971811294556, "logits_per_char": -0.6728985905647278, "num_chars": 2}, {"sum_logits": -1.3372626304626465, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.3372626304626465, "logits_per_char": -0.6686313152313232, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 943, "native_id": "Mercury_7141750", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3207447528839111, "incorrect_loss_raw": 1.42854638894399, "correct_loss_per_char": 0.6603723764419556, "incorrect_loss_per_char": 0.714273194471995, "correct_loss_per_token": 1.3207447528839111, "incorrect_loss_per_token": 1.42854638894399, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.205527663230896, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.205527663230896, "logits_per_char": -0.602763831615448, "num_chars": 2}, {"sum_logits": -1.3207447528839111, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.3207447528839111, "logits_per_char": -0.6603723764419556, "num_chars": 2}, {"sum_logits": -1.4639019966125488, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4639019966125488, "logits_per_char": -0.7319509983062744, "num_chars": 2}, {"sum_logits": -1.6162095069885254, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.6162095069885254, "logits_per_char": -0.8081047534942627, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 944, "native_id": "TIMSS_2011_4_pg45", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.424988865852356, "incorrect_loss_raw": 1.381124218304952, "correct_loss_per_char": 0.712494432926178, "incorrect_loss_per_char": 0.690562109152476, "correct_loss_per_token": 1.424988865852356, "incorrect_loss_per_token": 1.381124218304952, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.424988865852356, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.424988865852356, "logits_per_char": -0.712494432926178, "num_chars": 2}, {"sum_logits": -1.4304230213165283, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4304230213165283, "logits_per_char": -0.7152115106582642, "num_chars": 2}, {"sum_logits": -1.4179658889770508, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4179658889770508, "logits_per_char": -0.7089829444885254, "num_chars": 2}, {"sum_logits": -1.2949837446212769, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.2949837446212769, "logits_per_char": -0.6474918723106384, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 945, "native_id": "MCAS_2014_5_5", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5207723379135132, "incorrect_loss_raw": 1.3508113225301106, "correct_loss_per_char": 0.7603861689567566, "incorrect_loss_per_char": 0.6754056612650553, "correct_loss_per_token": 1.5207723379135132, "incorrect_loss_per_token": 1.3508113225301106, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5207723379135132, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.5207723379135132, "logits_per_char": -0.7603861689567566, "num_chars": 2}, {"sum_logits": -1.3528012037277222, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.3528012037277222, "logits_per_char": -0.6764006018638611, "num_chars": 2}, {"sum_logits": -1.3339723348617554, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": true, "logits_per_token": -1.3339723348617554, "logits_per_char": -0.6669861674308777, "num_chars": 2}, {"sum_logits": -1.3656604290008545, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.3656604290008545, "logits_per_char": -0.6828302145004272, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 946, "native_id": "Mercury_SC_409241", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4069178104400635, "incorrect_loss_raw": 1.3871901830037434, "correct_loss_per_char": 0.7034589052200317, "incorrect_loss_per_char": 0.6935950915018717, "correct_loss_per_token": 1.4069178104400635, "incorrect_loss_per_token": 1.3871901830037434, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4874714612960815, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4874714612960815, "logits_per_char": -0.7437357306480408, "num_chars": 2}, {"sum_logits": -1.369076132774353, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.369076132774353, "logits_per_char": -0.6845380663871765, "num_chars": 2}, {"sum_logits": -1.4069178104400635, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4069178104400635, "logits_per_char": -0.7034589052200317, "num_chars": 2}, {"sum_logits": -1.305022954940796, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.305022954940796, "logits_per_char": -0.652511477470398, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 947, "native_id": "Mercury_SC_401147", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4940439462661743, "incorrect_loss_raw": 1.3586887915929158, "correct_loss_per_char": 0.7470219731330872, "incorrect_loss_per_char": 0.6793443957964579, "correct_loss_per_token": 1.4940439462661743, "incorrect_loss_per_token": 1.3586887915929158, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3330645561218262, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.3330645561218262, "logits_per_char": -0.6665322780609131, "num_chars": 2}, {"sum_logits": -1.3222742080688477, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.3222742080688477, "logits_per_char": -0.6611371040344238, "num_chars": 2}, {"sum_logits": -1.4940439462661743, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4940439462661743, "logits_per_char": -0.7470219731330872, "num_chars": 2}, {"sum_logits": -1.4207276105880737, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4207276105880737, "logits_per_char": -0.7103638052940369, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 948, "native_id": "Mercury_SC_LBS10273", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.450705885887146, "incorrect_loss_raw": 1.372747818628947, "correct_loss_per_char": 0.725352942943573, "incorrect_loss_per_char": 0.6863739093144735, "correct_loss_per_token": 1.450705885887146, "incorrect_loss_per_token": 1.372747818628947, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4071404933929443, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.4071404933929443, "logits_per_char": -0.7035702466964722, "num_chars": 2}, {"sum_logits": -1.3686603307724, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.3686603307724, "logits_per_char": -0.6843301653862, "num_chars": 2}, {"sum_logits": -1.3424426317214966, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": true, "logits_per_token": -1.3424426317214966, "logits_per_char": -0.6712213158607483, "num_chars": 2}, {"sum_logits": -1.450705885887146, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.450705885887146, "logits_per_char": -0.725352942943573, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 949, "native_id": "Mercury_401523", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.380266785621643, "incorrect_loss_raw": 1.3937129974365234, "correct_loss_per_char": 0.6901333928108215, "incorrect_loss_per_char": 0.6968564987182617, "correct_loss_per_token": 1.380266785621643, "incorrect_loss_per_token": 1.3937129974365234, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.380266785621643, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": true, "logits_per_token": -1.380266785621643, "logits_per_char": -0.6901333928108215, "num_chars": 2}, {"sum_logits": -1.3917393684387207, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.3917393684387207, "logits_per_char": -0.6958696842193604, "num_chars": 2}, {"sum_logits": -1.3947547674179077, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.3947547674179077, "logits_per_char": -0.6973773837089539, "num_chars": 2}, {"sum_logits": -1.394644856452942, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.394644856452942, "logits_per_char": -0.697322428226471, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 950, "native_id": "Mercury_401865", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5560598373413086, "incorrect_loss_raw": 1.3521573146184285, "correct_loss_per_char": 0.7780299186706543, "incorrect_loss_per_char": 0.6760786573092142, "correct_loss_per_token": 1.5560598373413086, "incorrect_loss_per_token": 1.3521573146184285, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.347115159034729, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": false, "logits_per_token": -1.347115159034729, "logits_per_char": -0.6735575795173645, "num_chars": 2}, {"sum_logits": -1.1772488355636597, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": true, "logits_per_token": -1.1772488355636597, "logits_per_char": -0.5886244177818298, "num_chars": 2}, {"sum_logits": -1.532107949256897, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": false, "logits_per_token": -1.532107949256897, "logits_per_char": -0.7660539746284485, "num_chars": 2}, {"sum_logits": -1.5560598373413086, "num_tokens": 1, "num_tokens_all": 426, "is_greedy": false, "logits_per_token": -1.5560598373413086, "logits_per_char": -0.7780299186706543, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 951, "native_id": "MCAS_2013_8_29435", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4210786819458008, "incorrect_loss_raw": 1.396152178446452, "correct_loss_per_char": 0.7105393409729004, "incorrect_loss_per_char": 0.698076089223226, "correct_loss_per_token": 1.4210786819458008, "incorrect_loss_per_token": 1.396152178446452, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.622107982635498, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.622107982635498, "logits_per_char": -0.811053991317749, "num_chars": 2}, {"sum_logits": -1.4210786819458008, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.4210786819458008, "logits_per_char": -0.7105393409729004, "num_chars": 2}, {"sum_logits": -1.3729619979858398, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3729619979858398, "logits_per_char": -0.6864809989929199, "num_chars": 2}, {"sum_logits": -1.1933865547180176, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.1933865547180176, "logits_per_char": -0.5966932773590088, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 952, "native_id": "Mercury_SC_406720", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5241270065307617, "incorrect_loss_raw": 1.3552865982055664, "correct_loss_per_char": 0.7620635032653809, "incorrect_loss_per_char": 0.6776432991027832, "correct_loss_per_token": 1.5241270065307617, "incorrect_loss_per_token": 1.3552865982055664, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2063753604888916, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.2063753604888916, "logits_per_char": -0.6031876802444458, "num_chars": 2}, {"sum_logits": -1.3825033903121948, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.3825033903121948, "logits_per_char": -0.6912516951560974, "num_chars": 2}, {"sum_logits": -1.5241270065307617, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.5241270065307617, "logits_per_char": -0.7620635032653809, "num_chars": 2}, {"sum_logits": -1.4769810438156128, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4769810438156128, "logits_per_char": -0.7384905219078064, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 953, "native_id": "NYSEDREGENTS_2013_8_34", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3116837739944458, "incorrect_loss_raw": 1.4248824914296467, "correct_loss_per_char": 0.6558418869972229, "incorrect_loss_per_char": 0.7124412457148234, "correct_loss_per_token": 1.3116837739944458, "incorrect_loss_per_token": 1.4248824914296467, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3116837739944458, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.3116837739944458, "logits_per_char": -0.6558418869972229, "num_chars": 2}, {"sum_logits": -1.3191263675689697, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.3191263675689697, "logits_per_char": -0.6595631837844849, "num_chars": 2}, {"sum_logits": -1.5897293090820312, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.5897293090820312, "logits_per_char": -0.7948646545410156, "num_chars": 2}, {"sum_logits": -1.3657917976379395, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.3657917976379395, "logits_per_char": -0.6828958988189697, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 954, "native_id": "Mercury_7038833", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5122066736221313, "incorrect_loss_raw": 1.356876532236735, "correct_loss_per_char": 0.7561033368110657, "incorrect_loss_per_char": 0.6784382661183676, "correct_loss_per_token": 1.5122066736221313, "incorrect_loss_per_token": 1.356876532236735, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4606715440750122, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4606715440750122, "logits_per_char": -0.7303357720375061, "num_chars": 2}, {"sum_logits": -1.3535422086715698, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.3535422086715698, "logits_per_char": -0.6767711043357849, "num_chars": 2}, {"sum_logits": -1.5122066736221313, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.5122066736221313, "logits_per_char": -0.7561033368110657, "num_chars": 2}, {"sum_logits": -1.256415843963623, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.256415843963623, "logits_per_char": -0.6282079219818115, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 955, "native_id": "Mercury_175560", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3875118494033813, "incorrect_loss_raw": 1.3962653875350952, "correct_loss_per_char": 0.6937559247016907, "incorrect_loss_per_char": 0.6981326937675476, "correct_loss_per_token": 1.3875118494033813, "incorrect_loss_per_token": 1.3962653875350952, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2611970901489258, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": true, "logits_per_token": -1.2611970901489258, "logits_per_char": -0.6305985450744629, "num_chars": 2}, {"sum_logits": -1.4883676767349243, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.4883676767349243, "logits_per_char": -0.7441838383674622, "num_chars": 2}, {"sum_logits": -1.3875118494033813, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.3875118494033813, "logits_per_char": -0.6937559247016907, "num_chars": 2}, {"sum_logits": -1.4392313957214355, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.4392313957214355, "logits_per_char": -0.7196156978607178, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 956, "native_id": "Mercury_7005005", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3707191944122314, "incorrect_loss_raw": 1.4006276528040569, "correct_loss_per_char": 0.6853595972061157, "incorrect_loss_per_char": 0.7003138264020284, "correct_loss_per_token": 1.3707191944122314, "incorrect_loss_per_token": 1.4006276528040569, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4827851057052612, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4827851057052612, "logits_per_char": -0.7413925528526306, "num_chars": 2}, {"sum_logits": -1.429222822189331, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.429222822189331, "logits_per_char": -0.7146114110946655, "num_chars": 2}, {"sum_logits": -1.3707191944122314, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.3707191944122314, "logits_per_char": -0.6853595972061157, "num_chars": 2}, {"sum_logits": -1.2898750305175781, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.2898750305175781, "logits_per_char": -0.6449375152587891, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 957, "native_id": "Mercury_183890", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4847444295883179, "incorrect_loss_raw": 1.3661378622055054, "correct_loss_per_char": 0.7423722147941589, "incorrect_loss_per_char": 0.6830689311027527, "correct_loss_per_token": 1.4847444295883179, "incorrect_loss_per_token": 1.3661378622055054, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2913779020309448, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.2913779020309448, "logits_per_char": -0.6456889510154724, "num_chars": 2}, {"sum_logits": -1.3073196411132812, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3073196411132812, "logits_per_char": -0.6536598205566406, "num_chars": 2}, {"sum_logits": -1.49971604347229, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.49971604347229, "logits_per_char": -0.749858021736145, "num_chars": 2}, {"sum_logits": -1.4847444295883179, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4847444295883179, "logits_per_char": -0.7423722147941589, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 958, "native_id": "Mercury_7270358", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.388040542602539, "incorrect_loss_raw": 1.392203688621521, "correct_loss_per_char": 0.6940202713012695, "incorrect_loss_per_char": 0.6961018443107605, "correct_loss_per_token": 1.388040542602539, "incorrect_loss_per_token": 1.392203688621521, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3752111196517944, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.3752111196517944, "logits_per_char": -0.6876055598258972, "num_chars": 2}, {"sum_logits": -1.471497893333435, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.471497893333435, "logits_per_char": -0.7357489466667175, "num_chars": 2}, {"sum_logits": -1.3299020528793335, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.3299020528793335, "logits_per_char": -0.6649510264396667, "num_chars": 2}, {"sum_logits": -1.388040542602539, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.388040542602539, "logits_per_char": -0.6940202713012695, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 959, "native_id": "MCAS_2013_5_29411", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4912241697311401, "incorrect_loss_raw": 1.3711463610331218, "correct_loss_per_char": 0.7456120848655701, "incorrect_loss_per_char": 0.6855731805165609, "correct_loss_per_token": 1.4912241697311401, "incorrect_loss_per_token": 1.3711463610331218, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2103245258331299, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.2103245258331299, "logits_per_char": -0.6051622629165649, "num_chars": 2}, {"sum_logits": -1.3503059148788452, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3503059148788452, "logits_per_char": -0.6751529574394226, "num_chars": 2}, {"sum_logits": -1.4912241697311401, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4912241697311401, "logits_per_char": -0.7456120848655701, "num_chars": 2}, {"sum_logits": -1.5528086423873901, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.5528086423873901, "logits_per_char": -0.7764043211936951, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 960, "native_id": "ACTAAP_2007_7_31", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4635177850723267, "incorrect_loss_raw": 1.3708513975143433, "correct_loss_per_char": 0.7317588925361633, "incorrect_loss_per_char": 0.6854256987571716, "correct_loss_per_token": 1.4635177850723267, "incorrect_loss_per_token": 1.3708513975143433, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4050053358078003, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.4050053358078003, "logits_per_char": -0.7025026679039001, "num_chars": 2}, {"sum_logits": -1.310079574584961, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": true, "logits_per_token": -1.310079574584961, "logits_per_char": -0.6550397872924805, "num_chars": 2}, {"sum_logits": -1.4635177850723267, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.4635177850723267, "logits_per_char": -0.7317588925361633, "num_chars": 2}, {"sum_logits": -1.3974692821502686, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.3974692821502686, "logits_per_char": -0.6987346410751343, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 961, "native_id": "Mercury_7082023", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5190019607543945, "incorrect_loss_raw": 1.361864407857259, "correct_loss_per_char": 0.7595009803771973, "incorrect_loss_per_char": 0.6809322039286295, "correct_loss_per_token": 1.5190019607543945, "incorrect_loss_per_token": 1.361864407857259, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2788664102554321, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.2788664102554321, "logits_per_char": -0.6394332051277161, "num_chars": 2}, {"sum_logits": -1.5190019607543945, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.5190019607543945, "logits_per_char": -0.7595009803771973, "num_chars": 2}, {"sum_logits": -1.5582231283187866, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.5582231283187866, "logits_per_char": -0.7791115641593933, "num_chars": 2}, {"sum_logits": -1.2485036849975586, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": true, "logits_per_token": -1.2485036849975586, "logits_per_char": -0.6242518424987793, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 962, "native_id": "MCAS_2003_8_21", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3382818698883057, "incorrect_loss_raw": 1.4091440041859944, "correct_loss_per_char": 0.6691409349441528, "incorrect_loss_per_char": 0.7045720020929972, "correct_loss_per_token": 1.3382818698883057, "incorrect_loss_per_token": 1.4091440041859944, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3805255889892578, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.3805255889892578, "logits_per_char": -0.6902627944946289, "num_chars": 2}, {"sum_logits": -1.3382818698883057, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.3382818698883057, "logits_per_char": -0.6691409349441528, "num_chars": 2}, {"sum_logits": -1.4693132638931274, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4693132638931274, "logits_per_char": -0.7346566319465637, "num_chars": 2}, {"sum_logits": -1.3775931596755981, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.3775931596755981, "logits_per_char": -0.6887965798377991, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 963, "native_id": "NYSEDREGENTS_2015_8_9", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2708293199539185, "incorrect_loss_raw": 1.4362086455027263, "correct_loss_per_char": 0.6354146599769592, "incorrect_loss_per_char": 0.7181043227513632, "correct_loss_per_token": 1.2708293199539185, "incorrect_loss_per_token": 1.4362086455027263, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3592792749404907, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3592792749404907, "logits_per_char": -0.6796396374702454, "num_chars": 2}, {"sum_logits": -1.2708293199539185, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.2708293199539185, "logits_per_char": -0.6354146599769592, "num_chars": 2}, {"sum_logits": -1.5006507635116577, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.5006507635116577, "logits_per_char": -0.7503253817558289, "num_chars": 2}, {"sum_logits": -1.4486958980560303, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4486958980560303, "logits_per_char": -0.7243479490280151, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 964, "native_id": "Mercury_7064750", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.160150408744812, "incorrect_loss_raw": 1.4811956485112507, "correct_loss_per_char": 0.580075204372406, "incorrect_loss_per_char": 0.7405978242556254, "correct_loss_per_token": 1.160150408744812, "incorrect_loss_per_token": 1.4811956485112507, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.535265326499939, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.535265326499939, "logits_per_char": -0.7676326632499695, "num_chars": 2}, {"sum_logits": -1.4678984880447388, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.4678984880447388, "logits_per_char": -0.7339492440223694, "num_chars": 2}, {"sum_logits": -1.4404231309890747, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.4404231309890747, "logits_per_char": -0.7202115654945374, "num_chars": 2}, {"sum_logits": -1.160150408744812, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": true, "logits_per_token": -1.160150408744812, "logits_per_char": -0.580075204372406, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 965, "native_id": "TIMSS_2007_8_pg113", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4660040140151978, "incorrect_loss_raw": 1.3672816356023152, "correct_loss_per_char": 0.7330020070075989, "incorrect_loss_per_char": 0.6836408178011576, "correct_loss_per_token": 1.4660040140151978, "incorrect_loss_per_token": 1.3672816356023152, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4186220169067383, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4186220169067383, "logits_per_char": -0.7093110084533691, "num_chars": 2}, {"sum_logits": -1.3365206718444824, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.3365206718444824, "logits_per_char": -0.6682603359222412, "num_chars": 2}, {"sum_logits": -1.4660040140151978, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4660040140151978, "logits_per_char": -0.7330020070075989, "num_chars": 2}, {"sum_logits": -1.346702218055725, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.346702218055725, "logits_per_char": -0.6733511090278625, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 966, "native_id": "Mercury_7173583", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5026377439498901, "incorrect_loss_raw": 1.3554786443710327, "correct_loss_per_char": 0.7513188719749451, "incorrect_loss_per_char": 0.6777393221855164, "correct_loss_per_token": 1.5026377439498901, "incorrect_loss_per_token": 1.3554786443710327, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3403599262237549, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.3403599262237549, "logits_per_char": -0.6701799631118774, "num_chars": 2}, {"sum_logits": -1.3638920783996582, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.3638920783996582, "logits_per_char": -0.6819460391998291, "num_chars": 2}, {"sum_logits": -1.5026377439498901, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.5026377439498901, "logits_per_char": -0.7513188719749451, "num_chars": 2}, {"sum_logits": -1.362183928489685, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.362183928489685, "logits_per_char": -0.6810919642448425, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 967, "native_id": "Mercury_403930", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5138182640075684, "incorrect_loss_raw": 1.354567249615987, "correct_loss_per_char": 0.7569091320037842, "incorrect_loss_per_char": 0.6772836248079935, "correct_loss_per_token": 1.5138182640075684, "incorrect_loss_per_token": 1.354567249615987, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3117130994796753, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.3117130994796753, "logits_per_char": -0.6558565497398376, "num_chars": 2}, {"sum_logits": -1.3494371175765991, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.3494371175765991, "logits_per_char": -0.6747185587882996, "num_chars": 2}, {"sum_logits": -1.5138182640075684, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.5138182640075684, "logits_per_char": -0.7569091320037842, "num_chars": 2}, {"sum_logits": -1.402551531791687, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.402551531791687, "logits_per_char": -0.7012757658958435, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 968, "native_id": "Mercury_417118", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5300168991088867, "incorrect_loss_raw": 1.3495777448018391, "correct_loss_per_char": 0.7650084495544434, "incorrect_loss_per_char": 0.6747888724009196, "correct_loss_per_token": 1.5300168991088867, "incorrect_loss_per_token": 1.3495777448018391, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3862766027450562, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.3862766027450562, "logits_per_char": -0.6931383013725281, "num_chars": 2}, {"sum_logits": -1.3769034147262573, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.3769034147262573, "logits_per_char": -0.6884517073631287, "num_chars": 2}, {"sum_logits": -1.5300168991088867, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.5300168991088867, "logits_per_char": -0.7650084495544434, "num_chars": 2}, {"sum_logits": -1.285553216934204, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": true, "logits_per_token": -1.285553216934204, "logits_per_char": -0.642776608467102, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 969, "native_id": "Mercury_7143010", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4473730325698853, "incorrect_loss_raw": 1.3866148392359416, "correct_loss_per_char": 0.7236865162849426, "incorrect_loss_per_char": 0.6933074196179708, "correct_loss_per_token": 1.4473730325698853, "incorrect_loss_per_token": 1.3866148392359416, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5198676586151123, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.5198676586151123, "logits_per_char": -0.7599338293075562, "num_chars": 2}, {"sum_logits": -1.4473730325698853, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.4473730325698853, "logits_per_char": -0.7236865162849426, "num_chars": 2}, {"sum_logits": -1.4964865446090698, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.4964865446090698, "logits_per_char": -0.7482432723045349, "num_chars": 2}, {"sum_logits": -1.1434903144836426, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": true, "logits_per_token": -1.1434903144836426, "logits_per_char": -0.5717451572418213, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 970, "native_id": "Mercury_SC_401801", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.374848484992981, "incorrect_loss_raw": 1.3953251838684082, "correct_loss_per_char": 0.6874242424964905, "incorrect_loss_per_char": 0.6976625919342041, "correct_loss_per_token": 1.374848484992981, "incorrect_loss_per_token": 1.3953251838684082, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.374848484992981, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.374848484992981, "logits_per_char": -0.6874242424964905, "num_chars": 2}, {"sum_logits": -1.3823856115341187, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.3823856115341187, "logits_per_char": -0.6911928057670593, "num_chars": 2}, {"sum_logits": -1.3955577611923218, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.3955577611923218, "logits_per_char": -0.6977788805961609, "num_chars": 2}, {"sum_logits": -1.4080321788787842, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4080321788787842, "logits_per_char": -0.7040160894393921, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 971, "native_id": "Mercury_410334", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4594680070877075, "incorrect_loss_raw": 1.375370462735494, "correct_loss_per_char": 0.7297340035438538, "incorrect_loss_per_char": 0.687685231367747, "correct_loss_per_token": 1.4594680070877075, "incorrect_loss_per_token": 1.375370462735494, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5505694150924683, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": false, "logits_per_token": -1.5505694150924683, "logits_per_char": -0.7752847075462341, "num_chars": 2}, {"sum_logits": -1.4594680070877075, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": false, "logits_per_token": -1.4594680070877075, "logits_per_char": -0.7297340035438538, "num_chars": 2}, {"sum_logits": -1.3231759071350098, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": false, "logits_per_token": -1.3231759071350098, "logits_per_char": -0.6615879535675049, "num_chars": 2}, {"sum_logits": -1.252366065979004, "num_tokens": 1, "num_tokens_all": 417, "is_greedy": true, "logits_per_token": -1.252366065979004, "logits_per_char": -0.626183032989502, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 972, "native_id": "NAEP_2000_4_S12+3", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4105279445648193, "incorrect_loss_raw": 1.3931981722513835, "correct_loss_per_char": 0.7052639722824097, "incorrect_loss_per_char": 0.6965990861256918, "correct_loss_per_token": 1.4105279445648193, "incorrect_loss_per_token": 1.3931981722513835, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.569103479385376, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.569103479385376, "logits_per_char": -0.784551739692688, "num_chars": 2}, {"sum_logits": -1.4105279445648193, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4105279445648193, "logits_per_char": -0.7052639722824097, "num_chars": 2}, {"sum_logits": -1.2648131847381592, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": true, "logits_per_token": -1.2648131847381592, "logits_per_char": -0.6324065923690796, "num_chars": 2}, {"sum_logits": -1.3456778526306152, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.3456778526306152, "logits_per_char": -0.6728389263153076, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 973, "native_id": "Mercury_7218015", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2053241729736328, "incorrect_loss_raw": 1.4606465895970662, "correct_loss_per_char": 0.6026620864868164, "incorrect_loss_per_char": 0.7303232947985331, "correct_loss_per_token": 1.2053241729736328, "incorrect_loss_per_token": 1.4606465895970662, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5064207315444946, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.5064207315444946, "logits_per_char": -0.7532103657722473, "num_chars": 2}, {"sum_logits": -1.4281816482543945, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4281816482543945, "logits_per_char": -0.7140908241271973, "num_chars": 2}, {"sum_logits": -1.4473373889923096, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4473373889923096, "logits_per_char": -0.7236686944961548, "num_chars": 2}, {"sum_logits": -1.2053241729736328, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.2053241729736328, "logits_per_char": -0.6026620864868164, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 974, "native_id": "Mercury_7109603", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5077115297317505, "incorrect_loss_raw": 1.3554487228393555, "correct_loss_per_char": 0.7538557648658752, "incorrect_loss_per_char": 0.6777243614196777, "correct_loss_per_token": 1.5077115297317505, "incorrect_loss_per_token": 1.3554487228393555, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5077115297317505, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.5077115297317505, "logits_per_char": -0.7538557648658752, "num_chars": 2}, {"sum_logits": -1.2870984077453613, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": true, "logits_per_token": -1.2870984077453613, "logits_per_char": -0.6435492038726807, "num_chars": 2}, {"sum_logits": -1.4292036294937134, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.4292036294937134, "logits_per_char": -0.7146018147468567, "num_chars": 2}, {"sum_logits": -1.3500441312789917, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.3500441312789917, "logits_per_char": -0.6750220656394958, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 975, "native_id": "NYSEDREGENTS_2008_8_42", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3316112756729126, "incorrect_loss_raw": 1.4129710992177327, "correct_loss_per_char": 0.6658056378364563, "incorrect_loss_per_char": 0.7064855496088663, "correct_loss_per_token": 1.3316112756729126, "incorrect_loss_per_token": 1.4129710992177327, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4608300924301147, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4608300924301147, "logits_per_char": -0.7304150462150574, "num_chars": 2}, {"sum_logits": -1.4707798957824707, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4707798957824707, "logits_per_char": -0.7353899478912354, "num_chars": 2}, {"sum_logits": -1.3316112756729126, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3316112756729126, "logits_per_char": -0.6658056378364563, "num_chars": 2}, {"sum_logits": -1.3073033094406128, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.3073033094406128, "logits_per_char": -0.6536516547203064, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 976, "native_id": "NAEP_2000_8_S11+11", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3858059644699097, "incorrect_loss_raw": 1.4072163899739583, "correct_loss_per_char": 0.6929029822349548, "incorrect_loss_per_char": 0.7036081949869791, "correct_loss_per_token": 1.3858059644699097, "incorrect_loss_per_token": 1.4072163899739583, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5970627069473267, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.5970627069473267, "logits_per_char": -0.7985313534736633, "num_chars": 2}, {"sum_logits": -1.3858059644699097, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.3858059644699097, "logits_per_char": -0.6929029822349548, "num_chars": 2}, {"sum_logits": -1.4394253492355347, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.4394253492355347, "logits_per_char": -0.7197126746177673, "num_chars": 2}, {"sum_logits": -1.1851611137390137, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": true, "logits_per_token": -1.1851611137390137, "logits_per_char": -0.5925805568695068, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 977, "native_id": "Mercury_7271670", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2252159118652344, "incorrect_loss_raw": 1.4568151235580444, "correct_loss_per_char": 0.6126079559326172, "incorrect_loss_per_char": 0.7284075617790222, "correct_loss_per_token": 1.2252159118652344, "incorrect_loss_per_token": 1.4568151235580444, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.556488037109375, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.556488037109375, "logits_per_char": -0.7782440185546875, "num_chars": 2}, {"sum_logits": -1.4923399686813354, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4923399686813354, "logits_per_char": -0.7461699843406677, "num_chars": 2}, {"sum_logits": -1.3216173648834229, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3216173648834229, "logits_per_char": -0.6608086824417114, "num_chars": 2}, {"sum_logits": -1.2252159118652344, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.2252159118652344, "logits_per_char": -0.6126079559326172, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 978, "native_id": "ACTAAP_2009_5_8", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3778873682022095, "incorrect_loss_raw": 1.399710973103841, "correct_loss_per_char": 0.6889436841011047, "incorrect_loss_per_char": 0.6998554865519205, "correct_loss_per_token": 1.3778873682022095, "incorrect_loss_per_token": 1.399710973103841, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4604507684707642, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4604507684707642, "logits_per_char": -0.7302253842353821, "num_chars": 2}, {"sum_logits": -1.3778873682022095, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3778873682022095, "logits_per_char": -0.6889436841011047, "num_chars": 2}, {"sum_logits": -1.3088948726654053, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.3088948726654053, "logits_per_char": -0.6544474363327026, "num_chars": 2}, {"sum_logits": -1.429787278175354, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.429787278175354, "logits_per_char": -0.714893639087677, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 979, "native_id": "NYSEDREGENTS_2012_4_1", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4632830619812012, "incorrect_loss_raw": 1.3716691732406616, "correct_loss_per_char": 0.7316415309906006, "incorrect_loss_per_char": 0.6858345866203308, "correct_loss_per_token": 1.4632830619812012, "incorrect_loss_per_token": 1.3716691732406616, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4844707250595093, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.4844707250595093, "logits_per_char": -0.7422353625297546, "num_chars": 2}, {"sum_logits": -1.4632830619812012, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.4632830619812012, "logits_per_char": -0.7316415309906006, "num_chars": 2}, {"sum_logits": -1.2986339330673218, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": true, "logits_per_token": -1.2986339330673218, "logits_per_char": -0.6493169665336609, "num_chars": 2}, {"sum_logits": -1.3319028615951538, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.3319028615951538, "logits_per_char": -0.6659514307975769, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 980, "native_id": "Mercury_SC_409030", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3706039190292358, "incorrect_loss_raw": 1.3989151318868, "correct_loss_per_char": 0.6853019595146179, "incorrect_loss_per_char": 0.6994575659434, "correct_loss_per_token": 1.3706039190292358, "incorrect_loss_per_token": 1.3989151318868, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.429214596748352, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.429214596748352, "logits_per_char": -0.714607298374176, "num_chars": 2}, {"sum_logits": -1.3370540142059326, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.3370540142059326, "logits_per_char": -0.6685270071029663, "num_chars": 2}, {"sum_logits": -1.4304767847061157, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.4304767847061157, "logits_per_char": -0.7152383923530579, "num_chars": 2}, {"sum_logits": -1.3706039190292358, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.3706039190292358, "logits_per_char": -0.6853019595146179, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 981, "native_id": "MEA_2013_8_8", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4010415077209473, "incorrect_loss_raw": 1.3888233105341594, "correct_loss_per_char": 0.7005207538604736, "incorrect_loss_per_char": 0.6944116552670797, "correct_loss_per_token": 1.4010415077209473, "incorrect_loss_per_token": 1.3888233105341594, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4189095497131348, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4189095497131348, "logits_per_char": -0.7094547748565674, "num_chars": 2}, {"sum_logits": -1.4223501682281494, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4223501682281494, "logits_per_char": -0.7111750841140747, "num_chars": 2}, {"sum_logits": -1.3252102136611938, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.3252102136611938, "logits_per_char": -0.6626051068305969, "num_chars": 2}, {"sum_logits": -1.4010415077209473, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4010415077209473, "logits_per_char": -0.7005207538604736, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 982, "native_id": "Mercury_7140333", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.265442967414856, "incorrect_loss_raw": 1.4383985996246338, "correct_loss_per_char": 0.632721483707428, "incorrect_loss_per_char": 0.7191992998123169, "correct_loss_per_token": 1.265442967414856, "incorrect_loss_per_token": 1.4383985996246338, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.397002100944519, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.397002100944519, "logits_per_char": -0.6985010504722595, "num_chars": 2}, {"sum_logits": -1.3791764974594116, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.3791764974594116, "logits_per_char": -0.6895882487297058, "num_chars": 2}, {"sum_logits": -1.5390172004699707, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.5390172004699707, "logits_per_char": -0.7695086002349854, "num_chars": 2}, {"sum_logits": -1.265442967414856, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": true, "logits_per_token": -1.265442967414856, "logits_per_char": -0.632721483707428, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 983, "native_id": "Mercury_SC_LBS10664", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.356079339981079, "incorrect_loss_raw": 1.4077468713124592, "correct_loss_per_char": 0.6780396699905396, "incorrect_loss_per_char": 0.7038734356562296, "correct_loss_per_token": 1.356079339981079, "incorrect_loss_per_token": 1.4077468713124592, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5108734369277954, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.5108734369277954, "logits_per_char": -0.7554367184638977, "num_chars": 2}, {"sum_logits": -1.420997977256775, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.420997977256775, "logits_per_char": -0.7104989886283875, "num_chars": 2}, {"sum_logits": -1.356079339981079, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.356079339981079, "logits_per_char": -0.6780396699905396, "num_chars": 2}, {"sum_logits": -1.2913691997528076, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.2913691997528076, "logits_per_char": -0.6456845998764038, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 984, "native_id": "Mercury_7171430", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.33297860622406, "incorrect_loss_raw": 1.4276138544082642, "correct_loss_per_char": 0.66648930311203, "incorrect_loss_per_char": 0.7138069272041321, "correct_loss_per_token": 1.33297860622406, "incorrect_loss_per_token": 1.4276138544082642, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6800488233566284, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.6800488233566284, "logits_per_char": -0.8400244116783142, "num_chars": 2}, {"sum_logits": -1.33297860622406, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.33297860622406, "logits_per_char": -0.66648930311203, "num_chars": 2}, {"sum_logits": -1.3612377643585205, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.3612377643585205, "logits_per_char": -0.6806188821792603, "num_chars": 2}, {"sum_logits": -1.2415549755096436, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.2415549755096436, "logits_per_char": -0.6207774877548218, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 985, "native_id": "Mercury_SC_407572", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3963282108306885, "incorrect_loss_raw": 1.3872098525365193, "correct_loss_per_char": 0.6981641054153442, "incorrect_loss_per_char": 0.6936049262682596, "correct_loss_per_token": 1.3963282108306885, "incorrect_loss_per_token": 1.3872098525365193, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.36065673828125, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.36065673828125, "logits_per_char": -0.680328369140625, "num_chars": 2}, {"sum_logits": -1.4160001277923584, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4160001277923584, "logits_per_char": -0.7080000638961792, "num_chars": 2}, {"sum_logits": -1.3849726915359497, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3849726915359497, "logits_per_char": -0.6924863457679749, "num_chars": 2}, {"sum_logits": -1.3963282108306885, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3963282108306885, "logits_per_char": -0.6981641054153442, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 986, "native_id": "VASoL_2009_3_2", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.382736325263977, "incorrect_loss_raw": 1.3968257109324138, "correct_loss_per_char": 0.6913681626319885, "incorrect_loss_per_char": 0.6984128554662069, "correct_loss_per_token": 1.382736325263977, "incorrect_loss_per_token": 1.3968257109324138, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.435560941696167, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.435560941696167, "logits_per_char": -0.7177804708480835, "num_chars": 2}, {"sum_logits": -1.3902133703231812, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3902133703231812, "logits_per_char": -0.6951066851615906, "num_chars": 2}, {"sum_logits": -1.364702820777893, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.364702820777893, "logits_per_char": -0.6823514103889465, "num_chars": 2}, {"sum_logits": -1.382736325263977, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.382736325263977, "logits_per_char": -0.6913681626319885, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 987, "native_id": "Mercury_SC_407383", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4607985019683838, "incorrect_loss_raw": 1.3704376618067424, "correct_loss_per_char": 0.7303992509841919, "incorrect_loss_per_char": 0.6852188309033712, "correct_loss_per_token": 1.4607985019683838, "incorrect_loss_per_token": 1.3704376618067424, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3588405847549438, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.3588405847549438, "logits_per_char": -0.6794202923774719, "num_chars": 2}, {"sum_logits": -1.3117740154266357, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": true, "logits_per_token": -1.3117740154266357, "logits_per_char": -0.6558870077133179, "num_chars": 2}, {"sum_logits": -1.4406983852386475, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.4406983852386475, "logits_per_char": -0.7203491926193237, "num_chars": 2}, {"sum_logits": -1.4607985019683838, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.4607985019683838, "logits_per_char": -0.7303992509841919, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 988, "native_id": "Mercury_7218400", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4733449220657349, "incorrect_loss_raw": 1.3710837364196777, "correct_loss_per_char": 0.7366724610328674, "incorrect_loss_per_char": 0.6855418682098389, "correct_loss_per_token": 1.4733449220657349, "incorrect_loss_per_token": 1.3710837364196777, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4767378568649292, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.4767378568649292, "logits_per_char": -0.7383689284324646, "num_chars": 2}, {"sum_logits": -1.4733449220657349, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.4733449220657349, "logits_per_char": -0.7366724610328674, "num_chars": 2}, {"sum_logits": -1.4341189861297607, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.4341189861297607, "logits_per_char": -0.7170594930648804, "num_chars": 2}, {"sum_logits": -1.2023943662643433, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": true, "logits_per_token": -1.2023943662643433, "logits_per_char": -0.6011971831321716, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 989, "native_id": "Mercury_184818", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2076948881149292, "incorrect_loss_raw": 1.4609791437784831, "correct_loss_per_char": 0.6038474440574646, "incorrect_loss_per_char": 0.7304895718892416, "correct_loss_per_token": 1.2076948881149292, "incorrect_loss_per_token": 1.4609791437784831, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5444499254226685, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.5444499254226685, "logits_per_char": -0.7722249627113342, "num_chars": 2}, {"sum_logits": -1.3821862936019897, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3821862936019897, "logits_per_char": -0.6910931468009949, "num_chars": 2}, {"sum_logits": -1.456301212310791, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.456301212310791, "logits_per_char": -0.7281506061553955, "num_chars": 2}, {"sum_logits": -1.2076948881149292, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.2076948881149292, "logits_per_char": -0.6038474440574646, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 990, "native_id": "Mercury_SC_405931", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2972773313522339, "incorrect_loss_raw": 1.4241523742675781, "correct_loss_per_char": 0.6486386656761169, "incorrect_loss_per_char": 0.7120761871337891, "correct_loss_per_token": 1.2972773313522339, "incorrect_loss_per_token": 1.4241523742675781, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4870373010635376, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.4870373010635376, "logits_per_char": -0.7435186505317688, "num_chars": 2}, {"sum_logits": -1.3650310039520264, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.3650310039520264, "logits_per_char": -0.6825155019760132, "num_chars": 2}, {"sum_logits": -1.4203888177871704, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.4203888177871704, "logits_per_char": -0.7101944088935852, "num_chars": 2}, {"sum_logits": -1.2972773313522339, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": true, "logits_per_token": -1.2972773313522339, "logits_per_char": -0.6486386656761169, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 991, "native_id": "Mercury_SC_416177", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5433119535446167, "incorrect_loss_raw": 1.3455629348754883, "correct_loss_per_char": 0.7716559767723083, "incorrect_loss_per_char": 0.6727814674377441, "correct_loss_per_token": 1.5433119535446167, "incorrect_loss_per_token": 1.3455629348754883, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2945997714996338, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.2945997714996338, "logits_per_char": -0.6472998857498169, "num_chars": 2}, {"sum_logits": -1.3167401552200317, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.3167401552200317, "logits_per_char": -0.6583700776100159, "num_chars": 2}, {"sum_logits": -1.5433119535446167, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.5433119535446167, "logits_per_char": -0.7716559767723083, "num_chars": 2}, {"sum_logits": -1.4253488779067993, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4253488779067993, "logits_per_char": -0.7126744389533997, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 992, "native_id": "Mercury_SC_406625", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3975908756256104, "incorrect_loss_raw": 1.389225999514262, "correct_loss_per_char": 0.6987954378128052, "incorrect_loss_per_char": 0.694612999757131, "correct_loss_per_token": 1.3975908756256104, "incorrect_loss_per_token": 1.389225999514262, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4135679006576538, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4135679006576538, "logits_per_char": -0.7067839503288269, "num_chars": 2}, {"sum_logits": -1.3975908756256104, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.3975908756256104, "logits_per_char": -0.6987954378128052, "num_chars": 2}, {"sum_logits": -1.4656636714935303, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4656636714935303, "logits_per_char": -0.7328318357467651, "num_chars": 2}, {"sum_logits": -1.2884464263916016, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.2884464263916016, "logits_per_char": -0.6442232131958008, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 993, "native_id": "MCAS_2014_8_16", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2030030488967896, "incorrect_loss_raw": 1.461273153622945, "correct_loss_per_char": 0.6015015244483948, "incorrect_loss_per_char": 0.7306365768114725, "correct_loss_per_token": 1.2030030488967896, "incorrect_loss_per_token": 1.461273153622945, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4686236381530762, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": false, "logits_per_token": -1.4686236381530762, "logits_per_char": -0.7343118190765381, "num_chars": 2}, {"sum_logits": -1.4946426153182983, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": false, "logits_per_token": -1.4946426153182983, "logits_per_char": -0.7473213076591492, "num_chars": 2}, {"sum_logits": -1.420553207397461, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": false, "logits_per_token": -1.420553207397461, "logits_per_char": -0.7102766036987305, "num_chars": 2}, {"sum_logits": -1.2030030488967896, "num_tokens": 1, "num_tokens_all": 425, "is_greedy": true, "logits_per_token": -1.2030030488967896, "logits_per_char": -0.6015015244483948, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 994, "native_id": "Mercury_7138460", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4131461381912231, "incorrect_loss_raw": 1.3829400936762493, "correct_loss_per_char": 0.7065730690956116, "incorrect_loss_per_char": 0.6914700468381246, "correct_loss_per_token": 1.4131461381912231, "incorrect_loss_per_token": 1.3829400936762493, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3847213983535767, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3847213983535767, "logits_per_char": -0.6923606991767883, "num_chars": 2}, {"sum_logits": -1.3791877031326294, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.3791877031326294, "logits_per_char": -0.6895938515663147, "num_chars": 2}, {"sum_logits": -1.4131461381912231, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4131461381912231, "logits_per_char": -0.7065730690956116, "num_chars": 2}, {"sum_logits": -1.3849111795425415, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3849111795425415, "logits_per_char": -0.6924555897712708, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 995, "native_id": "Mercury_7129640", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.403329610824585, "incorrect_loss_raw": 1.3893420298894246, "correct_loss_per_char": 0.7016648054122925, "incorrect_loss_per_char": 0.6946710149447123, "correct_loss_per_token": 1.403329610824585, "incorrect_loss_per_token": 1.3893420298894246, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4392682313919067, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.4392682313919067, "logits_per_char": -0.7196341156959534, "num_chars": 2}, {"sum_logits": -1.403329610824585, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.403329610824585, "logits_per_char": -0.7016648054122925, "num_chars": 2}, {"sum_logits": -1.467871904373169, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.467871904373169, "logits_per_char": -0.7339359521865845, "num_chars": 2}, {"sum_logits": -1.2608859539031982, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.2608859539031982, "logits_per_char": -0.6304429769515991, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 996, "native_id": "Mercury_7024290", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4954057931900024, "incorrect_loss_raw": 1.3720231850941975, "correct_loss_per_char": 0.7477028965950012, "incorrect_loss_per_char": 0.6860115925470988, "correct_loss_per_token": 1.4954057931900024, "incorrect_loss_per_token": 1.3720231850941975, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5118680000305176, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.5118680000305176, "logits_per_char": -0.7559340000152588, "num_chars": 2}, {"sum_logits": -1.4954057931900024, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4954057931900024, "logits_per_char": -0.7477028965950012, "num_chars": 2}, {"sum_logits": -1.1642649173736572, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.1642649173736572, "logits_per_char": -0.5821324586868286, "num_chars": 2}, {"sum_logits": -1.439936637878418, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.439936637878418, "logits_per_char": -0.719968318939209, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 997, "native_id": "NYSEDREGENTS_2008_4_28", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4600876569747925, "incorrect_loss_raw": 1.3696892261505127, "correct_loss_per_char": 0.7300438284873962, "incorrect_loss_per_char": 0.6848446130752563, "correct_loss_per_token": 1.4600876569747925, "incorrect_loss_per_token": 1.3696892261505127, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3144487142562866, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.3144487142562866, "logits_per_char": -0.6572243571281433, "num_chars": 2}, {"sum_logits": -1.3814557790756226, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.3814557790756226, "logits_per_char": -0.6907278895378113, "num_chars": 2}, {"sum_logits": -1.4600876569747925, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4600876569747925, "logits_per_char": -0.7300438284873962, "num_chars": 2}, {"sum_logits": -1.413163185119629, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.413163185119629, "logits_per_char": -0.7065815925598145, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 998, "native_id": "Mercury_SC_414339", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.438471794128418, "incorrect_loss_raw": 1.375364859898885, "correct_loss_per_char": 0.719235897064209, "incorrect_loss_per_char": 0.6876824299494425, "correct_loss_per_token": 1.438471794128418, "incorrect_loss_per_token": 1.375364859898885, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4317305088043213, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4317305088043213, "logits_per_char": -0.7158652544021606, "num_chars": 2}, {"sum_logits": -1.382553219795227, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.382553219795227, "logits_per_char": -0.6912766098976135, "num_chars": 2}, {"sum_logits": -1.438471794128418, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.438471794128418, "logits_per_char": -0.719235897064209, "num_chars": 2}, {"sum_logits": -1.311810851097107, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.311810851097107, "logits_per_char": -0.6559054255485535, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 999, "native_id": "LEAP_2000_8_2", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4192560911178589, "incorrect_loss_raw": 1.3855773210525513, "correct_loss_per_char": 0.7096280455589294, "incorrect_loss_per_char": 0.6927886605262756, "correct_loss_per_token": 1.4192560911178589, "incorrect_loss_per_token": 1.3855773210525513, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4192560911178589, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.4192560911178589, "logits_per_char": -0.7096280455589294, "num_chars": 2}, {"sum_logits": -1.3718167543411255, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.3718167543411255, "logits_per_char": -0.6859083771705627, "num_chars": 2}, {"sum_logits": -1.4226707220077515, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.4226707220077515, "logits_per_char": -0.7113353610038757, "num_chars": 2}, {"sum_logits": -1.3622444868087769, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": true, "logits_per_token": -1.3622444868087769, "logits_per_char": -0.6811222434043884, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1000, "native_id": "Mercury_7172270", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5364563465118408, "incorrect_loss_raw": 1.351924180984497, "correct_loss_per_char": 0.7682281732559204, "incorrect_loss_per_char": 0.6759620904922485, "correct_loss_per_token": 1.5364563465118408, "incorrect_loss_per_token": 1.351924180984497, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5364563465118408, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.5364563465118408, "logits_per_char": -0.7682281732559204, "num_chars": 2}, {"sum_logits": -1.4350364208221436, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.4350364208221436, "logits_per_char": -0.7175182104110718, "num_chars": 2}, {"sum_logits": -1.421684741973877, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.421684741973877, "logits_per_char": -0.7108423709869385, "num_chars": 2}, {"sum_logits": -1.1990513801574707, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": true, "logits_per_token": -1.1990513801574707, "logits_per_char": -0.5995256900787354, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1001, "native_id": "Mercury_184205", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4299510717391968, "incorrect_loss_raw": 1.3779046535491943, "correct_loss_per_char": 0.7149755358695984, "incorrect_loss_per_char": 0.6889523267745972, "correct_loss_per_token": 1.4299510717391968, "incorrect_loss_per_token": 1.3779046535491943, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3218014240264893, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.3218014240264893, "logits_per_char": -0.6609007120132446, "num_chars": 2}, {"sum_logits": -1.394513726234436, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.394513726234436, "logits_per_char": -0.697256863117218, "num_chars": 2}, {"sum_logits": -1.4299510717391968, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4299510717391968, "logits_per_char": -0.7149755358695984, "num_chars": 2}, {"sum_logits": -1.4173988103866577, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4173988103866577, "logits_per_char": -0.7086994051933289, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1002, "native_id": "Mercury_SC_400683", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.334679365158081, "incorrect_loss_raw": 1.410801370938619, "correct_loss_per_char": 0.6673396825790405, "incorrect_loss_per_char": 0.7054006854693095, "correct_loss_per_token": 1.334679365158081, "incorrect_loss_per_token": 1.410801370938619, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.334679365158081, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": true, "logits_per_token": -1.334679365158081, "logits_per_char": -0.6673396825790405, "num_chars": 2}, {"sum_logits": -1.466057300567627, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.466057300567627, "logits_per_char": -0.7330286502838135, "num_chars": 2}, {"sum_logits": -1.3707244396209717, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.3707244396209717, "logits_per_char": -0.6853622198104858, "num_chars": 2}, {"sum_logits": -1.3956223726272583, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.3956223726272583, "logits_per_char": -0.6978111863136292, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1003, "native_id": "Mercury_7182210", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3150893449783325, "incorrect_loss_raw": 1.4177607695261638, "correct_loss_per_char": 0.6575446724891663, "incorrect_loss_per_char": 0.7088803847630819, "correct_loss_per_token": 1.3150893449783325, "incorrect_loss_per_token": 1.4177607695261638, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4389805793762207, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": false, "logits_per_token": -1.4389805793762207, "logits_per_char": -0.7194902896881104, "num_chars": 2}, {"sum_logits": -1.3150893449783325, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": true, "logits_per_token": -1.3150893449783325, "logits_per_char": -0.6575446724891663, "num_chars": 2}, {"sum_logits": -1.334085464477539, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": false, "logits_per_token": -1.334085464477539, "logits_per_char": -0.6670427322387695, "num_chars": 2}, {"sum_logits": -1.4802162647247314, "num_tokens": 1, "num_tokens_all": 440, "is_greedy": false, "logits_per_token": -1.4802162647247314, "logits_per_char": -0.7401081323623657, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1004, "native_id": "Mercury_7238945", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4474622011184692, "incorrect_loss_raw": 1.3831625779469807, "correct_loss_per_char": 0.7237311005592346, "incorrect_loss_per_char": 0.6915812889734904, "correct_loss_per_token": 1.4474622011184692, "incorrect_loss_per_token": 1.3831625779469807, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4390859603881836, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.4390859603881836, "logits_per_char": -0.7195429801940918, "num_chars": 2}, {"sum_logits": -1.4657248258590698, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.4657248258590698, "logits_per_char": -0.7328624129295349, "num_chars": 2}, {"sum_logits": -1.4474622011184692, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": false, "logits_per_token": -1.4474622011184692, "logits_per_char": -0.7237311005592346, "num_chars": 2}, {"sum_logits": -1.244676947593689, "num_tokens": 1, "num_tokens_all": 402, "is_greedy": true, "logits_per_token": -1.244676947593689, "logits_per_char": -0.6223384737968445, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1005, "native_id": "Mercury_SC_408748", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3739537000656128, "incorrect_loss_raw": 1.3954482873280842, "correct_loss_per_char": 0.6869768500328064, "incorrect_loss_per_char": 0.6977241436640421, "correct_loss_per_token": 1.3739537000656128, "incorrect_loss_per_token": 1.3954482873280842, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.379412293434143, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.379412293434143, "logits_per_char": -0.6897061467170715, "num_chars": 2}, {"sum_logits": -1.3761957883834839, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.3761957883834839, "logits_per_char": -0.6880978941917419, "num_chars": 2}, {"sum_logits": -1.430736780166626, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.430736780166626, "logits_per_char": -0.715368390083313, "num_chars": 2}, {"sum_logits": -1.3739537000656128, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.3739537000656128, "logits_per_char": -0.6869768500328064, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1006, "native_id": "MEA_2016_5_4", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3335111141204834, "incorrect_loss_raw": 1.4105615615844727, "correct_loss_per_char": 0.6667555570602417, "incorrect_loss_per_char": 0.7052807807922363, "correct_loss_per_token": 1.3335111141204834, "incorrect_loss_per_token": 1.4105615615844727, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4212756156921387, "num_tokens": 1, "num_tokens_all": 439, "is_greedy": false, "logits_per_token": -1.4212756156921387, "logits_per_char": -0.7106378078460693, "num_chars": 2}, {"sum_logits": -1.3478150367736816, "num_tokens": 1, "num_tokens_all": 439, "is_greedy": false, "logits_per_token": -1.3478150367736816, "logits_per_char": -0.6739075183868408, "num_chars": 2}, {"sum_logits": -1.4625940322875977, "num_tokens": 1, "num_tokens_all": 439, "is_greedy": false, "logits_per_token": -1.4625940322875977, "logits_per_char": -0.7312970161437988, "num_chars": 2}, {"sum_logits": -1.3335111141204834, "num_tokens": 1, "num_tokens_all": 439, "is_greedy": true, "logits_per_token": -1.3335111141204834, "logits_per_char": -0.6667555570602417, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1007, "native_id": "Mercury_7271513", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.255845308303833, "incorrect_loss_raw": 1.4413119951883953, "correct_loss_per_char": 0.6279226541519165, "incorrect_loss_per_char": 0.7206559975941976, "correct_loss_per_token": 1.255845308303833, "incorrect_loss_per_token": 1.4413119951883953, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4352291822433472, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4352291822433472, "logits_per_char": -0.7176145911216736, "num_chars": 2}, {"sum_logits": -1.4835844039916992, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4835844039916992, "logits_per_char": -0.7417922019958496, "num_chars": 2}, {"sum_logits": -1.4051223993301392, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4051223993301392, "logits_per_char": -0.7025611996650696, "num_chars": 2}, {"sum_logits": -1.255845308303833, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.255845308303833, "logits_per_char": -0.6279226541519165, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1008, "native_id": "Mercury_7189000", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.425062894821167, "incorrect_loss_raw": 1.3821146090825398, "correct_loss_per_char": 0.7125314474105835, "incorrect_loss_per_char": 0.6910573045412699, "correct_loss_per_token": 1.425062894821167, "incorrect_loss_per_token": 1.3821146090825398, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.425062894821167, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.425062894821167, "logits_per_char": -0.7125314474105835, "num_chars": 2}, {"sum_logits": -1.3907384872436523, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.3907384872436523, "logits_per_char": -0.6953692436218262, "num_chars": 2}, {"sum_logits": -1.4790432453155518, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4790432453155518, "logits_per_char": -0.7395216226577759, "num_chars": 2}, {"sum_logits": -1.2765620946884155, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.2765620946884155, "logits_per_char": -0.6382810473442078, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1009, "native_id": "Mercury_SC_401585", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3414580821990967, "incorrect_loss_raw": 1.410890817642212, "correct_loss_per_char": 0.6707290410995483, "incorrect_loss_per_char": 0.705445408821106, "correct_loss_per_token": 1.3414580821990967, "incorrect_loss_per_token": 1.410890817642212, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3414580821990967, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": true, "logits_per_token": -1.3414580821990967, "logits_per_char": -0.6707290410995483, "num_chars": 2}, {"sum_logits": -1.4335451126098633, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.4335451126098633, "logits_per_char": -0.7167725563049316, "num_chars": 2}, {"sum_logits": -1.3594075441360474, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.3594075441360474, "logits_per_char": -0.6797037720680237, "num_chars": 2}, {"sum_logits": -1.439719796180725, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.439719796180725, "logits_per_char": -0.7198598980903625, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1010, "native_id": "Mercury_188528", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2782868146896362, "incorrect_loss_raw": 1.4347439606984456, "correct_loss_per_char": 0.6391434073448181, "incorrect_loss_per_char": 0.7173719803492228, "correct_loss_per_token": 1.2782868146896362, "incorrect_loss_per_token": 1.4347439606984456, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.543868899345398, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.543868899345398, "logits_per_char": -0.771934449672699, "num_chars": 2}, {"sum_logits": -1.3884965181350708, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3884965181350708, "logits_per_char": -0.6942482590675354, "num_chars": 2}, {"sum_logits": -1.3718664646148682, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3718664646148682, "logits_per_char": -0.6859332323074341, "num_chars": 2}, {"sum_logits": -1.2782868146896362, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.2782868146896362, "logits_per_char": -0.6391434073448181, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1011, "native_id": "Mercury_SC_415719", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3556855916976929, "incorrect_loss_raw": 1.4021722078323364, "correct_loss_per_char": 0.6778427958488464, "incorrect_loss_per_char": 0.7010861039161682, "correct_loss_per_token": 1.3556855916976929, "incorrect_loss_per_token": 1.4021722078323364, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3556855916976929, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.3556855916976929, "logits_per_char": -0.6778427958488464, "num_chars": 2}, {"sum_logits": -1.4547921419143677, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.4547921419143677, "logits_per_char": -0.7273960709571838, "num_chars": 2}, {"sum_logits": -1.356904149055481, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.356904149055481, "logits_per_char": -0.6784520745277405, "num_chars": 2}, {"sum_logits": -1.3948203325271606, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.3948203325271606, "logits_per_char": -0.6974101662635803, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1012, "native_id": "Mercury_SC_407072", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4394758939743042, "incorrect_loss_raw": 1.3740355571111043, "correct_loss_per_char": 0.7197379469871521, "incorrect_loss_per_char": 0.6870177785555521, "correct_loss_per_token": 1.4394758939743042, "incorrect_loss_per_token": 1.3740355571111043, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4394758939743042, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4394758939743042, "logits_per_char": -0.7197379469871521, "num_chars": 2}, {"sum_logits": -1.3571542501449585, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.3571542501449585, "logits_per_char": -0.6785771250724792, "num_chars": 2}, {"sum_logits": -1.3516498804092407, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.3516498804092407, "logits_per_char": -0.6758249402046204, "num_chars": 2}, {"sum_logits": -1.4133025407791138, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4133025407791138, "logits_per_char": -0.7066512703895569, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1013, "native_id": "Mercury_7091823", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2647361755371094, "incorrect_loss_raw": 1.44135586420695, "correct_loss_per_char": 0.6323680877685547, "incorrect_loss_per_char": 0.720677932103475, "correct_loss_per_token": 1.2647361755371094, "incorrect_loss_per_token": 1.44135586420695, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2647361755371094, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.2647361755371094, "logits_per_char": -0.6323680877685547, "num_chars": 2}, {"sum_logits": -1.3362617492675781, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3362617492675781, "logits_per_char": -0.6681308746337891, "num_chars": 2}, {"sum_logits": -1.398925542831421, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.398925542831421, "logits_per_char": -0.6994627714157104, "num_chars": 2}, {"sum_logits": -1.5888803005218506, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.5888803005218506, "logits_per_char": -0.7944401502609253, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1014, "native_id": "Mercury_7040985", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5546457767486572, "incorrect_loss_raw": 1.347168246905009, "correct_loss_per_char": 0.7773228883743286, "incorrect_loss_per_char": 0.6735841234525045, "correct_loss_per_token": 1.5546457767486572, "incorrect_loss_per_token": 1.347168246905009, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4602837562561035, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4602837562561035, "logits_per_char": -0.7301418781280518, "num_chars": 2}, {"sum_logits": -1.5546457767486572, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.5546457767486572, "logits_per_char": -0.7773228883743286, "num_chars": 2}, {"sum_logits": -1.3616224527359009, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.3616224527359009, "logits_per_char": -0.6808112263679504, "num_chars": 2}, {"sum_logits": -1.2195985317230225, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.2195985317230225, "logits_per_char": -0.6097992658615112, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1015, "native_id": "Mercury_SC_409383", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3799999952316284, "incorrect_loss_raw": 1.3966854413350422, "correct_loss_per_char": 0.6899999976158142, "incorrect_loss_per_char": 0.6983427206675211, "correct_loss_per_token": 1.3799999952316284, "incorrect_loss_per_token": 1.3966854413350422, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4155428409576416, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4155428409576416, "logits_per_char": -0.7077714204788208, "num_chars": 2}, {"sum_logits": -1.459965705871582, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.459965705871582, "logits_per_char": -0.729982852935791, "num_chars": 2}, {"sum_logits": -1.3145477771759033, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.3145477771759033, "logits_per_char": -0.6572738885879517, "num_chars": 2}, {"sum_logits": -1.3799999952316284, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3799999952316284, "logits_per_char": -0.6899999976158142, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1016, "native_id": "Mercury_SC_407080", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5420228242874146, "incorrect_loss_raw": 1.3452107508977253, "correct_loss_per_char": 0.7710114121437073, "incorrect_loss_per_char": 0.6726053754488627, "correct_loss_per_token": 1.5420228242874146, "incorrect_loss_per_token": 1.3452107508977253, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3788955211639404, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3788955211639404, "logits_per_char": -0.6894477605819702, "num_chars": 2}, {"sum_logits": -1.298729658126831, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.298729658126831, "logits_per_char": -0.6493648290634155, "num_chars": 2}, {"sum_logits": -1.3580070734024048, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3580070734024048, "logits_per_char": -0.6790035367012024, "num_chars": 2}, {"sum_logits": -1.5420228242874146, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.5420228242874146, "logits_per_char": -0.7710114121437073, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1017, "native_id": "MCAS_2000_4_34", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5345538854599, "incorrect_loss_raw": 1.3549591700236003, "correct_loss_per_char": 0.76727694272995, "incorrect_loss_per_char": 0.6774795850118002, "correct_loss_per_token": 1.5345538854599, "incorrect_loss_per_token": 1.3549591700236003, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5345538854599, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.5345538854599, "logits_per_char": -0.76727694272995, "num_chars": 2}, {"sum_logits": -1.5055574178695679, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.5055574178695679, "logits_per_char": -0.7527787089347839, "num_chars": 2}, {"sum_logits": -1.3698995113372803, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.3698995113372803, "logits_per_char": -0.6849497556686401, "num_chars": 2}, {"sum_logits": -1.1894205808639526, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": true, "logits_per_token": -1.1894205808639526, "logits_per_char": -0.5947102904319763, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1018, "native_id": "Mercury_7032498", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.540174126625061, "incorrect_loss_raw": 1.3542493184407551, "correct_loss_per_char": 0.7700870633125305, "incorrect_loss_per_char": 0.6771246592203776, "correct_loss_per_token": 1.540174126625061, "incorrect_loss_per_token": 1.3542493184407551, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.203062653541565, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.203062653541565, "logits_per_char": -0.6015313267707825, "num_chars": 2}, {"sum_logits": -1.3197627067565918, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3197627067565918, "logits_per_char": -0.6598813533782959, "num_chars": 2}, {"sum_logits": -1.5399225950241089, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.5399225950241089, "logits_per_char": -0.7699612975120544, "num_chars": 2}, {"sum_logits": -1.540174126625061, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.540174126625061, "logits_per_char": -0.7700870633125305, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1019, "native_id": "TAKS_2009_5_30", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3977794647216797, "incorrect_loss_raw": 1.389596978823344, "correct_loss_per_char": 0.6988897323608398, "incorrect_loss_per_char": 0.694798489411672, "correct_loss_per_token": 1.3977794647216797, "incorrect_loss_per_token": 1.389596978823344, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3977794647216797, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3977794647216797, "logits_per_char": -0.6988897323608398, "num_chars": 2}, {"sum_logits": -1.4415156841278076, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4415156841278076, "logits_per_char": -0.7207578420639038, "num_chars": 2}, {"sum_logits": -1.3695820569992065, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3695820569992065, "logits_per_char": -0.6847910284996033, "num_chars": 2}, {"sum_logits": -1.3576931953430176, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.3576931953430176, "logits_per_char": -0.6788465976715088, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1020, "native_id": "Mercury_SC_415761", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.426550030708313, "incorrect_loss_raw": 1.3818613290786743, "correct_loss_per_char": 0.7132750153541565, "incorrect_loss_per_char": 0.6909306645393372, "correct_loss_per_token": 1.426550030708313, "incorrect_loss_per_token": 1.3818613290786743, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.398947834968567, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.398947834968567, "logits_per_char": -0.6994739174842834, "num_chars": 2}, {"sum_logits": -1.4817513227462769, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.4817513227462769, "logits_per_char": -0.7408756613731384, "num_chars": 2}, {"sum_logits": -1.426550030708313, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.426550030708313, "logits_per_char": -0.7132750153541565, "num_chars": 2}, {"sum_logits": -1.2648848295211792, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.2648848295211792, "logits_per_char": -0.6324424147605896, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1021, "native_id": "ACTAAP_2008_5_10", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2873859405517578, "incorrect_loss_raw": 1.4274693330128987, "correct_loss_per_char": 0.6436929702758789, "incorrect_loss_per_char": 0.7137346665064493, "correct_loss_per_token": 1.2873859405517578, "incorrect_loss_per_token": 1.4274693330128987, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.383961796760559, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.383961796760559, "logits_per_char": -0.6919808983802795, "num_chars": 2}, {"sum_logits": -1.409647822380066, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.409647822380066, "logits_per_char": -0.704823911190033, "num_chars": 2}, {"sum_logits": -1.4887983798980713, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4887983798980713, "logits_per_char": -0.7443991899490356, "num_chars": 2}, {"sum_logits": -1.2873859405517578, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.2873859405517578, "logits_per_char": -0.6436929702758789, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1022, "native_id": "Mercury_416671", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.394816279411316, "incorrect_loss_raw": 1.3907357851664226, "correct_loss_per_char": 0.697408139705658, "incorrect_loss_per_char": 0.6953678925832113, "correct_loss_per_token": 1.394816279411316, "incorrect_loss_per_token": 1.3907357851664226, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4296201467514038, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4296201467514038, "logits_per_char": -0.7148100733757019, "num_chars": 2}, {"sum_logits": -1.3247078657150269, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.3247078657150269, "logits_per_char": -0.6623539328575134, "num_chars": 2}, {"sum_logits": -1.417879343032837, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.417879343032837, "logits_per_char": -0.7089396715164185, "num_chars": 2}, {"sum_logits": -1.394816279411316, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.394816279411316, "logits_per_char": -0.697408139705658, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1023, "native_id": "Mercury_400803", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.083571434020996, "incorrect_loss_raw": 1.5355993906656902, "correct_loss_per_char": 0.541785717010498, "incorrect_loss_per_char": 0.7677996953328451, "correct_loss_per_token": 1.083571434020996, "incorrect_loss_per_token": 1.5355993906656902, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6686229705810547, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.6686229705810547, "logits_per_char": -0.8343114852905273, "num_chars": 2}, {"sum_logits": -1.315861701965332, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.315861701965332, "logits_per_char": -0.657930850982666, "num_chars": 2}, {"sum_logits": -1.6223134994506836, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.6223134994506836, "logits_per_char": -0.8111567497253418, "num_chars": 2}, {"sum_logits": -1.083571434020996, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.083571434020996, "logits_per_char": -0.541785717010498, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1024, "native_id": "Mercury_7005880", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.434171438217163, "incorrect_loss_raw": 1.38071604569753, "correct_loss_per_char": 0.7170857191085815, "incorrect_loss_per_char": 0.690358022848765, "correct_loss_per_token": 1.434171438217163, "incorrect_loss_per_token": 1.38071604569753, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4965898990631104, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4965898990631104, "logits_per_char": -0.7482949495315552, "num_chars": 2}, {"sum_logits": -1.3888705968856812, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.3888705968856812, "logits_per_char": -0.6944352984428406, "num_chars": 2}, {"sum_logits": -1.434171438217163, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.434171438217163, "logits_per_char": -0.7170857191085815, "num_chars": 2}, {"sum_logits": -1.2566876411437988, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.2566876411437988, "logits_per_char": -0.6283438205718994, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1025, "native_id": "Mercury_7210508", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4008020162582397, "incorrect_loss_raw": 1.397223671277364, "correct_loss_per_char": 0.7004010081291199, "incorrect_loss_per_char": 0.698611835638682, "correct_loss_per_token": 1.4008020162582397, "incorrect_loss_per_token": 1.397223671277364, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5039035081863403, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.5039035081863403, "logits_per_char": -0.7519517540931702, "num_chars": 2}, {"sum_logits": -1.4798303842544556, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.4798303842544556, "logits_per_char": -0.7399151921272278, "num_chars": 2}, {"sum_logits": -1.4008020162582397, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.4008020162582397, "logits_per_char": -0.7004010081291199, "num_chars": 2}, {"sum_logits": -1.2079371213912964, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": true, "logits_per_token": -1.2079371213912964, "logits_per_char": -0.6039685606956482, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1026, "native_id": "NYSEDREGENTS_2013_4_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4248268604278564, "incorrect_loss_raw": 1.3894507884979248, "correct_loss_per_char": 0.7124134302139282, "incorrect_loss_per_char": 0.6947253942489624, "correct_loss_per_token": 1.4248268604278564, "incorrect_loss_per_token": 1.3894507884979248, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2426934242248535, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.2426934242248535, "logits_per_char": -0.6213467121124268, "num_chars": 2}, {"sum_logits": -1.3845247030258179, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3845247030258179, "logits_per_char": -0.6922623515129089, "num_chars": 2}, {"sum_logits": -1.4248268604278564, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.4248268604278564, "logits_per_char": -0.7124134302139282, "num_chars": 2}, {"sum_logits": -1.541134238243103, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.541134238243103, "logits_per_char": -0.7705671191215515, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1027, "native_id": "NYSEDREGENTS_2008_4_12", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.477703332901001, "incorrect_loss_raw": 1.3629614114761353, "correct_loss_per_char": 0.7388516664505005, "incorrect_loss_per_char": 0.6814807057380676, "correct_loss_per_token": 1.477703332901001, "incorrect_loss_per_token": 1.3629614114761353, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4049986600875854, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4049986600875854, "logits_per_char": -0.7024993300437927, "num_chars": 2}, {"sum_logits": -1.3696361780166626, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.3696361780166626, "logits_per_char": -0.6848180890083313, "num_chars": 2}, {"sum_logits": -1.477703332901001, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.477703332901001, "logits_per_char": -0.7388516664505005, "num_chars": 2}, {"sum_logits": -1.3142493963241577, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.3142493963241577, "logits_per_char": -0.6571246981620789, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1028, "native_id": "Mercury_400091", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5667610168457031, "incorrect_loss_raw": 1.3521378835042317, "correct_loss_per_char": 0.7833805084228516, "incorrect_loss_per_char": 0.6760689417521158, "correct_loss_per_token": 1.5667610168457031, "incorrect_loss_per_token": 1.3521378835042317, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5932356119155884, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.5932356119155884, "logits_per_char": -0.7966178059577942, "num_chars": 2}, {"sum_logits": -1.5667610168457031, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.5667610168457031, "logits_per_char": -0.7833805084228516, "num_chars": 2}, {"sum_logits": -1.2067196369171143, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.2067196369171143, "logits_per_char": -0.6033598184585571, "num_chars": 2}, {"sum_logits": -1.2564584016799927, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.2564584016799927, "logits_per_char": -0.6282292008399963, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1029, "native_id": "Mercury_SC_402257", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3815534114837646, "incorrect_loss_raw": 1.3969016869862874, "correct_loss_per_char": 0.6907767057418823, "incorrect_loss_per_char": 0.6984508434931437, "correct_loss_per_token": 1.3815534114837646, "incorrect_loss_per_token": 1.3969016869862874, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4786579608917236, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4786579608917236, "logits_per_char": -0.7393289804458618, "num_chars": 2}, {"sum_logits": -1.3929295539855957, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.3929295539855957, "logits_per_char": -0.6964647769927979, "num_chars": 2}, {"sum_logits": -1.319117546081543, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.319117546081543, "logits_per_char": -0.6595587730407715, "num_chars": 2}, {"sum_logits": -1.3815534114837646, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.3815534114837646, "logits_per_char": -0.6907767057418823, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1030, "native_id": "Mercury_7227815", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.495202660560608, "incorrect_loss_raw": 1.3793718020121257, "correct_loss_per_char": 0.747601330280304, "incorrect_loss_per_char": 0.6896859010060629, "correct_loss_per_token": 1.495202660560608, "incorrect_loss_per_token": 1.3793718020121257, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1597580909729004, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.1597580909729004, "logits_per_char": -0.5798790454864502, "num_chars": 2}, {"sum_logits": -1.3804030418395996, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.3804030418395996, "logits_per_char": -0.6902015209197998, "num_chars": 2}, {"sum_logits": -1.597954273223877, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.597954273223877, "logits_per_char": -0.7989771366119385, "num_chars": 2}, {"sum_logits": -1.495202660560608, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.495202660560608, "logits_per_char": -0.747601330280304, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1031, "native_id": "ACTAAP_2010_7_3", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4811959266662598, "incorrect_loss_raw": 1.3676265875498455, "correct_loss_per_char": 0.7405979633331299, "incorrect_loss_per_char": 0.6838132937749227, "correct_loss_per_token": 1.4811959266662598, "incorrect_loss_per_token": 1.3676265875498455, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4811959266662598, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4811959266662598, "logits_per_char": -0.7405979633331299, "num_chars": 2}, {"sum_logits": -1.4629133939743042, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4629133939743042, "logits_per_char": -0.7314566969871521, "num_chars": 2}, {"sum_logits": -1.4244869947433472, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": false, "logits_per_token": -1.4244869947433472, "logits_per_char": -0.7122434973716736, "num_chars": 2}, {"sum_logits": -1.2154793739318848, "num_tokens": 1, "num_tokens_all": 388, "is_greedy": true, "logits_per_token": -1.2154793739318848, "logits_per_char": -0.6077396869659424, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1032, "native_id": "Mercury_SC_410905", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3217723369598389, "incorrect_loss_raw": 1.4156092405319214, "correct_loss_per_char": 0.6608861684799194, "incorrect_loss_per_char": 0.7078046202659607, "correct_loss_per_token": 1.3217723369598389, "incorrect_loss_per_token": 1.4156092405319214, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4619909524917603, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.4619909524917603, "logits_per_char": -0.7309954762458801, "num_chars": 2}, {"sum_logits": -1.3433899879455566, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.3433899879455566, "logits_per_char": -0.6716949939727783, "num_chars": 2}, {"sum_logits": -1.4414467811584473, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.4414467811584473, "logits_per_char": -0.7207233905792236, "num_chars": 2}, {"sum_logits": -1.3217723369598389, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": true, "logits_per_token": -1.3217723369598389, "logits_per_char": -0.6608861684799194, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1033, "native_id": "OHAT_2010_5_18", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.279357671737671, "incorrect_loss_raw": 1.4331345160802205, "correct_loss_per_char": 0.6396788358688354, "incorrect_loss_per_char": 0.7165672580401102, "correct_loss_per_token": 1.279357671737671, "incorrect_loss_per_token": 1.4331345160802205, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3587583303451538, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.3587583303451538, "logits_per_char": -0.6793791651725769, "num_chars": 2}, {"sum_logits": -1.420636534690857, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.420636534690857, "logits_per_char": -0.7103182673454285, "num_chars": 2}, {"sum_logits": -1.5200086832046509, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": false, "logits_per_token": -1.5200086832046509, "logits_per_char": -0.7600043416023254, "num_chars": 2}, {"sum_logits": -1.279357671737671, "num_tokens": 1, "num_tokens_all": 414, "is_greedy": true, "logits_per_token": -1.279357671737671, "logits_per_char": -0.6396788358688354, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1034, "native_id": "NAEP_2000_8_S11+10", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1045639514923096, "incorrect_loss_raw": 1.5230398972829182, "correct_loss_per_char": 0.5522819757461548, "incorrect_loss_per_char": 0.7615199486414591, "correct_loss_per_token": 1.1045639514923096, "incorrect_loss_per_token": 1.5230398972829182, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1045639514923096, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.1045639514923096, "logits_per_char": -0.5522819757461548, "num_chars": 2}, {"sum_logits": -1.3180952072143555, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3180952072143555, "logits_per_char": -0.6590476036071777, "num_chars": 2}, {"sum_logits": -1.550984501838684, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.550984501838684, "logits_per_char": -0.775492250919342, "num_chars": 2}, {"sum_logits": -1.7000399827957153, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.7000399827957153, "logits_per_char": -0.8500199913978577, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1035, "native_id": "MCAS_2003_8_29", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3016531467437744, "incorrect_loss_raw": 1.425140102704366, "correct_loss_per_char": 0.6508265733718872, "incorrect_loss_per_char": 0.712570051352183, "correct_loss_per_token": 1.3016531467437744, "incorrect_loss_per_token": 1.425140102704366, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3938344717025757, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.3938344717025757, "logits_per_char": -0.6969172358512878, "num_chars": 2}, {"sum_logits": -1.3016531467437744, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": true, "logits_per_token": -1.3016531467437744, "logits_per_char": -0.6508265733718872, "num_chars": 2}, {"sum_logits": -1.5322884321212769, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.5322884321212769, "logits_per_char": -0.7661442160606384, "num_chars": 2}, {"sum_logits": -1.3492974042892456, "num_tokens": 1, "num_tokens_all": 385, "is_greedy": false, "logits_per_token": -1.3492974042892456, "logits_per_char": -0.6746487021446228, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1036, "native_id": "Mercury_401433", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.481870174407959, "incorrect_loss_raw": 1.363829493522644, "correct_loss_per_char": 0.7409350872039795, "incorrect_loss_per_char": 0.681914746761322, "correct_loss_per_token": 1.481870174407959, "incorrect_loss_per_token": 1.363829493522644, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3907973766326904, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3907973766326904, "logits_per_char": -0.6953986883163452, "num_chars": 2}, {"sum_logits": -1.3024874925613403, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.3024874925613403, "logits_per_char": -0.6512437462806702, "num_chars": 2}, {"sum_logits": -1.3982036113739014, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3982036113739014, "logits_per_char": -0.6991018056869507, "num_chars": 2}, {"sum_logits": -1.481870174407959, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.481870174407959, "logits_per_char": -0.7409350872039795, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1037, "native_id": "TIMSS_1995_8_N4", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.5616111755371094, "incorrect_loss_raw": 1.6325481534004211, "correct_loss_per_char": 0.7808055877685547, "incorrect_loss_per_char": 0.8162740767002106, "correct_loss_per_token": 1.5616111755371094, "incorrect_loss_per_token": 1.6325481534004211, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7705585956573486, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.7705585956573486, "logits_per_char": -0.8852792978286743, "num_chars": 2}, {"sum_logits": -1.5616111755371094, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.5616111755371094, "logits_per_char": -0.7808055877685547, "num_chars": 2}, {"sum_logits": -1.575087070465088, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.575087070465088, "logits_per_char": -0.787543535232544, "num_chars": 2}, {"sum_logits": -1.5986738204956055, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.5986738204956055, "logits_per_char": -0.7993369102478027, "num_chars": 2}, {"sum_logits": -1.5858731269836426, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.5858731269836426, "logits_per_char": -0.7929365634918213, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1038, "native_id": "Mercury_SC_405885", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5023751258850098, "incorrect_loss_raw": 1.3612611293792725, "correct_loss_per_char": 0.7511875629425049, "incorrect_loss_per_char": 0.6806305646896362, "correct_loss_per_token": 1.5023751258850098, "incorrect_loss_per_token": 1.3612611293792725, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3621551990509033, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.3621551990509033, "logits_per_char": -0.6810775995254517, "num_chars": 2}, {"sum_logits": -1.4888267517089844, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4888267517089844, "logits_per_char": -0.7444133758544922, "num_chars": 2}, {"sum_logits": -1.5023751258850098, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.5023751258850098, "logits_per_char": -0.7511875629425049, "num_chars": 2}, {"sum_logits": -1.2328014373779297, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.2328014373779297, "logits_per_char": -0.6164007186889648, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1039, "native_id": "Mercury_7263638", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3727960586547852, "incorrect_loss_raw": 1.3999236822128296, "correct_loss_per_char": 0.6863980293273926, "incorrect_loss_per_char": 0.6999618411064148, "correct_loss_per_token": 1.3727960586547852, "incorrect_loss_per_token": 1.3999236822128296, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4983093738555908, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.4983093738555908, "logits_per_char": -0.7491546869277954, "num_chars": 2}, {"sum_logits": -1.3727960586547852, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.3727960586547852, "logits_per_char": -0.6863980293273926, "num_chars": 2}, {"sum_logits": -1.3662776947021484, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.3662776947021484, "logits_per_char": -0.6831388473510742, "num_chars": 2}, {"sum_logits": -1.3351839780807495, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -1.3351839780807495, "logits_per_char": -0.6675919890403748, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1040, "native_id": "Mercury_401428", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3084330558776855, "incorrect_loss_raw": 1.4215673208236694, "correct_loss_per_char": 0.6542165279388428, "incorrect_loss_per_char": 0.7107836604118347, "correct_loss_per_token": 1.3084330558776855, "incorrect_loss_per_token": 1.4215673208236694, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4823886156082153, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4823886156082153, "logits_per_char": -0.7411943078041077, "num_chars": 2}, {"sum_logits": -1.3084330558776855, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.3084330558776855, "logits_per_char": -0.6542165279388428, "num_chars": 2}, {"sum_logits": -1.399782657623291, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.399782657623291, "logits_per_char": -0.6998913288116455, "num_chars": 2}, {"sum_logits": -1.382530689239502, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.382530689239502, "logits_per_char": -0.691265344619751, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1041, "native_id": "Mercury_SC_402121", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4433869123458862, "incorrect_loss_raw": 1.3836222489674885, "correct_loss_per_char": 0.7216934561729431, "incorrect_loss_per_char": 0.6918111244837443, "correct_loss_per_token": 1.4433869123458862, "incorrect_loss_per_token": 1.3836222489674885, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.546122431755066, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.546122431755066, "logits_per_char": -0.773061215877533, "num_chars": 2}, {"sum_logits": -1.4433869123458862, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.4433869123458862, "logits_per_char": -0.7216934561729431, "num_chars": 2}, {"sum_logits": -1.4122049808502197, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.4122049808502197, "logits_per_char": -0.7061024904251099, "num_chars": 2}, {"sum_logits": -1.1925393342971802, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": true, "logits_per_token": -1.1925393342971802, "logits_per_char": -0.5962696671485901, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1042, "native_id": "NYSEDREGENTS_2015_4_7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2266942262649536, "incorrect_loss_raw": 1.2705147862434387, "correct_loss_per_char": 0.6133471131324768, "incorrect_loss_per_char": 0.6352573931217194, "correct_loss_per_token": 1.2266942262649536, "incorrect_loss_per_token": 1.2705147862434387, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2266942262649536, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.2266942262649536, "logits_per_char": -0.6133471131324768, "num_chars": 2}, {"sum_logits": -1.179483413696289, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.179483413696289, "logits_per_char": -0.5897417068481445, "num_chars": 2}, {"sum_logits": -1.3615461587905884, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3615461587905884, "logits_per_char": -0.6807730793952942, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1043, "native_id": "MCAS_2012_5_23614", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3895189762115479, "incorrect_loss_raw": 1.3916269540786743, "correct_loss_per_char": 0.6947594881057739, "incorrect_loss_per_char": 0.6958134770393372, "correct_loss_per_token": 1.3895189762115479, "incorrect_loss_per_token": 1.3916269540786743, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4236420392990112, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4236420392990112, "logits_per_char": -0.7118210196495056, "num_chars": 2}, {"sum_logits": -1.431708812713623, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.431708812713623, "logits_per_char": -0.7158544063568115, "num_chars": 2}, {"sum_logits": -1.3895189762115479, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.3895189762115479, "logits_per_char": -0.6947594881057739, "num_chars": 2}, {"sum_logits": -1.3195300102233887, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.3195300102233887, "logits_per_char": -0.6597650051116943, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1044, "native_id": "Mercury_407262", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5142244100570679, "incorrect_loss_raw": 1.3522039254506428, "correct_loss_per_char": 0.7571122050285339, "incorrect_loss_per_char": 0.6761019627253214, "correct_loss_per_token": 1.5142244100570679, "incorrect_loss_per_token": 1.3522039254506428, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5142244100570679, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.5142244100570679, "logits_per_char": -0.7571122050285339, "num_chars": 2}, {"sum_logits": -1.309183955192566, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.309183955192566, "logits_per_char": -0.654591977596283, "num_chars": 2}, {"sum_logits": -1.3638755083084106, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.3638755083084106, "logits_per_char": -0.6819377541542053, "num_chars": 2}, {"sum_logits": -1.3835523128509521, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.3835523128509521, "logits_per_char": -0.6917761564254761, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1045, "native_id": "MCAS_2014_8_6", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4039969444274902, "incorrect_loss_raw": 1.3891290028889973, "correct_loss_per_char": 0.7019984722137451, "incorrect_loss_per_char": 0.6945645014444987, "correct_loss_per_token": 1.4039969444274902, "incorrect_loss_per_token": 1.3891290028889973, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4340986013412476, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.4340986013412476, "logits_per_char": -0.7170493006706238, "num_chars": 2}, {"sum_logits": -1.4039969444274902, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.4039969444274902, "logits_per_char": -0.7019984722137451, "num_chars": 2}, {"sum_logits": -1.4532321691513062, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.4532321691513062, "logits_per_char": -0.7266160845756531, "num_chars": 2}, {"sum_logits": -1.2800562381744385, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": true, "logits_per_token": -1.2800562381744385, "logits_per_char": -0.6400281190872192, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1046, "native_id": "Mercury_7032515", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4197574853897095, "incorrect_loss_raw": 1.3838867743810017, "correct_loss_per_char": 0.7098787426948547, "incorrect_loss_per_char": 0.6919433871905009, "correct_loss_per_token": 1.4197574853897095, "incorrect_loss_per_token": 1.3838867743810017, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.333258867263794, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.333258867263794, "logits_per_char": -0.666629433631897, "num_chars": 2}, {"sum_logits": -1.380476951599121, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.380476951599121, "logits_per_char": -0.6902384757995605, "num_chars": 2}, {"sum_logits": -1.4379245042800903, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4379245042800903, "logits_per_char": -0.7189622521400452, "num_chars": 2}, {"sum_logits": -1.4197574853897095, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4197574853897095, "logits_per_char": -0.7098787426948547, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1047, "native_id": "Mercury_7270165", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.355478286743164, "incorrect_loss_raw": 1.4024611711502075, "correct_loss_per_char": 0.677739143371582, "incorrect_loss_per_char": 0.7012305855751038, "correct_loss_per_token": 1.355478286743164, "incorrect_loss_per_token": 1.4024611711502075, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3940168619155884, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.3940168619155884, "logits_per_char": -0.6970084309577942, "num_chars": 2}, {"sum_logits": -1.4441801309585571, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.4441801309585571, "logits_per_char": -0.7220900654792786, "num_chars": 2}, {"sum_logits": -1.355478286743164, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": true, "logits_per_token": -1.355478286743164, "logits_per_char": -0.677739143371582, "num_chars": 2}, {"sum_logits": -1.369186520576477, "num_tokens": 1, "num_tokens_all": 407, "is_greedy": false, "logits_per_token": -1.369186520576477, "logits_per_char": -0.6845932602882385, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1048, "native_id": "Mercury_7017045", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5287721157073975, "incorrect_loss_raw": 1.3621806701024373, "correct_loss_per_char": 0.7643860578536987, "incorrect_loss_per_char": 0.6810903350512186, "correct_loss_per_token": 1.5287721157073975, "incorrect_loss_per_token": 1.3621806701024373, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1529620885849, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.1529620885849, "logits_per_char": -0.57648104429245, "num_chars": 2}, {"sum_logits": -1.5287721157073975, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.5287721157073975, "logits_per_char": -0.7643860578536987, "num_chars": 2}, {"sum_logits": -1.4800865650177002, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.4800865650177002, "logits_per_char": -0.7400432825088501, "num_chars": 2}, {"sum_logits": -1.453493356704712, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.453493356704712, "logits_per_char": -0.726746678352356, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1049, "native_id": "Mercury_SC_400386", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.425895094871521, "incorrect_loss_raw": 1.378635843594869, "correct_loss_per_char": 0.7129475474357605, "incorrect_loss_per_char": 0.6893179217974345, "correct_loss_per_token": 1.425895094871521, "incorrect_loss_per_token": 1.378635843594869, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.425895094871521, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.425895094871521, "logits_per_char": -0.7129475474357605, "num_chars": 2}, {"sum_logits": -1.4342917203903198, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.4342917203903198, "logits_per_char": -0.7171458601951599, "num_chars": 2}, {"sum_logits": -1.3567367792129517, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.3567367792129517, "logits_per_char": -0.6783683896064758, "num_chars": 2}, {"sum_logits": -1.3448790311813354, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -1.3448790311813354, "logits_per_char": -0.6724395155906677, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1050, "native_id": "Mercury_400750", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.345344066619873, "incorrect_loss_raw": 1.4141705830891926, "correct_loss_per_char": 0.6726720333099365, "incorrect_loss_per_char": 0.7070852915445963, "correct_loss_per_token": 1.345344066619873, "incorrect_loss_per_token": 1.4141705830891926, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4911142587661743, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4911142587661743, "logits_per_char": -0.7455571293830872, "num_chars": 2}, {"sum_logits": -1.506163477897644, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.506163477897644, "logits_per_char": -0.753081738948822, "num_chars": 2}, {"sum_logits": -1.2452340126037598, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.2452340126037598, "logits_per_char": -0.6226170063018799, "num_chars": 2}, {"sum_logits": -1.345344066619873, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.345344066619873, "logits_per_char": -0.6726720333099365, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1051, "native_id": "MCAS_2006_9_28-v1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3815609216690063, "incorrect_loss_raw": 1.397149920463562, "correct_loss_per_char": 0.6907804608345032, "incorrect_loss_per_char": 0.698574960231781, "correct_loss_per_token": 1.3815609216690063, "incorrect_loss_per_token": 1.397149920463562, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4243531227111816, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": false, "logits_per_token": -1.4243531227111816, "logits_per_char": -0.7121765613555908, "num_chars": 2}, {"sum_logits": -1.3047492504119873, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": true, "logits_per_token": -1.3047492504119873, "logits_per_char": -0.6523746252059937, "num_chars": 2}, {"sum_logits": -1.3815609216690063, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": false, "logits_per_token": -1.3815609216690063, "logits_per_char": -0.6907804608345032, "num_chars": 2}, {"sum_logits": -1.462347388267517, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": false, "logits_per_token": -1.462347388267517, "logits_per_char": -0.7311736941337585, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1052, "native_id": "Mercury_416376", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3302031755447388, "incorrect_loss_raw": 1.4160367647806804, "correct_loss_per_char": 0.6651015877723694, "incorrect_loss_per_char": 0.7080183823903402, "correct_loss_per_token": 1.3302031755447388, "incorrect_loss_per_token": 1.4160367647806804, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4732966423034668, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4732966423034668, "logits_per_char": -0.7366483211517334, "num_chars": 2}, {"sum_logits": -1.2904889583587646, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.2904889583587646, "logits_per_char": -0.6452444791793823, "num_chars": 2}, {"sum_logits": -1.4843246936798096, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4843246936798096, "logits_per_char": -0.7421623468399048, "num_chars": 2}, {"sum_logits": -1.3302031755447388, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.3302031755447388, "logits_per_char": -0.6651015877723694, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1053, "native_id": "Mercury_7086520", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3613899946212769, "incorrect_loss_raw": 1.4020540714263916, "correct_loss_per_char": 0.6806949973106384, "incorrect_loss_per_char": 0.7010270357131958, "correct_loss_per_token": 1.3613899946212769, "incorrect_loss_per_token": 1.4020540714263916, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3613899946212769, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.3613899946212769, "logits_per_char": -0.6806949973106384, "num_chars": 2}, {"sum_logits": -1.348322868347168, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -1.348322868347168, "logits_per_char": -0.674161434173584, "num_chars": 2}, {"sum_logits": -1.4361343383789062, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.4361343383789062, "logits_per_char": -0.7180671691894531, "num_chars": 2}, {"sum_logits": -1.4217050075531006, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.4217050075531006, "logits_per_char": -0.7108525037765503, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1054, "native_id": "Mercury_7014333", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3462414741516113, "incorrect_loss_raw": 1.4061203400293987, "correct_loss_per_char": 0.6731207370758057, "incorrect_loss_per_char": 0.7030601700146993, "correct_loss_per_token": 1.3462414741516113, "incorrect_loss_per_token": 1.4061203400293987, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3462414741516113, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.3462414741516113, "logits_per_char": -0.6731207370758057, "num_chars": 2}, {"sum_logits": -1.4024946689605713, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4024946689605713, "logits_per_char": -0.7012473344802856, "num_chars": 2}, {"sum_logits": -1.382880687713623, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.382880687713623, "logits_per_char": -0.6914403438568115, "num_chars": 2}, {"sum_logits": -1.4329856634140015, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4329856634140015, "logits_per_char": -0.7164928317070007, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1055, "native_id": "Mercury_SC_406623", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3423460721969604, "incorrect_loss_raw": 1.4096587498982747, "correct_loss_per_char": 0.6711730360984802, "incorrect_loss_per_char": 0.7048293749491373, "correct_loss_per_token": 1.3423460721969604, "incorrect_loss_per_token": 1.4096587498982747, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3423460721969604, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.3423460721969604, "logits_per_char": -0.6711730360984802, "num_chars": 2}, {"sum_logits": -1.4104591608047485, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4104591608047485, "logits_per_char": -0.7052295804023743, "num_chars": 2}, {"sum_logits": -1.4359434843063354, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4359434843063354, "logits_per_char": -0.7179717421531677, "num_chars": 2}, {"sum_logits": -1.3825736045837402, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.3825736045837402, "logits_per_char": -0.6912868022918701, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1056, "native_id": "Mercury_7042648", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2340490818023682, "incorrect_loss_raw": 1.4530450503031414, "correct_loss_per_char": 0.6170245409011841, "incorrect_loss_per_char": 0.7265225251515707, "correct_loss_per_token": 1.2340490818023682, "incorrect_loss_per_token": 1.4530450503031414, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2340490818023682, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.2340490818023682, "logits_per_char": -0.6170245409011841, "num_chars": 2}, {"sum_logits": -1.445840835571289, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.445840835571289, "logits_per_char": -0.7229204177856445, "num_chars": 2}, {"sum_logits": -1.5445802211761475, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.5445802211761475, "logits_per_char": -0.7722901105880737, "num_chars": 2}, {"sum_logits": -1.3687140941619873, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3687140941619873, "logits_per_char": -0.6843570470809937, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1057, "native_id": "MCAS_2004_8_23", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5175774097442627, "incorrect_loss_raw": 1.375561515490214, "correct_loss_per_char": 0.7587887048721313, "incorrect_loss_per_char": 0.687780757745107, "correct_loss_per_token": 1.5175774097442627, "incorrect_loss_per_token": 1.375561515490214, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.475321888923645, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.475321888923645, "logits_per_char": -0.7376609444618225, "num_chars": 2}, {"sum_logits": -1.5175774097442627, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.5175774097442627, "logits_per_char": -0.7587887048721313, "num_chars": 2}, {"sum_logits": -1.5599396228790283, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.5599396228790283, "logits_per_char": -0.7799698114395142, "num_chars": 2}, {"sum_logits": -1.0914230346679688, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.0914230346679688, "logits_per_char": -0.5457115173339844, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1058, "native_id": "MCAS_2013_8_29425", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.368298888206482, "incorrect_loss_raw": 1.3984832366307576, "correct_loss_per_char": 0.684149444103241, "incorrect_loss_per_char": 0.6992416183153788, "correct_loss_per_token": 1.368298888206482, "incorrect_loss_per_token": 1.3984832366307576, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.368298888206482, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.368298888206482, "logits_per_char": -0.684149444103241, "num_chars": 2}, {"sum_logits": -1.4136476516723633, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4136476516723633, "logits_per_char": -0.7068238258361816, "num_chars": 2}, {"sum_logits": -1.4299052953720093, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4299052953720093, "logits_per_char": -0.7149526476860046, "num_chars": 2}, {"sum_logits": -1.3518967628479004, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.3518967628479004, "logits_per_char": -0.6759483814239502, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1059, "native_id": "MEAP_2005_5_15", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4422303438186646, "incorrect_loss_raw": 1.3790544271469116, "correct_loss_per_char": 0.7211151719093323, "incorrect_loss_per_char": 0.6895272135734558, "correct_loss_per_token": 1.4422303438186646, "incorrect_loss_per_token": 1.3790544271469116, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2782248258590698, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": true, "logits_per_token": -1.2782248258590698, "logits_per_char": -0.6391124129295349, "num_chars": 2}, {"sum_logits": -1.4422303438186646, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.4422303438186646, "logits_per_char": -0.7211151719093323, "num_chars": 2}, {"sum_logits": -1.4431670904159546, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.4431670904159546, "logits_per_char": -0.7215835452079773, "num_chars": 2}, {"sum_logits": -1.4157713651657104, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.4157713651657104, "logits_per_char": -0.7078856825828552, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1060, "native_id": "Mercury_7016258", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3587833642959595, "incorrect_loss_raw": 1.4021844466527302, "correct_loss_per_char": 0.6793916821479797, "incorrect_loss_per_char": 0.7010922233263651, "correct_loss_per_token": 1.3587833642959595, "incorrect_loss_per_token": 1.4021844466527302, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3971985578536987, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.3971985578536987, "logits_per_char": -0.6985992789268494, "num_chars": 2}, {"sum_logits": -1.3587833642959595, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.3587833642959595, "logits_per_char": -0.6793916821479797, "num_chars": 2}, {"sum_logits": -1.468812346458435, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.468812346458435, "logits_per_char": -0.7344061732292175, "num_chars": 2}, {"sum_logits": -1.3405424356460571, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.3405424356460571, "logits_per_char": -0.6702712178230286, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1061, "native_id": "NCEOGA_2013_8_5", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5472240447998047, "incorrect_loss_raw": 1.344862659772237, "correct_loss_per_char": 0.7736120223999023, "incorrect_loss_per_char": 0.6724313298861185, "correct_loss_per_token": 1.5472240447998047, "incorrect_loss_per_token": 1.344862659772237, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5472240447998047, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.5472240447998047, "logits_per_char": -0.7736120223999023, "num_chars": 2}, {"sum_logits": -1.3862828016281128, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3862828016281128, "logits_per_char": -0.6931414008140564, "num_chars": 2}, {"sum_logits": -1.3755582571029663, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3755582571029663, "logits_per_char": -0.6877791285514832, "num_chars": 2}, {"sum_logits": -1.2727469205856323, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.2727469205856323, "logits_per_char": -0.6363734602928162, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1062, "native_id": "Mercury_7015540", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.41923189163208, "incorrect_loss_raw": 1.385634183883667, "correct_loss_per_char": 0.70961594581604, "incorrect_loss_per_char": 0.6928170919418335, "correct_loss_per_token": 1.41923189163208, "incorrect_loss_per_token": 1.385634183883667, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4707955121994019, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4707955121994019, "logits_per_char": -0.7353977560997009, "num_chars": 2}, {"sum_logits": -1.41923189163208, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.41923189163208, "logits_per_char": -0.70961594581604, "num_chars": 2}, {"sum_logits": -1.4392364025115967, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4392364025115967, "logits_per_char": -0.7196182012557983, "num_chars": 2}, {"sum_logits": -1.2468706369400024, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.2468706369400024, "logits_per_char": -0.6234353184700012, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1063, "native_id": "Mercury_SC_414001", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.353683590888977, "incorrect_loss_raw": 1.403125007947286, "correct_loss_per_char": 0.6768417954444885, "incorrect_loss_per_char": 0.701562503973643, "correct_loss_per_token": 1.353683590888977, "incorrect_loss_per_token": 1.403125007947286, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3437538146972656, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": true, "logits_per_token": -1.3437538146972656, "logits_per_char": -0.6718769073486328, "num_chars": 2}, {"sum_logits": -1.3981558084487915, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.3981558084487915, "logits_per_char": -0.6990779042243958, "num_chars": 2}, {"sum_logits": -1.4674654006958008, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.4674654006958008, "logits_per_char": -0.7337327003479004, "num_chars": 2}, {"sum_logits": -1.353683590888977, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.353683590888977, "logits_per_char": -0.6768417954444885, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1064, "native_id": "Mercury_7017973", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.292434573173523, "incorrect_loss_raw": 1.4263327916463215, "correct_loss_per_char": 0.6462172865867615, "incorrect_loss_per_char": 0.7131663958231608, "correct_loss_per_token": 1.292434573173523, "incorrect_loss_per_token": 1.4263327916463215, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4818084239959717, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4818084239959717, "logits_per_char": -0.7409042119979858, "num_chars": 2}, {"sum_logits": -1.3726396560668945, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3726396560668945, "logits_per_char": -0.6863198280334473, "num_chars": 2}, {"sum_logits": -1.4245502948760986, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4245502948760986, "logits_per_char": -0.7122751474380493, "num_chars": 2}, {"sum_logits": -1.292434573173523, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.292434573173523, "logits_per_char": -0.6462172865867615, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1065, "native_id": "Mercury_407097", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4355889558792114, "incorrect_loss_raw": 1.3780550956726074, "correct_loss_per_char": 0.7177944779396057, "incorrect_loss_per_char": 0.6890275478363037, "correct_loss_per_token": 1.4355889558792114, "incorrect_loss_per_token": 1.3780550956726074, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4337286949157715, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": false, "logits_per_token": -1.4337286949157715, "logits_per_char": -0.7168643474578857, "num_chars": 2}, {"sum_logits": -1.4227970838546753, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": false, "logits_per_token": -1.4227970838546753, "logits_per_char": -0.7113985419273376, "num_chars": 2}, {"sum_logits": -1.4355889558792114, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": false, "logits_per_token": -1.4355889558792114, "logits_per_char": -0.7177944779396057, "num_chars": 2}, {"sum_logits": -1.2776395082473755, "num_tokens": 1, "num_tokens_all": 430, "is_greedy": true, "logits_per_token": -1.2776395082473755, "logits_per_char": -0.6388197541236877, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1066, "native_id": "Mercury_SC_406794", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3862411975860596, "incorrect_loss_raw": 1.3922008275985718, "correct_loss_per_char": 0.6931205987930298, "incorrect_loss_per_char": 0.6961004137992859, "correct_loss_per_token": 1.3862411975860596, "incorrect_loss_per_token": 1.3922008275985718, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3767738342285156, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.3767738342285156, "logits_per_char": -0.6883869171142578, "num_chars": 2}, {"sum_logits": -1.4374951124191284, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.4374951124191284, "logits_per_char": -0.7187475562095642, "num_chars": 2}, {"sum_logits": -1.3862411975860596, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": false, "logits_per_token": -1.3862411975860596, "logits_per_char": -0.6931205987930298, "num_chars": 2}, {"sum_logits": -1.3623335361480713, "num_tokens": 1, "num_tokens_all": 384, "is_greedy": true, "logits_per_token": -1.3623335361480713, "logits_per_char": -0.6811667680740356, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1067, "native_id": "Mercury_7227710", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3659396171569824, "incorrect_loss_raw": 1.4007657766342163, "correct_loss_per_char": 0.6829698085784912, "incorrect_loss_per_char": 0.7003828883171082, "correct_loss_per_token": 1.3659396171569824, "incorrect_loss_per_token": 1.4007657766342163, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3430919647216797, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.3430919647216797, "logits_per_char": -0.6715459823608398, "num_chars": 2}, {"sum_logits": -1.3816709518432617, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3816709518432617, "logits_per_char": -0.6908354759216309, "num_chars": 2}, {"sum_logits": -1.4775344133377075, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4775344133377075, "logits_per_char": -0.7387672066688538, "num_chars": 2}, {"sum_logits": -1.3659396171569824, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3659396171569824, "logits_per_char": -0.6829698085784912, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1068, "native_id": "Mercury_SC_406710", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5106815099716187, "incorrect_loss_raw": 1.3615275621414185, "correct_loss_per_char": 0.7553407549858093, "incorrect_loss_per_char": 0.6807637810707092, "correct_loss_per_token": 1.5106815099716187, "incorrect_loss_per_token": 1.3615275621414185, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2067368030548096, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.2067368030548096, "logits_per_char": -0.6033684015274048, "num_chars": 2}, {"sum_logits": -1.3495759963989258, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.3495759963989258, "logits_per_char": -0.6747879981994629, "num_chars": 2}, {"sum_logits": -1.5106815099716187, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.5106815099716187, "logits_per_char": -0.7553407549858093, "num_chars": 2}, {"sum_logits": -1.52826988697052, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.52826988697052, "logits_per_char": -0.76413494348526, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1069, "native_id": "Mercury_401926", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1696876287460327, "incorrect_loss_raw": 1.4801283677419026, "correct_loss_per_char": 0.5848438143730164, "incorrect_loss_per_char": 0.7400641838709513, "correct_loss_per_token": 1.1696876287460327, "incorrect_loss_per_token": 1.4801283677419026, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5530521869659424, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": false, "logits_per_token": -1.5530521869659424, "logits_per_char": -0.7765260934829712, "num_chars": 2}, {"sum_logits": -1.351333737373352, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": false, "logits_per_token": -1.351333737373352, "logits_per_char": -0.675666868686676, "num_chars": 2}, {"sum_logits": -1.5359991788864136, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": false, "logits_per_token": -1.5359991788864136, "logits_per_char": -0.7679995894432068, "num_chars": 2}, {"sum_logits": -1.1696876287460327, "num_tokens": 1, "num_tokens_all": 412, "is_greedy": true, "logits_per_token": -1.1696876287460327, "logits_per_char": -0.5848438143730164, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1070, "native_id": "MCAS_2014_5_15", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.368924617767334, "incorrect_loss_raw": 1.3996107578277588, "correct_loss_per_char": 0.684462308883667, "incorrect_loss_per_char": 0.6998053789138794, "correct_loss_per_token": 1.368924617767334, "incorrect_loss_per_token": 1.3996107578277588, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4661242961883545, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.4661242961883545, "logits_per_char": -0.7330621480941772, "num_chars": 2}, {"sum_logits": -1.4206129312515259, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.4206129312515259, "logits_per_char": -0.7103064656257629, "num_chars": 2}, {"sum_logits": -1.312095046043396, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": true, "logits_per_token": -1.312095046043396, "logits_per_char": -0.656047523021698, "num_chars": 2}, {"sum_logits": -1.368924617767334, "num_tokens": 1, "num_tokens_all": 392, "is_greedy": false, "logits_per_token": -1.368924617767334, "logits_per_char": -0.684462308883667, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1071, "native_id": "Mercury_LBS10151", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3814600706100464, "incorrect_loss_raw": 1.395727515220642, "correct_loss_per_char": 0.6907300353050232, "incorrect_loss_per_char": 0.697863757610321, "correct_loss_per_token": 1.3814600706100464, "incorrect_loss_per_token": 1.395727515220642, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3814600706100464, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3814600706100464, "logits_per_char": -0.6907300353050232, "num_chars": 2}, {"sum_logits": -1.3204463720321655, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.3204463720321655, "logits_per_char": -0.6602231860160828, "num_chars": 2}, {"sum_logits": -1.4397914409637451, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4397914409637451, "logits_per_char": -0.7198957204818726, "num_chars": 2}, {"sum_logits": -1.4269447326660156, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4269447326660156, "logits_per_char": -0.7134723663330078, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1072, "native_id": "ACTAAP_2013_5_8", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4453985691070557, "incorrect_loss_raw": 1.3751713037490845, "correct_loss_per_char": 0.7226992845535278, "incorrect_loss_per_char": 0.6875856518745422, "correct_loss_per_token": 1.4453985691070557, "incorrect_loss_per_token": 1.3751713037490845, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.421107530593872, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.421107530593872, "logits_per_char": -0.710553765296936, "num_chars": 2}, {"sum_logits": -1.4453985691070557, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4453985691070557, "logits_per_char": -0.7226992845535278, "num_chars": 2}, {"sum_logits": -1.3897103071212769, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.3897103071212769, "logits_per_char": -0.6948551535606384, "num_chars": 2}, {"sum_logits": -1.3146960735321045, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.3146960735321045, "logits_per_char": -0.6573480367660522, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1073, "native_id": "Mercury_SC_407592", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5799790620803833, "incorrect_loss_raw": 1.3366491794586182, "correct_loss_per_char": 0.7899895310401917, "incorrect_loss_per_char": 0.6683245897293091, "correct_loss_per_token": 1.5799790620803833, "incorrect_loss_per_token": 1.3366491794586182, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5799790620803833, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.5799790620803833, "logits_per_char": -0.7899895310401917, "num_chars": 2}, {"sum_logits": -1.3959287405014038, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3959287405014038, "logits_per_char": -0.6979643702507019, "num_chars": 2}, {"sum_logits": -1.3684059381484985, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3684059381484985, "logits_per_char": -0.6842029690742493, "num_chars": 2}, {"sum_logits": -1.2456128597259521, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.2456128597259521, "logits_per_char": -0.6228064298629761, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1074, "native_id": "TIMSS_1995_8_L6", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.240931510925293, "incorrect_loss_raw": 1.4478305180867512, "correct_loss_per_char": 0.6204657554626465, "incorrect_loss_per_char": 0.7239152590433756, "correct_loss_per_token": 1.240931510925293, "incorrect_loss_per_token": 1.4478305180867512, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4485689401626587, "num_tokens": 1, "num_tokens_all": 439, "is_greedy": false, "logits_per_token": -1.4485689401626587, "logits_per_char": -0.7242844700813293, "num_chars": 2}, {"sum_logits": -1.3627301454544067, "num_tokens": 1, "num_tokens_all": 439, "is_greedy": false, "logits_per_token": -1.3627301454544067, "logits_per_char": -0.6813650727272034, "num_chars": 2}, {"sum_logits": -1.5321924686431885, "num_tokens": 1, "num_tokens_all": 439, "is_greedy": false, "logits_per_token": -1.5321924686431885, "logits_per_char": -0.7660962343215942, "num_chars": 2}, {"sum_logits": -1.240931510925293, "num_tokens": 1, "num_tokens_all": 439, "is_greedy": true, "logits_per_token": -1.240931510925293, "logits_per_char": -0.6204657554626465, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1075, "native_id": "Mercury_7233398", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4482899904251099, "incorrect_loss_raw": 1.377716024716695, "correct_loss_per_char": 0.7241449952125549, "incorrect_loss_per_char": 0.6888580123583475, "correct_loss_per_token": 1.4482899904251099, "incorrect_loss_per_token": 1.377716024716695, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4637194871902466, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4637194871902466, "logits_per_char": -0.7318597435951233, "num_chars": 2}, {"sum_logits": -1.430559754371643, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.430559754371643, "logits_per_char": -0.7152798771858215, "num_chars": 2}, {"sum_logits": -1.4482899904251099, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4482899904251099, "logits_per_char": -0.7241449952125549, "num_chars": 2}, {"sum_logits": -1.2388688325881958, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.2388688325881958, "logits_per_char": -0.6194344162940979, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1076, "native_id": "Mercury_407664", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.401767373085022, "incorrect_loss_raw": 1.3872812191645305, "correct_loss_per_char": 0.700883686542511, "incorrect_loss_per_char": 0.6936406095822653, "correct_loss_per_token": 1.401767373085022, "incorrect_loss_per_token": 1.3872812191645305, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4130610227584839, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4130610227584839, "logits_per_char": -0.7065305113792419, "num_chars": 2}, {"sum_logits": -1.3415772914886475, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.3415772914886475, "logits_per_char": -0.6707886457443237, "num_chars": 2}, {"sum_logits": -1.40720534324646, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.40720534324646, "logits_per_char": -0.70360267162323, "num_chars": 2}, {"sum_logits": -1.401767373085022, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.401767373085022, "logits_per_char": -0.700883686542511, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1077, "native_id": "Mercury_SC_408657", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2769049406051636, "incorrect_loss_raw": 1.4367845455805461, "correct_loss_per_char": 0.6384524703025818, "incorrect_loss_per_char": 0.7183922727902731, "correct_loss_per_token": 1.2769049406051636, "incorrect_loss_per_token": 1.4367845455805461, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.285170078277588, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.285170078277588, "logits_per_char": -0.642585039138794, "num_chars": 2}, {"sum_logits": -1.4643065929412842, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.4643065929412842, "logits_per_char": -0.7321532964706421, "num_chars": 2}, {"sum_logits": -1.5608769655227661, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.5608769655227661, "logits_per_char": -0.7804384827613831, "num_chars": 2}, {"sum_logits": -1.2769049406051636, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.2769049406051636, "logits_per_char": -0.6384524703025818, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1078, "native_id": "Mercury_7142800", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.377284049987793, "incorrect_loss_raw": 1.3970853487650554, "correct_loss_per_char": 0.6886420249938965, "incorrect_loss_per_char": 0.6985426743825277, "correct_loss_per_token": 1.377284049987793, "incorrect_loss_per_token": 1.3970853487650554, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4656230211257935, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.4656230211257935, "logits_per_char": -0.7328115105628967, "num_chars": 2}, {"sum_logits": -1.377284049987793, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.377284049987793, "logits_per_char": -0.6886420249938965, "num_chars": 2}, {"sum_logits": -1.4034940004348755, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.4034940004348755, "logits_per_char": -0.7017470002174377, "num_chars": 2}, {"sum_logits": -1.322139024734497, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -1.322139024734497, "logits_per_char": -0.6610695123672485, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1079, "native_id": "Mercury_SC_410837", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3892030715942383, "incorrect_loss_raw": 1.4008676211039226, "correct_loss_per_char": 0.6946015357971191, "incorrect_loss_per_char": 0.7004338105519613, "correct_loss_per_token": 1.3892030715942383, "incorrect_loss_per_token": 1.4008676211039226, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.59645414352417, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.59645414352417, "logits_per_char": -0.798227071762085, "num_chars": 2}, {"sum_logits": -1.3892030715942383, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.3892030715942383, "logits_per_char": -0.6946015357971191, "num_chars": 2}, {"sum_logits": -1.3706148862838745, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.3706148862838745, "logits_per_char": -0.6853074431419373, "num_chars": 2}, {"sum_logits": -1.2355338335037231, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": true, "logits_per_token": -1.2355338335037231, "logits_per_char": -0.6177669167518616, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1080, "native_id": "Mercury_7154315", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.571804404258728, "incorrect_loss_raw": 1.3391348123550415, "correct_loss_per_char": 0.785902202129364, "incorrect_loss_per_char": 0.6695674061775208, "correct_loss_per_token": 1.571804404258728, "incorrect_loss_per_token": 1.3391348123550415, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.571804404258728, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.571804404258728, "logits_per_char": -0.785902202129364, "num_chars": 2}, {"sum_logits": -1.3791718482971191, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.3791718482971191, "logits_per_char": -0.6895859241485596, "num_chars": 2}, {"sum_logits": -1.3924840688705444, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.3924840688705444, "logits_per_char": -0.6962420344352722, "num_chars": 2}, {"sum_logits": -1.245748519897461, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": true, "logits_per_token": -1.245748519897461, "logits_per_char": -0.6228742599487305, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1081, "native_id": "Mercury_7239628", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2624967098236084, "incorrect_loss_raw": 1.4375418027242024, "correct_loss_per_char": 0.6312483549118042, "incorrect_loss_per_char": 0.7187709013621012, "correct_loss_per_token": 1.2624967098236084, "incorrect_loss_per_token": 1.4375418027242024, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.473542332649231, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.473542332649231, "logits_per_char": -0.7367711663246155, "num_chars": 2}, {"sum_logits": -1.4041894674301147, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4041894674301147, "logits_per_char": -0.7020947337150574, "num_chars": 2}, {"sum_logits": -1.4348936080932617, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4348936080932617, "logits_per_char": -0.7174468040466309, "num_chars": 2}, {"sum_logits": -1.2624967098236084, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.2624967098236084, "logits_per_char": -0.6312483549118042, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1082, "native_id": "Mercury_401241", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.348475456237793, "incorrect_loss_raw": 1.404459039370219, "correct_loss_per_char": 0.6742377281188965, "incorrect_loss_per_char": 0.7022295196851095, "correct_loss_per_token": 1.348475456237793, "incorrect_loss_per_token": 1.404459039370219, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.348475456237793, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.348475456237793, "logits_per_char": -0.6742377281188965, "num_chars": 2}, {"sum_logits": -1.3815172910690308, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3815172910690308, "logits_per_char": -0.6907586455345154, "num_chars": 2}, {"sum_logits": -1.445044755935669, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.445044755935669, "logits_per_char": -0.7225223779678345, "num_chars": 2}, {"sum_logits": -1.386815071105957, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.386815071105957, "logits_per_char": -0.6934075355529785, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1083, "native_id": "Mercury_SC_408251", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.26868736743927, "incorrect_loss_raw": 1.4426529010136921, "correct_loss_per_char": 0.634343683719635, "incorrect_loss_per_char": 0.7213264505068461, "correct_loss_per_token": 1.26868736743927, "incorrect_loss_per_token": 1.4426529010136921, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.26868736743927, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": true, "logits_per_token": -1.26868736743927, "logits_per_char": -0.634343683719635, "num_chars": 2}, {"sum_logits": -1.423322081565857, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.423322081565857, "logits_per_char": -0.7116610407829285, "num_chars": 2}, {"sum_logits": -1.5840938091278076, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.5840938091278076, "logits_per_char": -0.7920469045639038, "num_chars": 2}, {"sum_logits": -1.320542812347412, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.320542812347412, "logits_per_char": -0.660271406173706, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1084, "native_id": "Mercury_7175893", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3238693475723267, "incorrect_loss_raw": 1.4126697381337483, "correct_loss_per_char": 0.6619346737861633, "incorrect_loss_per_char": 0.7063348690668741, "correct_loss_per_token": 1.3238693475723267, "incorrect_loss_per_token": 1.4126697381337483, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4276368618011475, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4276368618011475, "logits_per_char": -0.7138184309005737, "num_chars": 2}, {"sum_logits": -1.3893893957138062, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.3893893957138062, "logits_per_char": -0.6946946978569031, "num_chars": 2}, {"sum_logits": -1.4209829568862915, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4209829568862915, "logits_per_char": -0.7104914784431458, "num_chars": 2}, {"sum_logits": -1.3238693475723267, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.3238693475723267, "logits_per_char": -0.6619346737861633, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1085, "native_id": "Mercury_7202843", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4490647315979004, "incorrect_loss_raw": 1.3754472732543945, "correct_loss_per_char": 0.7245323657989502, "incorrect_loss_per_char": 0.6877236366271973, "correct_loss_per_token": 1.4490647315979004, "incorrect_loss_per_token": 1.3754472732543945, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4490647315979004, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.4490647315979004, "logits_per_char": -0.7245323657989502, "num_chars": 2}, {"sum_logits": -1.3494280576705933, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.3494280576705933, "logits_per_char": -0.6747140288352966, "num_chars": 2}, {"sum_logits": -1.506451964378357, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.506451964378357, "logits_per_char": -0.7532259821891785, "num_chars": 2}, {"sum_logits": -1.2704617977142334, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": true, "logits_per_token": -1.2704617977142334, "logits_per_char": -0.6352308988571167, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1086, "native_id": "Mercury_7159023", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4866491556167603, "incorrect_loss_raw": 1.3604426383972168, "correct_loss_per_char": 0.7433245778083801, "incorrect_loss_per_char": 0.6802213191986084, "correct_loss_per_token": 1.4866491556167603, "incorrect_loss_per_token": 1.3604426383972168, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.372617244720459, "num_tokens": 1, "num_tokens_all": 418, "is_greedy": false, "logits_per_token": -1.372617244720459, "logits_per_char": -0.6863086223602295, "num_chars": 2}, {"sum_logits": -1.3649892807006836, "num_tokens": 1, "num_tokens_all": 418, "is_greedy": false, "logits_per_token": -1.3649892807006836, "logits_per_char": -0.6824946403503418, "num_chars": 2}, {"sum_logits": -1.4866491556167603, "num_tokens": 1, "num_tokens_all": 418, "is_greedy": false, "logits_per_token": -1.4866491556167603, "logits_per_char": -0.7433245778083801, "num_chars": 2}, {"sum_logits": -1.3437213897705078, "num_tokens": 1, "num_tokens_all": 418, "is_greedy": true, "logits_per_token": -1.3437213897705078, "logits_per_char": -0.6718606948852539, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1087, "native_id": "MDSA_2008_8_3", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3017854690551758, "incorrect_loss_raw": 1.420918385187785, "correct_loss_per_char": 0.6508927345275879, "incorrect_loss_per_char": 0.7104591925938925, "correct_loss_per_token": 1.3017854690551758, "incorrect_loss_per_token": 1.420918385187785, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4592604637145996, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.4592604637145996, "logits_per_char": -0.7296302318572998, "num_chars": 2}, {"sum_logits": -1.402859091758728, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.402859091758728, "logits_per_char": -0.701429545879364, "num_chars": 2}, {"sum_logits": -1.4006356000900269, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.4006356000900269, "logits_per_char": -0.7003178000450134, "num_chars": 2}, {"sum_logits": -1.3017854690551758, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": true, "logits_per_token": -1.3017854690551758, "logits_per_char": -0.6508927345275879, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1088, "native_id": "Mercury_7218348", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5172396898269653, "incorrect_loss_raw": 1.3540714581807454, "correct_loss_per_char": 0.7586198449134827, "incorrect_loss_per_char": 0.6770357290903727, "correct_loss_per_token": 1.5172396898269653, "incorrect_loss_per_token": 1.3540714581807454, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5172396898269653, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.5172396898269653, "logits_per_char": -0.7586198449134827, "num_chars": 2}, {"sum_logits": -1.4021590948104858, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4021590948104858, "logits_per_char": -0.7010795474052429, "num_chars": 2}, {"sum_logits": -1.395617961883545, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.395617961883545, "logits_per_char": -0.6978089809417725, "num_chars": 2}, {"sum_logits": -1.2644373178482056, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.2644373178482056, "logits_per_char": -0.6322186589241028, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1089, "native_id": "Mercury_SC_406458", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2131102085113525, "incorrect_loss_raw": 1.4588065544764202, "correct_loss_per_char": 0.6065551042556763, "incorrect_loss_per_char": 0.7294032772382101, "correct_loss_per_token": 1.2131102085113525, "incorrect_loss_per_token": 1.4588065544764202, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.50537109375, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.50537109375, "logits_per_char": -0.752685546875, "num_chars": 2}, {"sum_logits": -1.50471031665802, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.50471031665802, "logits_per_char": -0.75235515832901, "num_chars": 2}, {"sum_logits": -1.3663382530212402, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": false, "logits_per_token": -1.3663382530212402, "logits_per_char": -0.6831691265106201, "num_chars": 2}, {"sum_logits": -1.2131102085113525, "num_tokens": 1, "num_tokens_all": 391, "is_greedy": true, "logits_per_token": -1.2131102085113525, "logits_per_char": -0.6065551042556763, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1090, "native_id": "LEAP_2007_4_10280", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.348179817199707, "incorrect_loss_raw": 1.4055243730545044, "correct_loss_per_char": 0.6740899085998535, "incorrect_loss_per_char": 0.7027621865272522, "correct_loss_per_token": 1.348179817199707, "incorrect_loss_per_token": 1.4055243730545044, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.348179817199707, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.348179817199707, "logits_per_char": -0.6740899085998535, "num_chars": 2}, {"sum_logits": -1.3966386318206787, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.3966386318206787, "logits_per_char": -0.6983193159103394, "num_chars": 2}, {"sum_logits": -1.4817739725112915, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4817739725112915, "logits_per_char": -0.7408869862556458, "num_chars": 2}, {"sum_logits": -1.338160514831543, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.338160514831543, "logits_per_char": -0.6690802574157715, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1091, "native_id": "Mercury_7216965", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3994650840759277, "incorrect_loss_raw": 1.3884711662928264, "correct_loss_per_char": 0.6997325420379639, "incorrect_loss_per_char": 0.6942355831464132, "correct_loss_per_token": 1.3994650840759277, "incorrect_loss_per_token": 1.3884711662928264, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3931505680084229, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3931505680084229, "logits_per_char": -0.6965752840042114, "num_chars": 2}, {"sum_logits": -1.3681044578552246, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.3681044578552246, "logits_per_char": -0.6840522289276123, "num_chars": 2}, {"sum_logits": -1.3994650840759277, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.3994650840759277, "logits_per_char": -0.6997325420379639, "num_chars": 2}, {"sum_logits": -1.4041584730148315, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4041584730148315, "logits_per_char": -0.7020792365074158, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1092, "native_id": "NYSEDREGENTS_2010_8_42", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.394838571548462, "incorrect_loss_raw": 1.3936193386713664, "correct_loss_per_char": 0.697419285774231, "incorrect_loss_per_char": 0.6968096693356832, "correct_loss_per_token": 1.394838571548462, "incorrect_loss_per_token": 1.3936193386713664, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5223060846328735, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.5223060846328735, "logits_per_char": -0.7611530423164368, "num_chars": 2}, {"sum_logits": -1.394838571548462, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.394838571548462, "logits_per_char": -0.697419285774231, "num_chars": 2}, {"sum_logits": -1.3252506256103516, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": true, "logits_per_token": -1.3252506256103516, "logits_per_char": -0.6626253128051758, "num_chars": 2}, {"sum_logits": -1.333301305770874, "num_tokens": 1, "num_tokens_all": 416, "is_greedy": false, "logits_per_token": -1.333301305770874, "logits_per_char": -0.666650652885437, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1093, "native_id": "LEAP__7_10351", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3459627628326416, "incorrect_loss_raw": 1.409438133239746, "correct_loss_per_char": 0.6729813814163208, "incorrect_loss_per_char": 0.704719066619873, "correct_loss_per_token": 1.3459627628326416, "incorrect_loss_per_token": 1.409438133239746, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5334992408752441, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.5334992408752441, "logits_per_char": -0.7667496204376221, "num_chars": 2}, {"sum_logits": -1.339957356452942, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": true, "logits_per_token": -1.339957356452942, "logits_per_char": -0.669978678226471, "num_chars": 2}, {"sum_logits": -1.3459627628326416, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.3459627628326416, "logits_per_char": -0.6729813814163208, "num_chars": 2}, {"sum_logits": -1.3548578023910522, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.3548578023910522, "logits_per_char": -0.6774289011955261, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1094, "native_id": "Mercury_SC_400590", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3866623640060425, "incorrect_loss_raw": 1.391433556874593, "correct_loss_per_char": 0.6933311820030212, "incorrect_loss_per_char": 0.6957167784372965, "correct_loss_per_token": 1.3866623640060425, "incorrect_loss_per_token": 1.391433556874593, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4059946537017822, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4059946537017822, "logits_per_char": -0.7029973268508911, "num_chars": 2}, {"sum_logits": -1.3621330261230469, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.3621330261230469, "logits_per_char": -0.6810665130615234, "num_chars": 2}, {"sum_logits": -1.4061729907989502, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4061729907989502, "logits_per_char": -0.7030864953994751, "num_chars": 2}, {"sum_logits": -1.3866623640060425, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3866623640060425, "logits_per_char": -0.6933311820030212, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1095, "native_id": "Mercury_7086608", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4791678190231323, "incorrect_loss_raw": 1.366620620091756, "correct_loss_per_char": 0.7395839095115662, "incorrect_loss_per_char": 0.683310310045878, "correct_loss_per_token": 1.4791678190231323, "incorrect_loss_per_token": 1.366620620091756, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2353088855743408, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": true, "logits_per_token": -1.2353088855743408, "logits_per_char": -0.6176544427871704, "num_chars": 2}, {"sum_logits": -1.4791678190231323, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4791678190231323, "logits_per_char": -0.7395839095115662, "num_chars": 2}, {"sum_logits": -1.4194811582565308, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.4194811582565308, "logits_per_char": -0.7097405791282654, "num_chars": 2}, {"sum_logits": -1.445071816444397, "num_tokens": 1, "num_tokens_all": 350, "is_greedy": false, "logits_per_token": -1.445071816444397, "logits_per_char": -0.7225359082221985, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1096, "native_id": "Mercury_7187863", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4047763347625732, "incorrect_loss_raw": 1.3863040606180828, "correct_loss_per_char": 0.7023881673812866, "incorrect_loss_per_char": 0.6931520303090414, "correct_loss_per_token": 1.4047763347625732, "incorrect_loss_per_token": 1.3863040606180828, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.376351237297058, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.376351237297058, "logits_per_char": -0.688175618648529, "num_chars": 2}, {"sum_logits": -1.3286930322647095, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.3286930322647095, "logits_per_char": -0.6643465161323547, "num_chars": 2}, {"sum_logits": -1.4538679122924805, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4538679122924805, "logits_per_char": -0.7269339561462402, "num_chars": 2}, {"sum_logits": -1.4047763347625732, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4047763347625732, "logits_per_char": -0.7023881673812866, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1097, "native_id": "Mercury_7120873", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4108607769012451, "incorrect_loss_raw": 1.3858820994695027, "correct_loss_per_char": 0.7054303884506226, "incorrect_loss_per_char": 0.6929410497347513, "correct_loss_per_token": 1.4108607769012451, "incorrect_loss_per_token": 1.3858820994695027, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4652836322784424, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4652836322784424, "logits_per_char": -0.7326418161392212, "num_chars": 2}, {"sum_logits": -1.4140229225158691, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4140229225158691, "logits_per_char": -0.7070114612579346, "num_chars": 2}, {"sum_logits": -1.4108607769012451, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4108607769012451, "logits_per_char": -0.7054303884506226, "num_chars": 2}, {"sum_logits": -1.2783397436141968, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.2783397436141968, "logits_per_char": -0.6391698718070984, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1098, "native_id": "Mercury_184730", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3913880586624146, "incorrect_loss_raw": 1.3909773031870525, "correct_loss_per_char": 0.6956940293312073, "incorrect_loss_per_char": 0.6954886515935262, "correct_loss_per_token": 1.3913880586624146, "incorrect_loss_per_token": 1.3909773031870525, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3505460023880005, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": true, "logits_per_token": -1.3505460023880005, "logits_per_char": -0.6752730011940002, "num_chars": 2}, {"sum_logits": -1.3913880586624146, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.3913880586624146, "logits_per_char": -0.6956940293312073, "num_chars": 2}, {"sum_logits": -1.3805221319198608, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.3805221319198608, "logits_per_char": -0.6902610659599304, "num_chars": 2}, {"sum_logits": -1.441863775253296, "num_tokens": 1, "num_tokens_all": 401, "is_greedy": false, "logits_per_token": -1.441863775253296, "logits_per_char": -0.720931887626648, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1099, "native_id": "Mercury_SC_401265", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.425911545753479, "incorrect_loss_raw": 1.3822964429855347, "correct_loss_per_char": 0.7129557728767395, "incorrect_loss_per_char": 0.6911482214927673, "correct_loss_per_token": 1.425911545753479, "incorrect_loss_per_token": 1.3822964429855347, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2802059650421143, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.2802059650421143, "logits_per_char": -0.6401029825210571, "num_chars": 2}, {"sum_logits": -1.375040888786316, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.375040888786316, "logits_per_char": -0.687520444393158, "num_chars": 2}, {"sum_logits": -1.4916424751281738, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4916424751281738, "logits_per_char": -0.7458212375640869, "num_chars": 2}, {"sum_logits": -1.425911545753479, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.425911545753479, "logits_per_char": -0.7129557728767395, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1100, "native_id": "OHAT_2009_8_34", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2344450950622559, "incorrect_loss_raw": 1.4486524264017742, "correct_loss_per_char": 0.6172225475311279, "incorrect_loss_per_char": 0.7243262132008871, "correct_loss_per_token": 1.2344450950622559, "incorrect_loss_per_token": 1.4486524264017742, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4815882444381714, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4815882444381714, "logits_per_char": -0.7407941222190857, "num_chars": 2}, {"sum_logits": -1.4010995626449585, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4010995626449585, "logits_per_char": -0.7005497813224792, "num_chars": 2}, {"sum_logits": -1.4632694721221924, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.4632694721221924, "logits_per_char": -0.7316347360610962, "num_chars": 2}, {"sum_logits": -1.2344450950622559, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.2344450950622559, "logits_per_char": -0.6172225475311279, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1101, "native_id": "Mercury_406639", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4434701204299927, "incorrect_loss_raw": 1.3780375719070435, "correct_loss_per_char": 0.7217350602149963, "incorrect_loss_per_char": 0.6890187859535217, "correct_loss_per_token": 1.4434701204299927, "incorrect_loss_per_token": 1.3780375719070435, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.484570026397705, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.484570026397705, "logits_per_char": -0.7422850131988525, "num_chars": 2}, {"sum_logits": -1.4434701204299927, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4434701204299927, "logits_per_char": -0.7217350602149963, "num_chars": 2}, {"sum_logits": -1.4135773181915283, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4135773181915283, "logits_per_char": -0.7067886590957642, "num_chars": 2}, {"sum_logits": -1.235965371131897, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.235965371131897, "logits_per_char": -0.6179826855659485, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1102, "native_id": "Mercury_7008610", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.387046456336975, "incorrect_loss_raw": 1.394417643547058, "correct_loss_per_char": 0.6935232281684875, "incorrect_loss_per_char": 0.697208821773529, "correct_loss_per_token": 1.387046456336975, "incorrect_loss_per_token": 1.394417643547058, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.299863576889038, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.299863576889038, "logits_per_char": -0.649931788444519, "num_chars": 2}, {"sum_logits": -1.4310182332992554, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4310182332992554, "logits_per_char": -0.7155091166496277, "num_chars": 2}, {"sum_logits": -1.4523711204528809, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4523711204528809, "logits_per_char": -0.7261855602264404, "num_chars": 2}, {"sum_logits": -1.387046456336975, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.387046456336975, "logits_per_char": -0.6935232281684875, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1103, "native_id": "MCAS_2009_8_12", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4412230253219604, "incorrect_loss_raw": 1.3755098978678386, "correct_loss_per_char": 0.7206115126609802, "incorrect_loss_per_char": 0.6877549489339193, "correct_loss_per_token": 1.4412230253219604, "incorrect_loss_per_token": 1.3755098978678386, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4412230253219604, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4412230253219604, "logits_per_char": -0.7206115126609802, "num_chars": 2}, {"sum_logits": -1.4027162790298462, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4027162790298462, "logits_per_char": -0.7013581395149231, "num_chars": 2}, {"sum_logits": -1.3846981525421143, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3846981525421143, "logits_per_char": -0.6923490762710571, "num_chars": 2}, {"sum_logits": -1.3391152620315552, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.3391152620315552, "logits_per_char": -0.6695576310157776, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1104, "native_id": "MCAS_2005_8_12", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.384428858757019, "incorrect_loss_raw": 1.3943270444869995, "correct_loss_per_char": 0.6922144293785095, "incorrect_loss_per_char": 0.6971635222434998, "correct_loss_per_token": 1.384428858757019, "incorrect_loss_per_token": 1.3943270444869995, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4434031248092651, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.4434031248092651, "logits_per_char": -0.7217015624046326, "num_chars": 2}, {"sum_logits": -1.3769110441207886, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3769110441207886, "logits_per_char": -0.6884555220603943, "num_chars": 2}, {"sum_logits": -1.384428858757019, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.384428858757019, "logits_per_char": -0.6922144293785095, "num_chars": 2}, {"sum_logits": -1.3626669645309448, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.3626669645309448, "logits_per_char": -0.6813334822654724, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1105, "native_id": "ACTAAP_2008_7_4", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4473211765289307, "incorrect_loss_raw": 1.3801090717315674, "correct_loss_per_char": 0.7236605882644653, "incorrect_loss_per_char": 0.6900545358657837, "correct_loss_per_token": 1.4473211765289307, "incorrect_loss_per_token": 1.3801090717315674, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5474700927734375, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.5474700927734375, "logits_per_char": -0.7737350463867188, "num_chars": 2}, {"sum_logits": -1.4473211765289307, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4473211765289307, "logits_per_char": -0.7236605882644653, "num_chars": 2}, {"sum_logits": -1.3486889600753784, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.3486889600753784, "logits_per_char": -0.6743444800376892, "num_chars": 2}, {"sum_logits": -1.2441681623458862, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.2441681623458862, "logits_per_char": -0.6220840811729431, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1106, "native_id": "NYSEDREGENTS_2008_4_3", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2097244262695312, "incorrect_loss_raw": 1.460094730059306, "correct_loss_per_char": 0.6048622131347656, "incorrect_loss_per_char": 0.730047365029653, "correct_loss_per_token": 1.2097244262695312, "incorrect_loss_per_token": 1.460094730059306, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.431959867477417, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.431959867477417, "logits_per_char": -0.7159799337387085, "num_chars": 2}, {"sum_logits": -1.5265274047851562, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.5265274047851562, "logits_per_char": -0.7632637023925781, "num_chars": 2}, {"sum_logits": -1.4217969179153442, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4217969179153442, "logits_per_char": -0.7108984589576721, "num_chars": 2}, {"sum_logits": -1.2097244262695312, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.2097244262695312, "logits_per_char": -0.6048622131347656, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1107, "native_id": "Mercury_SC_416181", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3261258602142334, "incorrect_loss_raw": 1.4165504376093547, "correct_loss_per_char": 0.6630629301071167, "incorrect_loss_per_char": 0.7082752188046774, "correct_loss_per_token": 1.3261258602142334, "incorrect_loss_per_token": 1.4165504376093547, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5301635265350342, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.5301635265350342, "logits_per_char": -0.7650817632675171, "num_chars": 2}, {"sum_logits": -1.3261258602142334, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.3261258602142334, "logits_per_char": -0.6630629301071167, "num_chars": 2}, {"sum_logits": -1.4262423515319824, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.4262423515319824, "logits_per_char": -0.7131211757659912, "num_chars": 2}, {"sum_logits": -1.2932454347610474, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.2932454347610474, "logits_per_char": -0.6466227173805237, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1108, "native_id": "NYSEDREGENTS_2010_4_30", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3484551906585693, "incorrect_loss_raw": 1.4269368648529053, "correct_loss_per_char": 0.6742275953292847, "incorrect_loss_per_char": 0.7134684324264526, "correct_loss_per_token": 1.3484551906585693, "incorrect_loss_per_token": 1.4269368648529053, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2170764207839966, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": true, "logits_per_token": -1.2170764207839966, "logits_per_char": -0.6085382103919983, "num_chars": 2}, {"sum_logits": -1.3484551906585693, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": false, "logits_per_token": -1.3484551906585693, "logits_per_char": -0.6742275953292847, "num_chars": 2}, {"sum_logits": -1.686633825302124, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": false, "logits_per_token": -1.686633825302124, "logits_per_char": -0.843316912651062, "num_chars": 2}, {"sum_logits": -1.3771003484725952, "num_tokens": 1, "num_tokens_all": 423, "is_greedy": false, "logits_per_token": -1.3771003484725952, "logits_per_char": -0.6885501742362976, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1109, "native_id": "Mercury_7025060", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2404261827468872, "incorrect_loss_raw": 1.4458479086558025, "correct_loss_per_char": 0.6202130913734436, "incorrect_loss_per_char": 0.7229239543279012, "correct_loss_per_token": 1.2404261827468872, "incorrect_loss_per_token": 1.4458479086558025, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.41166353225708, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.41166353225708, "logits_per_char": -0.70583176612854, "num_chars": 2}, {"sum_logits": -1.4777294397354126, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.4777294397354126, "logits_per_char": -0.7388647198677063, "num_chars": 2}, {"sum_logits": -1.4481507539749146, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.4481507539749146, "logits_per_char": -0.7240753769874573, "num_chars": 2}, {"sum_logits": -1.2404261827468872, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.2404261827468872, "logits_per_char": -0.6202130913734436, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1110, "native_id": "Mercury_SC_402103", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2822076082229614, "incorrect_loss_raw": 1.4307116667429607, "correct_loss_per_char": 0.6411038041114807, "incorrect_loss_per_char": 0.7153558333714803, "correct_loss_per_token": 1.2822076082229614, "incorrect_loss_per_token": 1.4307116667429607, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3925880193710327, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3925880193710327, "logits_per_char": -0.6962940096855164, "num_chars": 2}, {"sum_logits": -1.4871630668640137, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4871630668640137, "logits_per_char": -0.7435815334320068, "num_chars": 2}, {"sum_logits": -1.4123839139938354, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4123839139938354, "logits_per_char": -0.7061919569969177, "num_chars": 2}, {"sum_logits": -1.2822076082229614, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.2822076082229614, "logits_per_char": -0.6411038041114807, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1111, "native_id": "VASoL_2009_5_37", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4619792699813843, "incorrect_loss_raw": 1.3694509665171306, "correct_loss_per_char": 0.7309896349906921, "incorrect_loss_per_char": 0.6847254832585653, "correct_loss_per_token": 1.4619792699813843, "incorrect_loss_per_token": 1.3694509665171306, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4564298391342163, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.4564298391342163, "logits_per_char": -0.7282149195671082, "num_chars": 2}, {"sum_logits": -1.3594292402267456, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.3594292402267456, "logits_per_char": -0.6797146201133728, "num_chars": 2}, {"sum_logits": -1.4619792699813843, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": false, "logits_per_token": -1.4619792699813843, "logits_per_char": -0.7309896349906921, "num_chars": 2}, {"sum_logits": -1.2924938201904297, "num_tokens": 1, "num_tokens_all": 405, "is_greedy": true, "logits_per_token": -1.2924938201904297, "logits_per_char": -0.6462469100952148, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1112, "native_id": "Mercury_SC_402981", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4079593420028687, "incorrect_loss_raw": 1.3851947784423828, "correct_loss_per_char": 0.7039796710014343, "incorrect_loss_per_char": 0.6925973892211914, "correct_loss_per_token": 1.4079593420028687, "incorrect_loss_per_token": 1.3851947784423828, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3860869407653809, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3860869407653809, "logits_per_char": -0.6930434703826904, "num_chars": 2}, {"sum_logits": -1.3506548404693604, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.3506548404693604, "logits_per_char": -0.6753274202346802, "num_chars": 2}, {"sum_logits": -1.4188425540924072, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4188425540924072, "logits_per_char": -0.7094212770462036, "num_chars": 2}, {"sum_logits": -1.4079593420028687, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4079593420028687, "logits_per_char": -0.7039796710014343, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1113, "native_id": "NYSEDREGENTS_2008_8_5", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.333006739616394, "incorrect_loss_raw": 1.4228462775548298, "correct_loss_per_char": 0.666503369808197, "incorrect_loss_per_char": 0.7114231387774149, "correct_loss_per_token": 1.333006739616394, "incorrect_loss_per_token": 1.4228462775548298, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6012498140335083, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.6012498140335083, "logits_per_char": -0.8006249070167542, "num_chars": 2}, {"sum_logits": -1.333006739616394, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.333006739616394, "logits_per_char": -0.666503369808197, "num_chars": 2}, {"sum_logits": -1.3546340465545654, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": false, "logits_per_token": -1.3546340465545654, "logits_per_char": -0.6773170232772827, "num_chars": 2}, {"sum_logits": -1.312654972076416, "num_tokens": 1, "num_tokens_all": 353, "is_greedy": true, "logits_per_token": -1.312654972076416, "logits_per_char": -0.656327486038208, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1114, "native_id": "MCAS_1998_4_13", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3795543909072876, "incorrect_loss_raw": 1.3938213189442952, "correct_loss_per_char": 0.6897771954536438, "incorrect_loss_per_char": 0.6969106594721476, "correct_loss_per_token": 1.3795543909072876, "incorrect_loss_per_token": 1.3938213189442952, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3399823904037476, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.3399823904037476, "logits_per_char": -0.6699911952018738, "num_chars": 2}, {"sum_logits": -1.3795543909072876, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.3795543909072876, "logits_per_char": -0.6897771954536438, "num_chars": 2}, {"sum_logits": -1.4317524433135986, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.4317524433135986, "logits_per_char": -0.7158762216567993, "num_chars": 2}, {"sum_logits": -1.4097291231155396, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.4097291231155396, "logits_per_char": -0.7048645615577698, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1115, "native_id": "MDSA_2008_8_20", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.353638768196106, "incorrect_loss_raw": 1.4041639566421509, "correct_loss_per_char": 0.676819384098053, "incorrect_loss_per_char": 0.7020819783210754, "correct_loss_per_token": 1.353638768196106, "incorrect_loss_per_token": 1.4041639566421509, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.322374939918518, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.322374939918518, "logits_per_char": -0.661187469959259, "num_chars": 2}, {"sum_logits": -1.4379665851593018, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4379665851593018, "logits_per_char": -0.7189832925796509, "num_chars": 2}, {"sum_logits": -1.353638768196106, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.353638768196106, "logits_per_char": -0.676819384098053, "num_chars": 2}, {"sum_logits": -1.4521503448486328, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4521503448486328, "logits_per_char": -0.7260751724243164, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1116, "native_id": "Mercury_SC_400134", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2727220058441162, "incorrect_loss_raw": 1.4332433541615803, "correct_loss_per_char": 0.6363610029220581, "incorrect_loss_per_char": 0.7166216770807902, "correct_loss_per_token": 1.2727220058441162, "incorrect_loss_per_token": 1.4332433541615803, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4489507675170898, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4489507675170898, "logits_per_char": -0.7244753837585449, "num_chars": 2}, {"sum_logits": -1.4103718996047974, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4103718996047974, "logits_per_char": -0.7051859498023987, "num_chars": 2}, {"sum_logits": -1.440407395362854, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.440407395362854, "logits_per_char": -0.720203697681427, "num_chars": 2}, {"sum_logits": -1.2727220058441162, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.2727220058441162, "logits_per_char": -0.6363610029220581, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1117, "native_id": "Mercury_SC_LBS10265", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3472791910171509, "incorrect_loss_raw": 1.4061936140060425, "correct_loss_per_char": 0.6736395955085754, "incorrect_loss_per_char": 0.7030968070030212, "correct_loss_per_token": 1.3472791910171509, "incorrect_loss_per_token": 1.4061936140060425, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4871150255203247, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.4871150255203247, "logits_per_char": -0.7435575127601624, "num_chars": 2}, {"sum_logits": -1.3466542959213257, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": true, "logits_per_token": -1.3466542959213257, "logits_per_char": -0.6733271479606628, "num_chars": 2}, {"sum_logits": -1.3472791910171509, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.3472791910171509, "logits_per_char": -0.6736395955085754, "num_chars": 2}, {"sum_logits": -1.384811520576477, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.384811520576477, "logits_per_char": -0.6924057602882385, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1118, "native_id": "Mercury_7188580", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4383631944656372, "incorrect_loss_raw": 1.3773978153864543, "correct_loss_per_char": 0.7191815972328186, "incorrect_loss_per_char": 0.6886989076932272, "correct_loss_per_token": 1.4383631944656372, "incorrect_loss_per_token": 1.3773978153864543, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4383631944656372, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.4383631944656372, "logits_per_char": -0.7191815972328186, "num_chars": 2}, {"sum_logits": -1.3278855085372925, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": true, "logits_per_token": -1.3278855085372925, "logits_per_char": -0.6639427542686462, "num_chars": 2}, {"sum_logits": -1.4735125303268433, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.4735125303268433, "logits_per_char": -0.7367562651634216, "num_chars": 2}, {"sum_logits": -1.330795407295227, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.330795407295227, "logits_per_char": -0.6653977036476135, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1119, "native_id": "Mercury_402348", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.515873908996582, "incorrect_loss_raw": 1.3539396524429321, "correct_loss_per_char": 0.757936954498291, "incorrect_loss_per_char": 0.6769698262214661, "correct_loss_per_token": 1.515873908996582, "incorrect_loss_per_token": 1.3539396524429321, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.515873908996582, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.515873908996582, "logits_per_char": -0.757936954498291, "num_chars": 2}, {"sum_logits": -1.4017503261566162, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.4017503261566162, "logits_per_char": -0.7008751630783081, "num_chars": 2}, {"sum_logits": -1.346861720085144, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.346861720085144, "logits_per_char": -0.673430860042572, "num_chars": 2}, {"sum_logits": -1.3132069110870361, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.3132069110870361, "logits_per_char": -0.6566034555435181, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1120, "native_id": "Mercury_7030555", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4917690753936768, "incorrect_loss_raw": 1.3606210947036743, "correct_loss_per_char": 0.7458845376968384, "incorrect_loss_per_char": 0.6803105473518372, "correct_loss_per_token": 1.4917690753936768, "incorrect_loss_per_token": 1.3606210947036743, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3280431032180786, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.3280431032180786, "logits_per_char": -0.6640215516090393, "num_chars": 2}, {"sum_logits": -1.432619333267212, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.432619333267212, "logits_per_char": -0.716309666633606, "num_chars": 2}, {"sum_logits": -1.4917690753936768, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.4917690753936768, "logits_per_char": -0.7458845376968384, "num_chars": 2}, {"sum_logits": -1.3212008476257324, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": true, "logits_per_token": -1.3212008476257324, "logits_per_char": -0.6606004238128662, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1121, "native_id": "Mercury_SC_415453", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5728018283843994, "incorrect_loss_raw": 1.3456088701883953, "correct_loss_per_char": 0.7864009141921997, "incorrect_loss_per_char": 0.6728044350941976, "correct_loss_per_token": 1.5728018283843994, "incorrect_loss_per_token": 1.3456088701883953, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5728018283843994, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.5728018283843994, "logits_per_char": -0.7864009141921997, "num_chars": 2}, {"sum_logits": -1.5245027542114258, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.5245027542114258, "logits_per_char": -0.7622513771057129, "num_chars": 2}, {"sum_logits": -1.3324259519577026, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3324259519577026, "logits_per_char": -0.6662129759788513, "num_chars": 2}, {"sum_logits": -1.1798979043960571, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.1798979043960571, "logits_per_char": -0.5899489521980286, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1122, "native_id": "Mercury_7074848", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.173321008682251, "incorrect_loss_raw": 1.476042906443278, "correct_loss_per_char": 0.5866605043411255, "incorrect_loss_per_char": 0.738021453221639, "correct_loss_per_token": 1.173321008682251, "incorrect_loss_per_token": 1.476042906443278, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.173321008682251, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": true, "logits_per_token": -1.173321008682251, "logits_per_char": -0.5866605043411255, "num_chars": 2}, {"sum_logits": -1.3868377208709717, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.3868377208709717, "logits_per_char": -0.6934188604354858, "num_chars": 2}, {"sum_logits": -1.5462968349456787, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.5462968349456787, "logits_per_char": -0.7731484174728394, "num_chars": 2}, {"sum_logits": -1.4949941635131836, "num_tokens": 1, "num_tokens_all": 359, "is_greedy": false, "logits_per_token": -1.4949941635131836, "logits_per_char": -0.7474970817565918, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1123, "native_id": "Mercury_SC_400582", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2776458263397217, "incorrect_loss_raw": 1.4339702526728313, "correct_loss_per_char": 0.6388229131698608, "incorrect_loss_per_char": 0.7169851263364156, "correct_loss_per_token": 1.2776458263397217, "incorrect_loss_per_token": 1.4339702526728313, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2776458263397217, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.2776458263397217, "logits_per_char": -0.6388229131698608, "num_chars": 2}, {"sum_logits": -1.350666880607605, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.350666880607605, "logits_per_char": -0.6753334403038025, "num_chars": 2}, {"sum_logits": -1.4136955738067627, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4136955738067627, "logits_per_char": -0.7068477869033813, "num_chars": 2}, {"sum_logits": -1.537548303604126, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.537548303604126, "logits_per_char": -0.768774151802063, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1124, "native_id": "Mercury_SC_401168", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3103808164596558, "incorrect_loss_raw": 1.42924165725708, "correct_loss_per_char": 0.6551904082298279, "incorrect_loss_per_char": 0.71462082862854, "correct_loss_per_token": 1.3103808164596558, "incorrect_loss_per_token": 1.42924165725708, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2332358360290527, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.2332358360290527, "logits_per_char": -0.6166179180145264, "num_chars": 2}, {"sum_logits": -1.524198055267334, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.524198055267334, "logits_per_char": -0.762099027633667, "num_chars": 2}, {"sum_logits": -1.3103808164596558, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.3103808164596558, "logits_per_char": -0.6551904082298279, "num_chars": 2}, {"sum_logits": -1.5302910804748535, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.5302910804748535, "logits_per_char": -0.7651455402374268, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1125, "native_id": "Mercury_180828", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4703618288040161, "incorrect_loss_raw": 1.365851879119873, "correct_loss_per_char": 0.7351809144020081, "incorrect_loss_per_char": 0.6829259395599365, "correct_loss_per_token": 1.4703618288040161, "incorrect_loss_per_token": 1.365851879119873, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3788520097732544, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.3788520097732544, "logits_per_char": -0.6894260048866272, "num_chars": 2}, {"sum_logits": -1.4005153179168701, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4005153179168701, "logits_per_char": -0.7002576589584351, "num_chars": 2}, {"sum_logits": -1.4703618288040161, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4703618288040161, "logits_per_char": -0.7351809144020081, "num_chars": 2}, {"sum_logits": -1.3181883096694946, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.3181883096694946, "logits_per_char": -0.6590941548347473, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1126, "native_id": "FCAT_2008_5_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.365841031074524, "incorrect_loss_raw": 1.3981274366378784, "correct_loss_per_char": 0.682920515537262, "incorrect_loss_per_char": 0.6990637183189392, "correct_loss_per_token": 1.365841031074524, "incorrect_loss_per_token": 1.3981274366378784, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.365841031074524, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": true, "logits_per_token": -1.365841031074524, "logits_per_char": -0.682920515537262, "num_chars": 2}, {"sum_logits": -1.4049433469772339, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.4049433469772339, "logits_per_char": -0.7024716734886169, "num_chars": 2}, {"sum_logits": -1.3992968797683716, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.3992968797683716, "logits_per_char": -0.6996484398841858, "num_chars": 2}, {"sum_logits": -1.3901420831680298, "num_tokens": 1, "num_tokens_all": 383, "is_greedy": false, "logits_per_token": -1.3901420831680298, "logits_per_char": -0.6950710415840149, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1127, "native_id": "TAKS_2009_5_25", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4201797246932983, "incorrect_loss_raw": 1.3824939727783203, "correct_loss_per_char": 0.7100898623466492, "incorrect_loss_per_char": 0.6912469863891602, "correct_loss_per_token": 1.4201797246932983, "incorrect_loss_per_token": 1.3824939727783203, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4612374305725098, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4612374305725098, "logits_per_char": -0.7306187152862549, "num_chars": 2}, {"sum_logits": -1.4029209613800049, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4029209613800049, "logits_per_char": -0.7014604806900024, "num_chars": 2}, {"sum_logits": -1.4201797246932983, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4201797246932983, "logits_per_char": -0.7100898623466492, "num_chars": 2}, {"sum_logits": -1.2833235263824463, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.2833235263824463, "logits_per_char": -0.6416617631912231, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1128, "native_id": "Mercury_SC_LBS10392", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3968403339385986, "incorrect_loss_raw": 1.391398549079895, "correct_loss_per_char": 0.6984201669692993, "incorrect_loss_per_char": 0.6956992745399475, "correct_loss_per_token": 1.3968403339385986, "incorrect_loss_per_token": 1.391398549079895, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3968403339385986, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.3968403339385986, "logits_per_char": -0.6984201669692993, "num_chars": 2}, {"sum_logits": -1.3961905241012573, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.3961905241012573, "logits_per_char": -0.6980952620506287, "num_chars": 2}, {"sum_logits": -1.48599112033844, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.48599112033844, "logits_per_char": -0.74299556016922, "num_chars": 2}, {"sum_logits": -1.2920140027999878, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.2920140027999878, "logits_per_char": -0.6460070013999939, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1129, "native_id": "Mercury_7212905", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3610810041427612, "incorrect_loss_raw": 1.4043190876642864, "correct_loss_per_char": 0.6805405020713806, "incorrect_loss_per_char": 0.7021595438321432, "correct_loss_per_token": 1.3610810041427612, "incorrect_loss_per_token": 1.4043190876642864, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4980844259262085, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.4980844259262085, "logits_per_char": -0.7490422129631042, "num_chars": 2}, {"sum_logits": -1.3610810041427612, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.3610810041427612, "logits_per_char": -0.6805405020713806, "num_chars": 2}, {"sum_logits": -1.433342695236206, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.433342695236206, "logits_per_char": -0.716671347618103, "num_chars": 2}, {"sum_logits": -1.2815301418304443, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": true, "logits_per_token": -1.2815301418304443, "logits_per_char": -0.6407650709152222, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1130, "native_id": "Mercury_7212888", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4256395101547241, "incorrect_loss_raw": 1.3803367614746094, "correct_loss_per_char": 0.7128197550773621, "incorrect_loss_per_char": 0.6901683807373047, "correct_loss_per_token": 1.4256395101547241, "incorrect_loss_per_token": 1.3803367614746094, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4256395101547241, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4256395101547241, "logits_per_char": -0.7128197550773621, "num_chars": 2}, {"sum_logits": -1.3874143362045288, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.3874143362045288, "logits_per_char": -0.6937071681022644, "num_chars": 2}, {"sum_logits": -1.4546879529953003, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4546879529953003, "logits_per_char": -0.7273439764976501, "num_chars": 2}, {"sum_logits": -1.298907995223999, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.298907995223999, "logits_per_char": -0.6494539976119995, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1131, "native_id": "MDSA_2007_8_42", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.300402283668518, "incorrect_loss_raw": 1.4242687622706096, "correct_loss_per_char": 0.650201141834259, "incorrect_loss_per_char": 0.7121343811353048, "correct_loss_per_token": 1.300402283668518, "incorrect_loss_per_token": 1.4242687622706096, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5002961158752441, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.5002961158752441, "logits_per_char": -0.7501480579376221, "num_chars": 2}, {"sum_logits": -1.397384762763977, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.397384762763977, "logits_per_char": -0.6986923813819885, "num_chars": 2}, {"sum_logits": -1.3751254081726074, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.3751254081726074, "logits_per_char": -0.6875627040863037, "num_chars": 2}, {"sum_logits": -1.300402283668518, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": true, "logits_per_token": -1.300402283668518, "logits_per_char": -0.650201141834259, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1132, "native_id": "Mercury_SC_415534", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4166083335876465, "incorrect_loss_raw": 1.3835978905359905, "correct_loss_per_char": 0.7083041667938232, "incorrect_loss_per_char": 0.6917989452679952, "correct_loss_per_token": 1.4166083335876465, "incorrect_loss_per_token": 1.3835978905359905, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.388780951499939, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.388780951499939, "logits_per_char": -0.6943904757499695, "num_chars": 2}, {"sum_logits": -1.4166083335876465, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4166083335876465, "logits_per_char": -0.7083041667938232, "num_chars": 2}, {"sum_logits": -1.3510315418243408, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.3510315418243408, "logits_per_char": -0.6755157709121704, "num_chars": 2}, {"sum_logits": -1.4109811782836914, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4109811782836914, "logits_per_char": -0.7054905891418457, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1133, "native_id": "Mercury_7213413", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3244142532348633, "incorrect_loss_raw": 1.4142796993255615, "correct_loss_per_char": 0.6622071266174316, "incorrect_loss_per_char": 0.7071398496627808, "correct_loss_per_token": 1.3244142532348633, "incorrect_loss_per_token": 1.4142796993255615, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3597649335861206, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.3597649335861206, "logits_per_char": -0.6798824667930603, "num_chars": 2}, {"sum_logits": -1.431095004081726, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.431095004081726, "logits_per_char": -0.715547502040863, "num_chars": 2}, {"sum_logits": -1.451979160308838, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.451979160308838, "logits_per_char": -0.725989580154419, "num_chars": 2}, {"sum_logits": -1.3244142532348633, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": true, "logits_per_token": -1.3244142532348633, "logits_per_char": -0.6622071266174316, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1134, "native_id": "Mercury_7068635", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.457375407218933, "incorrect_loss_raw": 1.4101951519648235, "correct_loss_per_char": 0.7286877036094666, "incorrect_loss_per_char": 0.7050975759824117, "correct_loss_per_token": 1.457375407218933, "incorrect_loss_per_token": 1.4101951519648235, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7053332328796387, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.7053332328796387, "logits_per_char": -0.8526666164398193, "num_chars": 2}, {"sum_logits": -1.4903918504714966, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.4903918504714966, "logits_per_char": -0.7451959252357483, "num_chars": 2}, {"sum_logits": -1.457375407218933, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.457375407218933, "logits_per_char": -0.7286877036094666, "num_chars": 2}, {"sum_logits": -1.034860372543335, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.034860372543335, "logits_per_char": -0.5174301862716675, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1135, "native_id": "Mercury_417137", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3416023254394531, "incorrect_loss_raw": 1.4101862907409668, "correct_loss_per_char": 0.6708011627197266, "incorrect_loss_per_char": 0.7050931453704834, "correct_loss_per_token": 1.3416023254394531, "incorrect_loss_per_token": 1.4101862907409668, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.354414939880371, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.354414939880371, "logits_per_char": -0.6772074699401855, "num_chars": 2}, {"sum_logits": -1.4657080173492432, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.4657080173492432, "logits_per_char": -0.7328540086746216, "num_chars": 2}, {"sum_logits": -1.3416023254394531, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": true, "logits_per_token": -1.3416023254394531, "logits_per_char": -0.6708011627197266, "num_chars": 2}, {"sum_logits": -1.4104359149932861, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.4104359149932861, "logits_per_char": -0.7052179574966431, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1136, "native_id": "Mercury_7268258", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4868870973587036, "incorrect_loss_raw": 1.371715744336446, "correct_loss_per_char": 0.7434435486793518, "incorrect_loss_per_char": 0.685857872168223, "correct_loss_per_token": 1.4868870973587036, "incorrect_loss_per_token": 1.371715744336446, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2393903732299805, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.2393903732299805, "logits_per_char": -0.6196951866149902, "num_chars": 2}, {"sum_logits": -1.3167026042938232, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.3167026042938232, "logits_per_char": -0.6583513021469116, "num_chars": 2}, {"sum_logits": -1.4868870973587036, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4868870973587036, "logits_per_char": -0.7434435486793518, "num_chars": 2}, {"sum_logits": -1.5590542554855347, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.5590542554855347, "logits_per_char": -0.7795271277427673, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1137, "native_id": "NAEP_2005_4_S13+14", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.429315447807312, "incorrect_loss_raw": 1.3833896319071453, "correct_loss_per_char": 0.714657723903656, "incorrect_loss_per_char": 0.6916948159535726, "correct_loss_per_token": 1.429315447807312, "incorrect_loss_per_token": 1.3833896319071453, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5014315843582153, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.5014315843582153, "logits_per_char": -0.7507157921791077, "num_chars": 2}, {"sum_logits": -1.412043809890747, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.412043809890747, "logits_per_char": -0.7060219049453735, "num_chars": 2}, {"sum_logits": -1.429315447807312, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.429315447807312, "logits_per_char": -0.714657723903656, "num_chars": 2}, {"sum_logits": -1.2366935014724731, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": true, "logits_per_token": -1.2366935014724731, "logits_per_char": -0.6183467507362366, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1138, "native_id": "Mercury_SC_406089", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.354691982269287, "incorrect_loss_raw": 1.4032629330952961, "correct_loss_per_char": 0.6773459911346436, "incorrect_loss_per_char": 0.7016314665476481, "correct_loss_per_token": 1.354691982269287, "incorrect_loss_per_token": 1.4032629330952961, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3967732191085815, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3967732191085815, "logits_per_char": -0.6983866095542908, "num_chars": 2}, {"sum_logits": -1.3871921300888062, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3871921300888062, "logits_per_char": -0.6935960650444031, "num_chars": 2}, {"sum_logits": -1.425823450088501, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.425823450088501, "logits_per_char": -0.7129117250442505, "num_chars": 2}, {"sum_logits": -1.354691982269287, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.354691982269287, "logits_per_char": -0.6773459911346436, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1139, "native_id": "Mercury_SC_400700", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4781447649002075, "incorrect_loss_raw": 1.3647699356079102, "correct_loss_per_char": 0.7390723824501038, "incorrect_loss_per_char": 0.6823849678039551, "correct_loss_per_token": 1.4781447649002075, "incorrect_loss_per_token": 1.3647699356079102, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2697181701660156, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.2697181701660156, "logits_per_char": -0.6348590850830078, "num_chars": 2}, {"sum_logits": -1.4265117645263672, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4265117645263672, "logits_per_char": -0.7132558822631836, "num_chars": 2}, {"sum_logits": -1.4781447649002075, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4781447649002075, "logits_per_char": -0.7390723824501038, "num_chars": 2}, {"sum_logits": -1.3980798721313477, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.3980798721313477, "logits_per_char": -0.6990399360656738, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1140, "native_id": "Mercury_7223493", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3605029582977295, "incorrect_loss_raw": 1.4033575057983398, "correct_loss_per_char": 0.6802514791488647, "incorrect_loss_per_char": 0.7016787528991699, "correct_loss_per_token": 1.3605029582977295, "incorrect_loss_per_token": 1.4033575057983398, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3605029582977295, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.3605029582977295, "logits_per_char": -0.6802514791488647, "num_chars": 2}, {"sum_logits": -1.4195698499679565, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.4195698499679565, "logits_per_char": -0.7097849249839783, "num_chars": 2}, {"sum_logits": -1.5049326419830322, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": false, "logits_per_token": -1.5049326419830322, "logits_per_char": -0.7524663209915161, "num_chars": 2}, {"sum_logits": -1.2855700254440308, "num_tokens": 1, "num_tokens_all": 393, "is_greedy": true, "logits_per_token": -1.2855700254440308, "logits_per_char": -0.6427850127220154, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1141, "native_id": "Mercury_SC_405928", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1999820470809937, "incorrect_loss_raw": 1.4622018337249756, "correct_loss_per_char": 0.5999910235404968, "incorrect_loss_per_char": 0.7311009168624878, "correct_loss_per_token": 1.1999820470809937, "incorrect_loss_per_token": 1.4622018337249756, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.462398648262024, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.462398648262024, "logits_per_char": -0.731199324131012, "num_chars": 2}, {"sum_logits": -1.4501757621765137, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4501757621765137, "logits_per_char": -0.7250878810882568, "num_chars": 2}, {"sum_logits": -1.4740310907363892, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4740310907363892, "logits_per_char": -0.7370155453681946, "num_chars": 2}, {"sum_logits": -1.1999820470809937, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.1999820470809937, "logits_per_char": -0.5999910235404968, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1142, "native_id": "MCAS_2009_5_6518", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.410266637802124, "incorrect_loss_raw": 1.3838378588358562, "correct_loss_per_char": 0.705133318901062, "incorrect_loss_per_char": 0.6919189294179281, "correct_loss_per_token": 1.410266637802124, "incorrect_loss_per_token": 1.3838378588358562, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3397648334503174, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.3397648334503174, "logits_per_char": -0.6698824167251587, "num_chars": 2}, {"sum_logits": -1.375016450881958, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.375016450881958, "logits_per_char": -0.687508225440979, "num_chars": 2}, {"sum_logits": -1.436732292175293, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.436732292175293, "logits_per_char": -0.7183661460876465, "num_chars": 2}, {"sum_logits": -1.410266637802124, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.410266637802124, "logits_per_char": -0.705133318901062, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1143, "native_id": "MCAS_2006_9_1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.368569254875183, "incorrect_loss_raw": 1.397365967432658, "correct_loss_per_char": 0.6842846274375916, "incorrect_loss_per_char": 0.698682983716329, "correct_loss_per_token": 1.368569254875183, "incorrect_loss_per_token": 1.397365967432658, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4290635585784912, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -1.4290635585784912, "logits_per_char": -0.7145317792892456, "num_chars": 2}, {"sum_logits": -1.368569254875183, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": true, "logits_per_token": -1.368569254875183, "logits_per_char": -0.6842846274375916, "num_chars": 2}, {"sum_logits": -1.3887443542480469, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -1.3887443542480469, "logits_per_char": -0.6943721771240234, "num_chars": 2}, {"sum_logits": -1.3742899894714355, "num_tokens": 1, "num_tokens_all": 400, "is_greedy": false, "logits_per_token": -1.3742899894714355, "logits_per_char": -0.6871449947357178, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1144, "native_id": "Mercury_7239383", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3453959226608276, "incorrect_loss_raw": 1.4066929419835408, "correct_loss_per_char": 0.6726979613304138, "incorrect_loss_per_char": 0.7033464709917704, "correct_loss_per_token": 1.3453959226608276, "incorrect_loss_per_token": 1.4066929419835408, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4372402429580688, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.4372402429580688, "logits_per_char": -0.7186201214790344, "num_chars": 2}, {"sum_logits": -1.3453959226608276, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": true, "logits_per_token": -1.3453959226608276, "logits_per_char": -0.6726979613304138, "num_chars": 2}, {"sum_logits": -1.3985447883605957, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.3985447883605957, "logits_per_char": -0.6992723941802979, "num_chars": 2}, {"sum_logits": -1.384293794631958, "num_tokens": 1, "num_tokens_all": 386, "is_greedy": false, "logits_per_token": -1.384293794631958, "logits_per_char": -0.692146897315979, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1145, "native_id": "Mercury_SC_400130", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.327688455581665, "incorrect_loss_raw": 1.4263606468836467, "correct_loss_per_char": 0.6638442277908325, "incorrect_loss_per_char": 0.7131803234418234, "correct_loss_per_token": 1.327688455581665, "incorrect_loss_per_token": 1.4263606468836467, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2144992351531982, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.2144992351531982, "logits_per_char": -0.6072496175765991, "num_chars": 2}, {"sum_logits": -1.327688455581665, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.327688455581665, "logits_per_char": -0.6638442277908325, "num_chars": 2}, {"sum_logits": -1.4341942071914673, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.4341942071914673, "logits_per_char": -0.7170971035957336, "num_chars": 2}, {"sum_logits": -1.6303884983062744, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.6303884983062744, "logits_per_char": -0.8151942491531372, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1146, "native_id": "Mercury_401426", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3433047533035278, "incorrect_loss_raw": 1.4072943925857544, "correct_loss_per_char": 0.6716523766517639, "incorrect_loss_per_char": 0.7036471962928772, "correct_loss_per_token": 1.3433047533035278, "incorrect_loss_per_token": 1.4072943925857544, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.397585153579712, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.397585153579712, "logits_per_char": -0.698792576789856, "num_chars": 2}, {"sum_logits": -1.3780163526535034, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.3780163526535034, "logits_per_char": -0.6890081763267517, "num_chars": 2}, {"sum_logits": -1.4462816715240479, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4462816715240479, "logits_per_char": -0.7231408357620239, "num_chars": 2}, {"sum_logits": -1.3433047533035278, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.3433047533035278, "logits_per_char": -0.6716523766517639, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1147, "native_id": "MCAS_2010_8_12016", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.419445276260376, "incorrect_loss_raw": 1.393527865409851, "correct_loss_per_char": 0.709722638130188, "incorrect_loss_per_char": 0.6967639327049255, "correct_loss_per_token": 1.419445276260376, "incorrect_loss_per_token": 1.393527865409851, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6075578927993774, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.6075578927993774, "logits_per_char": -0.8037789463996887, "num_chars": 2}, {"sum_logits": -1.3064308166503906, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.3064308166503906, "logits_per_char": -0.6532154083251953, "num_chars": 2}, {"sum_logits": -1.419445276260376, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": false, "logits_per_token": -1.419445276260376, "logits_per_char": -0.709722638130188, "num_chars": 2}, {"sum_logits": -1.2665948867797852, "num_tokens": 1, "num_tokens_all": 410, "is_greedy": true, "logits_per_token": -1.2665948867797852, "logits_per_char": -0.6332974433898926, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1148, "native_id": "Mercury_SC_400324", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4604592323303223, "incorrect_loss_raw": 1.3742716312408447, "correct_loss_per_char": 0.7302296161651611, "incorrect_loss_per_char": 0.6871358156204224, "correct_loss_per_token": 1.4604592323303223, "incorrect_loss_per_token": 1.3742716312408447, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.448367953300476, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.448367953300476, "logits_per_char": -0.724183976650238, "num_chars": 2}, {"sum_logits": -1.4604592323303223, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4604592323303223, "logits_per_char": -0.7302296161651611, "num_chars": 2}, {"sum_logits": -1.4595555067062378, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": false, "logits_per_token": -1.4595555067062378, "logits_per_char": -0.7297777533531189, "num_chars": 2}, {"sum_logits": -1.2148914337158203, "num_tokens": 1, "num_tokens_all": 382, "is_greedy": true, "logits_per_token": -1.2148914337158203, "logits_per_char": -0.6074457168579102, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1149, "native_id": "Mercury_SC_LBS10662", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3563231229782104, "incorrect_loss_raw": 1.4067633549372356, "correct_loss_per_char": 0.6781615614891052, "incorrect_loss_per_char": 0.7033816774686178, "correct_loss_per_token": 1.3563231229782104, "incorrect_loss_per_token": 1.4067633549372356, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3433252573013306, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3433252573013306, "logits_per_char": -0.6716626286506653, "num_chars": 2}, {"sum_logits": -1.3252395391464233, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.3252395391464233, "logits_per_char": -0.6626197695732117, "num_chars": 2}, {"sum_logits": -1.3563231229782104, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3563231229782104, "logits_per_char": -0.6781615614891052, "num_chars": 2}, {"sum_logits": -1.5517252683639526, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.5517252683639526, "logits_per_char": -0.7758626341819763, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1150, "native_id": "VASoL_2009_3_8", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.379908561706543, "incorrect_loss_raw": 1.400240421295166, "correct_loss_per_char": 0.6899542808532715, "incorrect_loss_per_char": 0.700120210647583, "correct_loss_per_token": 1.379908561706543, "incorrect_loss_per_token": 1.400240421295166, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3814011812210083, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3814011812210083, "logits_per_char": -0.6907005906105042, "num_chars": 2}, {"sum_logits": -1.3255643844604492, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.3255643844604492, "logits_per_char": -0.6627821922302246, "num_chars": 2}, {"sum_logits": -1.4937556982040405, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4937556982040405, "logits_per_char": -0.7468778491020203, "num_chars": 2}, {"sum_logits": -1.379908561706543, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.379908561706543, "logits_per_char": -0.6899542808532715, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1151, "native_id": "Mercury_SC_401185", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4355299472808838, "incorrect_loss_raw": 1.3783409198125203, "correct_loss_per_char": 0.7177649736404419, "incorrect_loss_per_char": 0.6891704599062601, "correct_loss_per_token": 1.4355299472808838, "incorrect_loss_per_token": 1.3783409198125203, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3044527769088745, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": true, "logits_per_token": -1.3044527769088745, "logits_per_char": -0.6522263884544373, "num_chars": 2}, {"sum_logits": -1.3424831628799438, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.3424831628799438, "logits_per_char": -0.6712415814399719, "num_chars": 2}, {"sum_logits": -1.4355299472808838, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.4355299472808838, "logits_per_char": -0.7177649736404419, "num_chars": 2}, {"sum_logits": -1.4880868196487427, "num_tokens": 1, "num_tokens_all": 398, "is_greedy": false, "logits_per_token": -1.4880868196487427, "logits_per_char": -0.7440434098243713, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1152, "native_id": "NYSEDREGENTS_2015_8_29", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.403590202331543, "incorrect_loss_raw": 1.3898055950800579, "correct_loss_per_char": 0.7017951011657715, "incorrect_loss_per_char": 0.6949027975400289, "correct_loss_per_token": 1.403590202331543, "incorrect_loss_per_token": 1.3898055950800579, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.312829613685608, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.312829613685608, "logits_per_char": -0.656414806842804, "num_chars": 2}, {"sum_logits": -1.3535248041152954, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3535248041152954, "logits_per_char": -0.6767624020576477, "num_chars": 2}, {"sum_logits": -1.403590202331543, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.403590202331543, "logits_per_char": -0.7017951011657715, "num_chars": 2}, {"sum_logits": -1.50306236743927, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.50306236743927, "logits_per_char": -0.751531183719635, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1153, "native_id": "Mercury_7234378", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2498341798782349, "incorrect_loss_raw": 1.4449224869410198, "correct_loss_per_char": 0.6249170899391174, "incorrect_loss_per_char": 0.7224612434705099, "correct_loss_per_token": 1.2498341798782349, "incorrect_loss_per_token": 1.4449224869410198, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4043158292770386, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.4043158292770386, "logits_per_char": -0.7021579146385193, "num_chars": 2}, {"sum_logits": -1.5615438222885132, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.5615438222885132, "logits_per_char": -0.7807719111442566, "num_chars": 2}, {"sum_logits": -1.3689078092575073, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": false, "logits_per_token": -1.3689078092575073, "logits_per_char": -0.6844539046287537, "num_chars": 2}, {"sum_logits": -1.2498341798782349, "num_tokens": 1, "num_tokens_all": 379, "is_greedy": true, "logits_per_token": -1.2498341798782349, "logits_per_char": -0.6249170899391174, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1154, "native_id": "ACTAAP_2014_7_3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.400515079498291, "incorrect_loss_raw": 1.3892884651819866, "correct_loss_per_char": 0.7002575397491455, "incorrect_loss_per_char": 0.6946442325909933, "correct_loss_per_token": 1.400515079498291, "incorrect_loss_per_token": 1.3892884651819866, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.332218885421753, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": true, "logits_per_token": -1.332218885421753, "logits_per_char": -0.6661094427108765, "num_chars": 2}, {"sum_logits": -1.3800729513168335, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.3800729513168335, "logits_per_char": -0.6900364756584167, "num_chars": 2}, {"sum_logits": -1.455573558807373, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.455573558807373, "logits_per_char": -0.7277867794036865, "num_chars": 2}, {"sum_logits": -1.400515079498291, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.400515079498291, "logits_per_char": -0.7002575397491455, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1155, "native_id": "MDSA_2008_8_27", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.431727409362793, "incorrect_loss_raw": 1.377048373222351, "correct_loss_per_char": 0.7158637046813965, "incorrect_loss_per_char": 0.6885241866111755, "correct_loss_per_token": 1.431727409362793, "incorrect_loss_per_token": 1.377048373222351, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3446234464645386, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.3446234464645386, "logits_per_char": -0.6723117232322693, "num_chars": 2}, {"sum_logits": -1.3818913698196411, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.3818913698196411, "logits_per_char": -0.6909456849098206, "num_chars": 2}, {"sum_logits": -1.4046303033828735, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4046303033828735, "logits_per_char": -0.7023151516914368, "num_chars": 2}, {"sum_logits": -1.431727409362793, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.431727409362793, "logits_per_char": -0.7158637046813965, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1156, "native_id": "Mercury_7004725", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3040755987167358, "incorrect_loss_raw": 1.4205020666122437, "correct_loss_per_char": 0.6520377993583679, "incorrect_loss_per_char": 0.7102510333061218, "correct_loss_per_token": 1.3040755987167358, "incorrect_loss_per_token": 1.4205020666122437, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3040755987167358, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.3040755987167358, "logits_per_char": -0.6520377993583679, "num_chars": 2}, {"sum_logits": -1.4189389944076538, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4189389944076538, "logits_per_char": -0.7094694972038269, "num_chars": 2}, {"sum_logits": -1.4545862674713135, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4545862674713135, "logits_per_char": -0.7272931337356567, "num_chars": 2}, {"sum_logits": -1.3879809379577637, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3879809379577637, "logits_per_char": -0.6939904689788818, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1157, "native_id": "Mercury_405143", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4177805185317993, "incorrect_loss_raw": 1.3858391841252644, "correct_loss_per_char": 0.7088902592658997, "incorrect_loss_per_char": 0.6929195920626322, "correct_loss_per_token": 1.4177805185317993, "incorrect_loss_per_token": 1.3858391841252644, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4754433631896973, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4754433631896973, "logits_per_char": -0.7377216815948486, "num_chars": 2}, {"sum_logits": -1.4408570528030396, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4408570528030396, "logits_per_char": -0.7204285264015198, "num_chars": 2}, {"sum_logits": -1.4177805185317993, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4177805185317993, "logits_per_char": -0.7088902592658997, "num_chars": 2}, {"sum_logits": -1.2412171363830566, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.2412171363830566, "logits_per_char": -0.6206085681915283, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1158, "native_id": "MCAS_2003_8_7", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3914457559585571, "incorrect_loss_raw": 1.3916761080423992, "correct_loss_per_char": 0.6957228779792786, "incorrect_loss_per_char": 0.6958380540211996, "correct_loss_per_token": 1.3914457559585571, "incorrect_loss_per_token": 1.3916761080423992, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3914457559585571, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.3914457559585571, "logits_per_char": -0.6957228779792786, "num_chars": 2}, {"sum_logits": -1.4172906875610352, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4172906875610352, "logits_per_char": -0.7086453437805176, "num_chars": 2}, {"sum_logits": -1.4239869117736816, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4239869117736816, "logits_per_char": -0.7119934558868408, "num_chars": 2}, {"sum_logits": -1.3337507247924805, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.3337507247924805, "logits_per_char": -0.6668753623962402, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1159, "native_id": "Mercury_SC_405341", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.522052526473999, "incorrect_loss_raw": 1.349635163942973, "correct_loss_per_char": 0.7610262632369995, "incorrect_loss_per_char": 0.6748175819714864, "correct_loss_per_token": 1.522052526473999, "incorrect_loss_per_token": 1.349635163942973, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.522052526473999, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.522052526473999, "logits_per_char": -0.7610262632369995, "num_chars": 2}, {"sum_logits": -1.3897875547409058, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.3897875547409058, "logits_per_char": -0.6948937773704529, "num_chars": 2}, {"sum_logits": -1.3301730155944824, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.3301730155944824, "logits_per_char": -0.6650865077972412, "num_chars": 2}, {"sum_logits": -1.3289449214935303, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.3289449214935303, "logits_per_char": -0.6644724607467651, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1160, "native_id": "Mercury_7283833", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.413889765739441, "incorrect_loss_raw": 1.3876151243845622, "correct_loss_per_char": 0.7069448828697205, "incorrect_loss_per_char": 0.6938075621922811, "correct_loss_per_token": 1.413889765739441, "incorrect_loss_per_token": 1.3876151243845622, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4143280982971191, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4143280982971191, "logits_per_char": -0.7071640491485596, "num_chars": 2}, {"sum_logits": -1.413889765739441, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.413889765739441, "logits_per_char": -0.7069448828697205, "num_chars": 2}, {"sum_logits": -1.4921586513519287, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": false, "logits_per_token": -1.4921586513519287, "logits_per_char": -0.7460793256759644, "num_chars": 2}, {"sum_logits": -1.2563586235046387, "num_tokens": 1, "num_tokens_all": 347, "is_greedy": true, "logits_per_token": -1.2563586235046387, "logits_per_char": -0.6281793117523193, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1161, "native_id": "Mercury_7159303", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.644803524017334, "incorrect_loss_raw": 1.3215750853220622, "correct_loss_per_char": 0.822401762008667, "incorrect_loss_per_char": 0.6607875426610311, "correct_loss_per_token": 1.644803524017334, "incorrect_loss_per_token": 1.3215750853220622, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.644803524017334, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.644803524017334, "logits_per_char": -0.822401762008667, "num_chars": 2}, {"sum_logits": -1.3104310035705566, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.3104310035705566, "logits_per_char": -0.6552155017852783, "num_chars": 2}, {"sum_logits": -1.4263391494750977, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": false, "logits_per_token": -1.4263391494750977, "logits_per_char": -0.7131695747375488, "num_chars": 2}, {"sum_logits": -1.2279551029205322, "num_tokens": 1, "num_tokens_all": 403, "is_greedy": true, "logits_per_token": -1.2279551029205322, "logits_per_char": -0.6139775514602661, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1162, "native_id": "Mercury_406427", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4839680194854736, "incorrect_loss_raw": 1.362395445505778, "correct_loss_per_char": 0.7419840097427368, "incorrect_loss_per_char": 0.681197722752889, "correct_loss_per_token": 1.4839680194854736, "incorrect_loss_per_token": 1.362395445505778, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3875881433486938, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.3875881433486938, "logits_per_char": -0.6937940716743469, "num_chars": 2}, {"sum_logits": -1.419427752494812, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.419427752494812, "logits_per_char": -0.709713876247406, "num_chars": 2}, {"sum_logits": -1.4839680194854736, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": false, "logits_per_token": -1.4839680194854736, "logits_per_char": -0.7419840097427368, "num_chars": 2}, {"sum_logits": -1.2801704406738281, "num_tokens": 1, "num_tokens_all": 394, "is_greedy": true, "logits_per_token": -1.2801704406738281, "logits_per_char": -0.6400852203369141, "num_chars": 2}], "label": 2, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1163, "native_id": "Mercury_SC_414129", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5242793560028076, "incorrect_loss_raw": 1.3614269097646077, "correct_loss_per_char": 0.7621396780014038, "incorrect_loss_per_char": 0.6807134548823038, "correct_loss_per_token": 1.5242793560028076, "incorrect_loss_per_token": 1.3614269097646077, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3903992176055908, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.3903992176055908, "logits_per_char": -0.6951996088027954, "num_chars": 2}, {"sum_logits": -1.5242793560028076, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.5242793560028076, "logits_per_char": -0.7621396780014038, "num_chars": 2}, {"sum_logits": -1.5203700065612793, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.5203700065612793, "logits_per_char": -0.7601850032806396, "num_chars": 2}, {"sum_logits": -1.1735115051269531, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.1735115051269531, "logits_per_char": -0.5867557525634766, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1164, "native_id": "Mercury_7108990", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2635595798492432, "incorrect_loss_raw": 1.4422442118326824, "correct_loss_per_char": 0.6317797899246216, "incorrect_loss_per_char": 0.7211221059163412, "correct_loss_per_token": 1.2635595798492432, "incorrect_loss_per_token": 1.4422442118326824, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2635595798492432, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.2635595798492432, "logits_per_char": -0.6317797899246216, "num_chars": 2}, {"sum_logits": -1.4664534330368042, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4664534330368042, "logits_per_char": -0.7332267165184021, "num_chars": 2}, {"sum_logits": -1.3201372623443604, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3201372623443604, "logits_per_char": -0.6600686311721802, "num_chars": 2}, {"sum_logits": -1.5401419401168823, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.5401419401168823, "logits_per_char": -0.7700709700584412, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1165, "native_id": "Mercury_SC_407315", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3540139198303223, "incorrect_loss_raw": 1.4054994185765584, "correct_loss_per_char": 0.6770069599151611, "incorrect_loss_per_char": 0.7027497092882792, "correct_loss_per_token": 1.3540139198303223, "incorrect_loss_per_token": 1.4054994185765584, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3540139198303223, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.3540139198303223, "logits_per_char": -0.6770069599151611, "num_chars": 2}, {"sum_logits": -1.4953378438949585, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4953378438949585, "logits_per_char": -0.7476689219474792, "num_chars": 2}, {"sum_logits": -1.3199176788330078, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.3199176788330078, "logits_per_char": -0.6599588394165039, "num_chars": 2}, {"sum_logits": -1.401242733001709, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.401242733001709, "logits_per_char": -0.7006213665008545, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1166, "native_id": "Mercury_SC_408663", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4744677543640137, "incorrect_loss_raw": 1.3680020968119304, "correct_loss_per_char": 0.7372338771820068, "incorrect_loss_per_char": 0.6840010484059652, "correct_loss_per_token": 1.4744677543640137, "incorrect_loss_per_token": 1.3680020968119304, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4104794263839722, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.4104794263839722, "logits_per_char": -0.7052397131919861, "num_chars": 2}, {"sum_logits": -1.4744677543640137, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.4744677543640137, "logits_per_char": -0.7372338771820068, "num_chars": 2}, {"sum_logits": -1.4480819702148438, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.4480819702148438, "logits_per_char": -0.7240409851074219, "num_chars": 2}, {"sum_logits": -1.245444893836975, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": true, "logits_per_token": -1.245444893836975, "logits_per_char": -0.6227224469184875, "num_chars": 2}], "label": 1, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1167, "native_id": "MEA_2013_8_18", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.425990343093872, "incorrect_loss_raw": 1.3796533743540447, "correct_loss_per_char": 0.712995171546936, "incorrect_loss_per_char": 0.6898266871770223, "correct_loss_per_token": 1.425990343093872, "incorrect_loss_per_token": 1.3796533743540447, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.425990343093872, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.425990343093872, "logits_per_char": -0.712995171546936, "num_chars": 2}, {"sum_logits": -1.4259295463562012, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.4259295463562012, "logits_per_char": -0.7129647731781006, "num_chars": 2}, {"sum_logits": -1.3641360998153687, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.3641360998153687, "logits_per_char": -0.6820680499076843, "num_chars": 2}, {"sum_logits": -1.348894476890564, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.348894476890564, "logits_per_char": -0.674447238445282, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1168, "native_id": "Mercury_7111125", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4441440105438232, "incorrect_loss_raw": 1.3758581082026164, "correct_loss_per_char": 0.7220720052719116, "incorrect_loss_per_char": 0.6879290541013082, "correct_loss_per_token": 1.4441440105438232, "incorrect_loss_per_token": 1.3758581082026164, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4441440105438232, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4441440105438232, "logits_per_char": -0.7220720052719116, "num_chars": 2}, {"sum_logits": -1.2991523742675781, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.2991523742675781, "logits_per_char": -0.6495761871337891, "num_chars": 2}, {"sum_logits": -1.4640834331512451, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.4640834331512451, "logits_per_char": -0.7320417165756226, "num_chars": 2}, {"sum_logits": -1.3643385171890259, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3643385171890259, "logits_per_char": -0.6821692585945129, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1169, "native_id": "LEAP_2009_8_10430", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.449593186378479, "incorrect_loss_raw": 1.3715223868687947, "correct_loss_per_char": 0.7247965931892395, "incorrect_loss_per_char": 0.6857611934343973, "correct_loss_per_token": 1.449593186378479, "incorrect_loss_per_token": 1.3715223868687947, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.449593186378479, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.449593186378479, "logits_per_char": -0.7247965931892395, "num_chars": 2}, {"sum_logits": -1.3317698240280151, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": true, "logits_per_token": -1.3317698240280151, "logits_per_char": -0.6658849120140076, "num_chars": 2}, {"sum_logits": -1.3589861392974854, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.3589861392974854, "logits_per_char": -0.6794930696487427, "num_chars": 2}, {"sum_logits": -1.4238111972808838, "num_tokens": 1, "num_tokens_all": 397, "is_greedy": false, "logits_per_token": -1.4238111972808838, "logits_per_char": -0.7119055986404419, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1170, "native_id": "Mercury_7165218", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3705341815948486, "incorrect_loss_raw": 1.3987463315327961, "correct_loss_per_char": 0.6852670907974243, "incorrect_loss_per_char": 0.6993731657663981, "correct_loss_per_token": 1.3705341815948486, "incorrect_loss_per_token": 1.3987463315327961, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4494643211364746, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.4494643211364746, "logits_per_char": -0.7247321605682373, "num_chars": 2}, {"sum_logits": -1.3664095401763916, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.3664095401763916, "logits_per_char": -0.6832047700881958, "num_chars": 2}, {"sum_logits": -1.3803651332855225, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.3803651332855225, "logits_per_char": -0.6901825666427612, "num_chars": 2}, {"sum_logits": -1.3705341815948486, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.3705341815948486, "logits_per_char": -0.6852670907974243, "num_chars": 2}], "label": 3, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"} +{"doc_id": 1171, "native_id": "MEA_2013_8_15", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2896947860717773, "incorrect_loss_raw": 1.4304099082946777, "correct_loss_per_char": 0.6448473930358887, "incorrect_loss_per_char": 0.7152049541473389, "correct_loss_per_token": 1.2896947860717773, "incorrect_loss_per_token": 1.4304099082946777, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2896947860717773, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": true, "logits_per_token": -1.2896947860717773, "logits_per_char": -0.6448473930358887, "num_chars": 2}, {"sum_logits": -1.535305142402649, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.535305142402649, "logits_per_char": -0.7676525712013245, "num_chars": 2}, {"sum_logits": -1.4639288187026978, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.4639288187026978, "logits_per_char": -0.7319644093513489, "num_chars": 2}, {"sum_logits": -1.2919957637786865, "num_tokens": 1, "num_tokens_all": 369, "is_greedy": false, "logits_per_token": -1.2919957637786865, "logits_per_char": -0.6459978818893433, "num_chars": 2}], "label": 0, "task_hash": "867052288d273ab0fe8a12a6c5c548e6", "model_hash": "03418cf8091a9882619950ffb07429a5"}