{"doc_id": 0, "native_id": 0, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.563197135925293, "incorrect_loss_raw": 1.342955191930135, "correct_loss_per_char": 0.7815985679626465, "incorrect_loss_per_char": 0.6714775959650675, "correct_loss_per_token": 1.563197135925293, "incorrect_loss_per_token": 1.342955191930135, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.563197135925293, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.563197135925293, "logits_per_char": -0.7815985679626465, "num_chars": 2}, {"sum_logits": -1.4054348468780518, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.4054348468780518, "logits_per_char": -0.7027174234390259, "num_chars": 2}, {"sum_logits": -1.401798963546753, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.401798963546753, "logits_per_char": -0.7008994817733765, "num_chars": 2}, {"sum_logits": -1.2216317653656006, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -1.2216317653656006, "logits_per_char": -0.6108158826828003, "num_chars": 2}], "label": 0, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 1, "native_id": 1, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3779592514038086, "incorrect_loss_raw": 1.3976569573084514, "correct_loss_per_char": 0.6889796257019043, "incorrect_loss_per_char": 0.6988284786542257, "correct_loss_per_token": 1.3779592514038086, "incorrect_loss_per_token": 1.3976569573084514, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4438936710357666, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4438936710357666, "logits_per_char": -0.7219468355178833, "num_chars": 2}, {"sum_logits": -1.3779592514038086, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.3779592514038086, "logits_per_char": -0.6889796257019043, "num_chars": 2}, {"sum_logits": -1.424455165863037, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.424455165863037, "logits_per_char": -0.7122275829315186, "num_chars": 2}, {"sum_logits": -1.3246220350265503, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.3246220350265503, "logits_per_char": -0.6623110175132751, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 2, "native_id": 2, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.492455244064331, "incorrect_loss_raw": 1.3637032111485798, "correct_loss_per_char": 0.7462276220321655, "incorrect_loss_per_char": 0.6818516055742899, "correct_loss_per_token": 1.492455244064331, "incorrect_loss_per_token": 1.3637032111485798, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.492455244064331, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.492455244064331, "logits_per_char": -0.7462276220321655, "num_chars": 2}, {"sum_logits": -1.4297733306884766, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.4297733306884766, "logits_per_char": -0.7148866653442383, "num_chars": 2}, {"sum_logits": -1.3581385612487793, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.3581385612487793, "logits_per_char": -0.6790692806243896, "num_chars": 2}, {"sum_logits": -1.3031977415084839, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": true, "logits_per_token": -1.3031977415084839, "logits_per_char": -0.6515988707542419, "num_chars": 2}], "label": 0, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 3, "native_id": 3, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4431936740875244, "incorrect_loss_raw": 1.3812886079152424, "correct_loss_per_char": 0.7215968370437622, "incorrect_loss_per_char": 0.6906443039576212, "correct_loss_per_token": 1.4431936740875244, "incorrect_loss_per_token": 1.3812886079152424, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4884270429611206, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.4884270429611206, "logits_per_char": -0.7442135214805603, "num_chars": 2}, {"sum_logits": -1.3451071977615356, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.3451071977615356, "logits_per_char": -0.6725535988807678, "num_chars": 2}, {"sum_logits": -1.4431936740875244, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.4431936740875244, "logits_per_char": -0.7215968370437622, "num_chars": 2}, {"sum_logits": -1.3103315830230713, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": true, "logits_per_token": -1.3103315830230713, "logits_per_char": -0.6551657915115356, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 4, "native_id": 4, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4889402389526367, "incorrect_loss_raw": 1.3642373085021973, "correct_loss_per_char": 0.7444701194763184, "incorrect_loss_per_char": 0.6821186542510986, "correct_loss_per_token": 1.4889402389526367, "incorrect_loss_per_token": 1.3642373085021973, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3749785423278809, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.3749785423278809, "logits_per_char": -0.6874892711639404, "num_chars": 2}, {"sum_logits": -1.4889402389526367, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.4889402389526367, "logits_per_char": -0.7444701194763184, "num_chars": 2}, {"sum_logits": -1.3615074157714844, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.3615074157714844, "logits_per_char": -0.6807537078857422, "num_chars": 2}, {"sum_logits": -1.3562259674072266, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.3562259674072266, "logits_per_char": -0.6781129837036133, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 5, "native_id": 5, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4924943447113037, "incorrect_loss_raw": 1.3906415303548176, "correct_loss_per_char": 0.7462471723556519, "incorrect_loss_per_char": 0.6953207651774088, "correct_loss_per_token": 1.4924943447113037, "incorrect_loss_per_token": 1.3906415303548176, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6802542209625244, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.6802542209625244, "logits_per_char": -0.8401271104812622, "num_chars": 2}, {"sum_logits": -1.4924943447113037, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4924943447113037, "logits_per_char": -0.7462471723556519, "num_chars": 2}, {"sum_logits": -1.4230399131774902, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4230399131774902, "logits_per_char": -0.7115199565887451, "num_chars": 2}, {"sum_logits": -1.0686304569244385, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.0686304569244385, "logits_per_char": -0.5343152284622192, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 6, "native_id": 6, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.465418815612793, "incorrect_loss_raw": 1.3734362920125325, "correct_loss_per_char": 0.7327094078063965, "incorrect_loss_per_char": 0.6867181460062662, "correct_loss_per_token": 1.465418815612793, "incorrect_loss_per_token": 1.3734362920125325, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.465418815612793, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.465418815612793, "logits_per_char": -0.7327094078063965, "num_chars": 2}, {"sum_logits": -1.4751441478729248, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4751441478729248, "logits_per_char": -0.7375720739364624, "num_chars": 2}, {"sum_logits": -1.3490246534347534, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.3490246534347534, "logits_per_char": -0.6745123267173767, "num_chars": 2}, {"sum_logits": -1.2961400747299194, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.2961400747299194, "logits_per_char": -0.6480700373649597, "num_chars": 2}], "label": 0, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 7, "native_id": 7, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3915174007415771, "incorrect_loss_raw": 1.399284799893697, "correct_loss_per_char": 0.6957587003707886, "incorrect_loss_per_char": 0.6996423999468485, "correct_loss_per_token": 1.3915174007415771, "incorrect_loss_per_token": 1.399284799893697, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.553247332572937, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.553247332572937, "logits_per_char": -0.7766236662864685, "num_chars": 2}, {"sum_logits": -1.3915174007415771, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.3915174007415771, "logits_per_char": -0.6957587003707886, "num_chars": 2}, {"sum_logits": -1.3428510427474976, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.3428510427474976, "logits_per_char": -0.6714255213737488, "num_chars": 2}, {"sum_logits": -1.3017560243606567, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.3017560243606567, "logits_per_char": -0.6508780121803284, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 8, "native_id": 8, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4749739170074463, "incorrect_loss_raw": 1.3717796802520752, "correct_loss_per_char": 0.7374869585037231, "incorrect_loss_per_char": 0.6858898401260376, "correct_loss_per_token": 1.4749739170074463, "incorrect_loss_per_token": 1.3717796802520752, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.319251298904419, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.319251298904419, "logits_per_char": -0.6596256494522095, "num_chars": 2}, {"sum_logits": -1.5377075672149658, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.5377075672149658, "logits_per_char": -0.7688537836074829, "num_chars": 2}, {"sum_logits": -1.4749739170074463, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.4749739170074463, "logits_per_char": -0.7374869585037231, "num_chars": 2}, {"sum_logits": -1.2583801746368408, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": true, "logits_per_token": -1.2583801746368408, "logits_per_char": -0.6291900873184204, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 9, "native_id": 9, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3438395261764526, "incorrect_loss_raw": 1.4073220491409302, "correct_loss_per_char": 0.6719197630882263, "incorrect_loss_per_char": 0.7036610245704651, "correct_loss_per_token": 1.3438395261764526, "incorrect_loss_per_token": 1.4073220491409302, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3919554948806763, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3919554948806763, "logits_per_char": -0.6959777474403381, "num_chars": 2}, {"sum_logits": -1.4163683652877808, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4163683652877808, "logits_per_char": -0.7081841826438904, "num_chars": 2}, {"sum_logits": -1.4136422872543335, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4136422872543335, "logits_per_char": -0.7068211436271667, "num_chars": 2}, {"sum_logits": -1.3438395261764526, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.3438395261764526, "logits_per_char": -0.6719197630882263, "num_chars": 2}], "label": 3, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 10, "native_id": 10, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.366104245185852, "incorrect_loss_raw": 1.4015949964523315, "correct_loss_per_char": 0.683052122592926, "incorrect_loss_per_char": 0.7007974982261658, "correct_loss_per_token": 1.366104245185852, "incorrect_loss_per_token": 1.4015949964523315, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.419993281364441, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.419993281364441, "logits_per_char": -0.7099966406822205, "num_chars": 2}, {"sum_logits": -1.3369123935699463, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.3369123935699463, "logits_per_char": -0.6684561967849731, "num_chars": 2}, {"sum_logits": -1.4478793144226074, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.4478793144226074, "logits_per_char": -0.7239396572113037, "num_chars": 2}, {"sum_logits": -1.366104245185852, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.366104245185852, "logits_per_char": -0.683052122592926, "num_chars": 2}], "label": 3, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 11, "native_id": 11, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2189528942108154, "incorrect_loss_raw": 1.4638481537501018, "correct_loss_per_char": 0.6094764471054077, "incorrect_loss_per_char": 0.7319240768750509, "correct_loss_per_token": 1.2189528942108154, "incorrect_loss_per_token": 1.4638481537501018, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6227052211761475, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.6227052211761475, "logits_per_char": -0.8113526105880737, "num_chars": 2}, {"sum_logits": -1.4544343948364258, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.4544343948364258, "logits_per_char": -0.7272171974182129, "num_chars": 2}, {"sum_logits": -1.314404845237732, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": false, "logits_per_token": -1.314404845237732, "logits_per_char": -0.657202422618866, "num_chars": 2}, {"sum_logits": -1.2189528942108154, "num_tokens": 1, "num_tokens_all": 381, "is_greedy": true, "logits_per_token": -1.2189528942108154, "logits_per_char": -0.6094764471054077, "num_chars": 2}], "label": 3, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 12, "native_id": 12, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4294936656951904, "incorrect_loss_raw": 1.3855040868123372, "correct_loss_per_char": 0.7147468328475952, "incorrect_loss_per_char": 0.6927520434061686, "correct_loss_per_token": 1.4294936656951904, "incorrect_loss_per_token": 1.3855040868123372, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4150245189666748, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4150245189666748, "logits_per_char": -0.7075122594833374, "num_chars": 2}, {"sum_logits": -1.3296947479248047, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": true, "logits_per_token": -1.3296947479248047, "logits_per_char": -0.6648473739624023, "num_chars": 2}, {"sum_logits": -1.4117929935455322, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4117929935455322, "logits_per_char": -0.7058964967727661, "num_chars": 2}, {"sum_logits": -1.4294936656951904, "num_tokens": 1, "num_tokens_all": 380, "is_greedy": false, "logits_per_token": -1.4294936656951904, "logits_per_char": -0.7147468328475952, "num_chars": 2}], "label": 3, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 13, "native_id": 13, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2889593839645386, "incorrect_loss_raw": 1.4309061368306477, "correct_loss_per_char": 0.6444796919822693, "incorrect_loss_per_char": 0.7154530684153239, "correct_loss_per_token": 1.2889593839645386, "incorrect_loss_per_token": 1.4309061368306477, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3177268505096436, "num_tokens": 1, "num_tokens_all": 421, "is_greedy": false, "logits_per_token": -1.3177268505096436, "logits_per_char": -0.6588634252548218, "num_chars": 2}, {"sum_logits": -1.2889593839645386, "num_tokens": 1, "num_tokens_all": 421, "is_greedy": true, "logits_per_token": -1.2889593839645386, "logits_per_char": -0.6444796919822693, "num_chars": 2}, {"sum_logits": -1.5435893535614014, "num_tokens": 1, "num_tokens_all": 421, "is_greedy": false, "logits_per_token": -1.5435893535614014, "logits_per_char": -0.7717946767807007, "num_chars": 2}, {"sum_logits": -1.4314022064208984, "num_tokens": 1, "num_tokens_all": 421, "is_greedy": false, "logits_per_token": -1.4314022064208984, "logits_per_char": -0.7157011032104492, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 14, "native_id": 14, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3550472259521484, "incorrect_loss_raw": 1.4195955197016399, "correct_loss_per_char": 0.6775236129760742, "incorrect_loss_per_char": 0.7097977598508199, "correct_loss_per_token": 1.3550472259521484, "incorrect_loss_per_token": 1.4195955197016399, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3550472259521484, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.3550472259521484, "logits_per_char": -0.6775236129760742, "num_chars": 2}, {"sum_logits": -1.2495994567871094, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": true, "logits_per_token": -1.2495994567871094, "logits_per_char": -0.6247997283935547, "num_chars": 2}, {"sum_logits": -1.4892295598983765, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.4892295598983765, "logits_per_char": -0.7446147799491882, "num_chars": 2}, {"sum_logits": -1.5199575424194336, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.5199575424194336, "logits_per_char": -0.7599787712097168, "num_chars": 2}], "label": 0, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 15, "native_id": 15, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.421711802482605, "incorrect_loss_raw": 1.3886305093765259, "correct_loss_per_char": 0.7108559012413025, "incorrect_loss_per_char": 0.6943152546882629, "correct_loss_per_token": 1.421711802482605, "incorrect_loss_per_token": 1.3886305093765259, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4800524711608887, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4800524711608887, "logits_per_char": -0.7400262355804443, "num_chars": 2}, {"sum_logits": -1.3821851015090942, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.3821851015090942, "logits_per_char": -0.6910925507545471, "num_chars": 2}, {"sum_logits": -1.421711802482605, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.421711802482605, "logits_per_char": -0.7108559012413025, "num_chars": 2}, {"sum_logits": -1.3036539554595947, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.3036539554595947, "logits_per_char": -0.6518269777297974, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 16, "native_id": 16, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.393099069595337, "incorrect_loss_raw": 1.3946635723114014, "correct_loss_per_char": 0.6965495347976685, "incorrect_loss_per_char": 0.6973317861557007, "correct_loss_per_token": 1.393099069595337, "incorrect_loss_per_token": 1.3946635723114014, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.508659839630127, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.508659839630127, "logits_per_char": -0.7543299198150635, "num_chars": 2}, {"sum_logits": -1.393099069595337, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.393099069595337, "logits_per_char": -0.6965495347976685, "num_chars": 2}, {"sum_logits": -1.2896406650543213, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.2896406650543213, "logits_per_char": -0.6448203325271606, "num_chars": 2}, {"sum_logits": -1.3856902122497559, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3856902122497559, "logits_per_char": -0.6928451061248779, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 17, "native_id": 17, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3862005472183228, "incorrect_loss_raw": 1.3982794284820557, "correct_loss_per_char": 0.6931002736091614, "incorrect_loss_per_char": 0.6991397142410278, "correct_loss_per_token": 1.3862005472183228, "incorrect_loss_per_token": 1.3982794284820557, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4385910034179688, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.4385910034179688, "logits_per_char": -0.7192955017089844, "num_chars": 2}, {"sum_logits": -1.387176752090454, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.387176752090454, "logits_per_char": -0.693588376045227, "num_chars": 2}, {"sum_logits": -1.3690705299377441, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": true, "logits_per_token": -1.3690705299377441, "logits_per_char": -0.6845352649688721, "num_chars": 2}, {"sum_logits": -1.3862005472183228, "num_tokens": 1, "num_tokens_all": 326, "is_greedy": false, "logits_per_token": -1.3862005472183228, "logits_per_char": -0.6931002736091614, "num_chars": 2}], "label": 3, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 18, "native_id": 18, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3233951330184937, "incorrect_loss_raw": 1.4237796465555828, "correct_loss_per_char": 0.6616975665092468, "incorrect_loss_per_char": 0.7118898232777914, "correct_loss_per_token": 1.3233951330184937, "incorrect_loss_per_token": 1.4237796465555828, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2966448068618774, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.2966448068618774, "logits_per_char": -0.6483224034309387, "num_chars": 2}, {"sum_logits": -1.4010498523712158, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4010498523712158, "logits_per_char": -0.7005249261856079, "num_chars": 2}, {"sum_logits": -1.5736442804336548, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.5736442804336548, "logits_per_char": -0.7868221402168274, "num_chars": 2}, {"sum_logits": -1.3233951330184937, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.3233951330184937, "logits_per_char": -0.6616975665092468, "num_chars": 2}], "label": 3, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 19, "native_id": 19, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4577877521514893, "incorrect_loss_raw": 1.373625119527181, "correct_loss_per_char": 0.7288938760757446, "incorrect_loss_per_char": 0.6868125597635905, "correct_loss_per_token": 1.4577877521514893, "incorrect_loss_per_token": 1.373625119527181, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.339766502380371, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.339766502380371, "logits_per_char": -0.6698832511901855, "num_chars": 2}, {"sum_logits": -1.458911418914795, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.458911418914795, "logits_per_char": -0.7294557094573975, "num_chars": 2}, {"sum_logits": -1.4577877521514893, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.4577877521514893, "logits_per_char": -0.7288938760757446, "num_chars": 2}, {"sum_logits": -1.322197437286377, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": true, "logits_per_token": -1.322197437286377, "logits_per_char": -0.6610987186431885, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 20, "native_id": 20, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.335674524307251, "incorrect_loss_raw": 1.4279861847559612, "correct_loss_per_char": 0.6678372621536255, "incorrect_loss_per_char": 0.7139930923779806, "correct_loss_per_token": 1.335674524307251, "incorrect_loss_per_token": 1.4279861847559612, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5312916040420532, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.5312916040420532, "logits_per_char": -0.7656458020210266, "num_chars": 2}, {"sum_logits": -1.5854696035385132, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.5854696035385132, "logits_per_char": -0.7927348017692566, "num_chars": 2}, {"sum_logits": -1.335674524307251, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.335674524307251, "logits_per_char": -0.6678372621536255, "num_chars": 2}, {"sum_logits": -1.167197346687317, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.167197346687317, "logits_per_char": -0.5835986733436584, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 21, "native_id": 21, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3766592741012573, "incorrect_loss_raw": 1.4016400973002117, "correct_loss_per_char": 0.6883296370506287, "incorrect_loss_per_char": 0.7008200486501058, "correct_loss_per_token": 1.3766592741012573, "incorrect_loss_per_token": 1.4016400973002117, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4193023443222046, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.4193023443222046, "logits_per_char": -0.7096511721611023, "num_chars": 2}, {"sum_logits": -1.516585111618042, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.516585111618042, "logits_per_char": -0.758292555809021, "num_chars": 2}, {"sum_logits": -1.3766592741012573, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.3766592741012573, "logits_per_char": -0.6883296370506287, "num_chars": 2}, {"sum_logits": -1.2690328359603882, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": true, "logits_per_token": -1.2690328359603882, "logits_per_char": -0.6345164179801941, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 22, "native_id": 22, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4974565505981445, "incorrect_loss_raw": 1.3600106239318848, "correct_loss_per_char": 0.7487282752990723, "incorrect_loss_per_char": 0.6800053119659424, "correct_loss_per_token": 1.4974565505981445, "incorrect_loss_per_token": 1.3600106239318848, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4490904808044434, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4490904808044434, "logits_per_char": -0.7245452404022217, "num_chars": 2}, {"sum_logits": -1.4974565505981445, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4974565505981445, "logits_per_char": -0.7487282752990723, "num_chars": 2}, {"sum_logits": -1.3340678215026855, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3340678215026855, "logits_per_char": -0.6670339107513428, "num_chars": 2}, {"sum_logits": -1.2968735694885254, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.2968735694885254, "logits_per_char": -0.6484367847442627, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 23, "native_id": 23, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.422816514968872, "incorrect_loss_raw": 1.3843737443288167, "correct_loss_per_char": 0.711408257484436, "incorrect_loss_per_char": 0.6921868721644083, "correct_loss_per_token": 1.422816514968872, "incorrect_loss_per_token": 1.3843737443288167, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4647932052612305, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4647932052612305, "logits_per_char": -0.7323966026306152, "num_chars": 2}, {"sum_logits": -1.422816514968872, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.422816514968872, "logits_per_char": -0.711408257484436, "num_chars": 2}, {"sum_logits": -1.425504207611084, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.425504207611084, "logits_per_char": -0.712752103805542, "num_chars": 2}, {"sum_logits": -1.2628238201141357, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.2628238201141357, "logits_per_char": -0.6314119100570679, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 24, "native_id": 24, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4328713417053223, "incorrect_loss_raw": 1.381482720375061, "correct_loss_per_char": 0.7164356708526611, "incorrect_loss_per_char": 0.6907413601875305, "correct_loss_per_token": 1.4328713417053223, "incorrect_loss_per_token": 1.381482720375061, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4219847917556763, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4219847917556763, "logits_per_char": -0.7109923958778381, "num_chars": 2}, {"sum_logits": -1.4328713417053223, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4328713417053223, "logits_per_char": -0.7164356708526611, "num_chars": 2}, {"sum_logits": -1.4346158504486084, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4346158504486084, "logits_per_char": -0.7173079252243042, "num_chars": 2}, {"sum_logits": -1.2878475189208984, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.2878475189208984, "logits_per_char": -0.6439237594604492, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 25, "native_id": 25, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3536278009414673, "incorrect_loss_raw": 1.4098401467005413, "correct_loss_per_char": 0.6768139004707336, "incorrect_loss_per_char": 0.7049200733502706, "correct_loss_per_token": 1.3536278009414673, "incorrect_loss_per_token": 1.4098401467005413, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5394415855407715, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.5394415855407715, "logits_per_char": -0.7697207927703857, "num_chars": 2}, {"sum_logits": -1.331103801727295, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": true, "logits_per_token": -1.331103801727295, "logits_per_char": -0.6655519008636475, "num_chars": 2}, {"sum_logits": -1.3536278009414673, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.3536278009414673, "logits_per_char": -0.6768139004707336, "num_chars": 2}, {"sum_logits": -1.3589750528335571, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.3589750528335571, "logits_per_char": -0.6794875264167786, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 26, "native_id": 26, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.48549222946167, "incorrect_loss_raw": 1.3738659222920735, "correct_loss_per_char": 0.742746114730835, "incorrect_loss_per_char": 0.6869329611460367, "correct_loss_per_token": 1.48549222946167, "incorrect_loss_per_token": 1.3738659222920735, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.447365164756775, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.447365164756775, "logits_per_char": -0.7236825823783875, "num_chars": 2}, {"sum_logits": -1.5082006454467773, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.5082006454467773, "logits_per_char": -0.7541003227233887, "num_chars": 2}, {"sum_logits": -1.48549222946167, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": false, "logits_per_token": -1.48549222946167, "logits_per_char": -0.742746114730835, "num_chars": 2}, {"sum_logits": -1.1660319566726685, "num_tokens": 1, "num_tokens_all": 345, "is_greedy": true, "logits_per_token": -1.1660319566726685, "logits_per_char": -0.5830159783363342, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 27, "native_id": 27, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3545526266098022, "incorrect_loss_raw": 1.40550696849823, "correct_loss_per_char": 0.6772763133049011, "incorrect_loss_per_char": 0.702753484249115, "correct_loss_per_token": 1.3545526266098022, "incorrect_loss_per_token": 1.40550696849823, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3545526266098022, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.3545526266098022, "logits_per_char": -0.6772763133049011, "num_chars": 2}, {"sum_logits": -1.4286096096038818, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4286096096038818, "logits_per_char": -0.7143048048019409, "num_chars": 2}, {"sum_logits": -1.3983309268951416, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3983309268951416, "logits_per_char": -0.6991654634475708, "num_chars": 2}, {"sum_logits": -1.3895803689956665, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3895803689956665, "logits_per_char": -0.6947901844978333, "num_chars": 2}], "label": 0, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 28, "native_id": 28, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4536499977111816, "incorrect_loss_raw": 1.3840632836023967, "correct_loss_per_char": 0.7268249988555908, "incorrect_loss_per_char": 0.6920316418011984, "correct_loss_per_token": 1.4536499977111816, "incorrect_loss_per_token": 1.3840632836023967, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4718444347381592, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4718444347381592, "logits_per_char": -0.7359222173690796, "num_chars": 2}, {"sum_logits": -1.4997689723968506, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4997689723968506, "logits_per_char": -0.7498844861984253, "num_chars": 2}, {"sum_logits": -1.4536499977111816, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4536499977111816, "logits_per_char": -0.7268249988555908, "num_chars": 2}, {"sum_logits": -1.1805764436721802, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.1805764436721802, "logits_per_char": -0.5902882218360901, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 29, "native_id": 29, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2975351810455322, "incorrect_loss_raw": 1.4279372692108154, "correct_loss_per_char": 0.6487675905227661, "incorrect_loss_per_char": 0.7139686346054077, "correct_loss_per_token": 1.2975351810455322, "incorrect_loss_per_token": 1.4279372692108154, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4875357151031494, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.4875357151031494, "logits_per_char": -0.7437678575515747, "num_chars": 2}, {"sum_logits": -1.2975351810455322, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": true, "logits_per_token": -1.2975351810455322, "logits_per_char": -0.6487675905227661, "num_chars": 2}, {"sum_logits": -1.3521440029144287, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.3521440029144287, "logits_per_char": -0.6760720014572144, "num_chars": 2}, {"sum_logits": -1.4441320896148682, "num_tokens": 1, "num_tokens_all": 409, "is_greedy": false, "logits_per_token": -1.4441320896148682, "logits_per_char": -0.7220660448074341, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 30, "native_id": 30, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4515025615692139, "incorrect_loss_raw": 1.3965996106465657, "correct_loss_per_char": 0.7257512807846069, "incorrect_loss_per_char": 0.6982998053232828, "correct_loss_per_token": 1.4515025615692139, "incorrect_loss_per_token": 1.3965996106465657, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4784427881240845, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.4784427881240845, "logits_per_char": -0.7392213940620422, "num_chars": 2}, {"sum_logits": -1.5635497570037842, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.5635497570037842, "logits_per_char": -0.7817748785018921, "num_chars": 2}, {"sum_logits": -1.4515025615692139, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.4515025615692139, "logits_per_char": -0.7257512807846069, "num_chars": 2}, {"sum_logits": -1.1478062868118286, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": true, "logits_per_token": -1.1478062868118286, "logits_per_char": -0.5739031434059143, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 31, "native_id": 31, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4813228845596313, "incorrect_loss_raw": 1.36553955078125, "correct_loss_per_char": 0.7406614422798157, "incorrect_loss_per_char": 0.682769775390625, "correct_loss_per_token": 1.4813228845596313, "incorrect_loss_per_token": 1.36553955078125, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4813228845596313, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.4813228845596313, "logits_per_char": -0.7406614422798157, "num_chars": 2}, {"sum_logits": -1.422375202178955, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.422375202178955, "logits_per_char": -0.7111876010894775, "num_chars": 2}, {"sum_logits": -1.4115208387374878, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.4115208387374878, "logits_per_char": -0.7057604193687439, "num_chars": 2}, {"sum_logits": -1.2627226114273071, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": true, "logits_per_token": -1.2627226114273071, "logits_per_char": -0.6313613057136536, "num_chars": 2}], "label": 0, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 32, "native_id": 32, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4704034328460693, "incorrect_loss_raw": 1.3775686820348103, "correct_loss_per_char": 0.7352017164230347, "incorrect_loss_per_char": 0.6887843410174052, "correct_loss_per_token": 1.4704034328460693, "incorrect_loss_per_token": 1.3775686820348103, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.397162914276123, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.397162914276123, "logits_per_char": -0.6985814571380615, "num_chars": 2}, {"sum_logits": -1.4704034328460693, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4704034328460693, "logits_per_char": -0.7352017164230347, "num_chars": 2}, {"sum_logits": -1.4907883405685425, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4907883405685425, "logits_per_char": -0.7453941702842712, "num_chars": 2}, {"sum_logits": -1.2447547912597656, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.2447547912597656, "logits_per_char": -0.6223773956298828, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 33, "native_id": 33, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4967492818832397, "incorrect_loss_raw": 1.404780427614848, "correct_loss_per_char": 0.7483746409416199, "incorrect_loss_per_char": 0.702390213807424, "correct_loss_per_token": 1.4967492818832397, "incorrect_loss_per_token": 1.404780427614848, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5772771835327148, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.5772771835327148, "logits_per_char": -0.7886385917663574, "num_chars": 2}, {"sum_logits": -1.4967492818832397, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.4967492818832397, "logits_per_char": -0.7483746409416199, "num_chars": 2}, {"sum_logits": -1.4351080656051636, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.4351080656051636, "logits_per_char": -0.7175540328025818, "num_chars": 2}, {"sum_logits": -1.201956033706665, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": true, "logits_per_token": -1.201956033706665, "logits_per_char": -0.6009780168533325, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 34, "native_id": 34, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4456462860107422, "incorrect_loss_raw": 1.3803341388702393, "correct_loss_per_char": 0.7228231430053711, "incorrect_loss_per_char": 0.6901670694351196, "correct_loss_per_token": 1.4456462860107422, "incorrect_loss_per_token": 1.3803341388702393, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4568976163864136, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4568976163864136, "logits_per_char": -0.7284488081932068, "num_chars": 2}, {"sum_logits": -1.285117268562317, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.285117268562317, "logits_per_char": -0.6425586342811584, "num_chars": 2}, {"sum_logits": -1.4456462860107422, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4456462860107422, "logits_per_char": -0.7228231430053711, "num_chars": 2}, {"sum_logits": -1.3989875316619873, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.3989875316619873, "logits_per_char": -0.6994937658309937, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 35, "native_id": 35, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.442177176475525, "incorrect_loss_raw": 1.3768330415089924, "correct_loss_per_char": 0.7210885882377625, "incorrect_loss_per_char": 0.6884165207544962, "correct_loss_per_token": 1.442177176475525, "incorrect_loss_per_token": 1.3768330415089924, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4453903436660767, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.4453903436660767, "logits_per_char": -0.7226951718330383, "num_chars": 2}, {"sum_logits": -1.3826018571853638, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.3826018571853638, "logits_per_char": -0.6913009285926819, "num_chars": 2}, {"sum_logits": -1.442177176475525, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": false, "logits_per_token": -1.442177176475525, "logits_per_char": -0.7210885882377625, "num_chars": 2}, {"sum_logits": -1.302506923675537, "num_tokens": 1, "num_tokens_all": 355, "is_greedy": true, "logits_per_token": -1.302506923675537, "logits_per_char": -0.6512534618377686, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 36, "native_id": 36, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1942259073257446, "incorrect_loss_raw": 1.4705386956532795, "correct_loss_per_char": 0.5971129536628723, "incorrect_loss_per_char": 0.7352693478266398, "correct_loss_per_token": 1.1942259073257446, "incorrect_loss_per_token": 1.4705386956532795, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5808656215667725, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.5808656215667725, "logits_per_char": -0.7904328107833862, "num_chars": 2}, {"sum_logits": -1.4738879203796387, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4738879203796387, "logits_per_char": -0.7369439601898193, "num_chars": 2}, {"sum_logits": -1.3568625450134277, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.3568625450134277, "logits_per_char": -0.6784312725067139, "num_chars": 2}, {"sum_logits": -1.1942259073257446, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.1942259073257446, "logits_per_char": -0.5971129536628723, "num_chars": 2}], "label": 3, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 37, "native_id": 37, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6506786346435547, "incorrect_loss_raw": 1.3213859001795452, "correct_loss_per_char": 0.8253393173217773, "incorrect_loss_per_char": 0.6606929500897726, "correct_loss_per_token": 1.6506786346435547, "incorrect_loss_per_token": 1.3213859001795452, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3004087209701538, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.3004087209701538, "logits_per_char": -0.6502043604850769, "num_chars": 2}, {"sum_logits": -1.3711838722229004, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.3711838722229004, "logits_per_char": -0.6855919361114502, "num_chars": 2}, {"sum_logits": -1.6506786346435547, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": false, "logits_per_token": -1.6506786346435547, "logits_per_char": -0.8253393173217773, "num_chars": 2}, {"sum_logits": -1.292565107345581, "num_tokens": 1, "num_tokens_all": 336, "is_greedy": true, "logits_per_token": -1.292565107345581, "logits_per_char": -0.6462825536727905, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 38, "native_id": 38, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4230719804763794, "incorrect_loss_raw": 1.3798617919286091, "correct_loss_per_char": 0.7115359902381897, "incorrect_loss_per_char": 0.6899308959643046, "correct_loss_per_token": 1.4230719804763794, "incorrect_loss_per_token": 1.3798617919286091, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4305106401443481, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4305106401443481, "logits_per_char": -0.7152553200721741, "num_chars": 2}, {"sum_logits": -1.3548449277877808, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.3548449277877808, "logits_per_char": -0.6774224638938904, "num_chars": 2}, {"sum_logits": -1.3542298078536987, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.3542298078536987, "logits_per_char": -0.6771149039268494, "num_chars": 2}, {"sum_logits": -1.4230719804763794, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4230719804763794, "logits_per_char": -0.7115359902381897, "num_chars": 2}], "label": 3, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 39, "native_id": 39, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4298149347305298, "incorrect_loss_raw": 1.3914151986440022, "correct_loss_per_char": 0.7149074673652649, "incorrect_loss_per_char": 0.6957075993220011, "correct_loss_per_token": 1.4298149347305298, "incorrect_loss_per_token": 1.3914151986440022, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5394723415374756, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.5394723415374756, "logits_per_char": -0.7697361707687378, "num_chars": 2}, {"sum_logits": -1.460456132888794, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.460456132888794, "logits_per_char": -0.730228066444397, "num_chars": 2}, {"sum_logits": -1.4298149347305298, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4298149347305298, "logits_per_char": -0.7149074673652649, "num_chars": 2}, {"sum_logits": -1.1743171215057373, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.1743171215057373, "logits_per_char": -0.5871585607528687, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 40, "native_id": 40, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5575207471847534, "incorrect_loss_raw": 1.3466081221898396, "correct_loss_per_char": 0.7787603735923767, "incorrect_loss_per_char": 0.6733040610949198, "correct_loss_per_token": 1.5575207471847534, "incorrect_loss_per_token": 1.3466081221898396, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4442192316055298, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4442192316055298, "logits_per_char": -0.7221096158027649, "num_chars": 2}, {"sum_logits": -1.3884406089782715, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.3884406089782715, "logits_per_char": -0.6942203044891357, "num_chars": 2}, {"sum_logits": -1.5575207471847534, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.5575207471847534, "logits_per_char": -0.7787603735923767, "num_chars": 2}, {"sum_logits": -1.2071645259857178, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": true, "logits_per_token": -1.2071645259857178, "logits_per_char": -0.6035822629928589, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 41, "native_id": 41, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3070034980773926, "incorrect_loss_raw": 1.4301775693893433, "correct_loss_per_char": 0.6535017490386963, "incorrect_loss_per_char": 0.7150887846946716, "correct_loss_per_token": 1.3070034980773926, "incorrect_loss_per_token": 1.4301775693893433, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5387811660766602, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.5387811660766602, "logits_per_char": -0.7693905830383301, "num_chars": 2}, {"sum_logits": -1.5194611549377441, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.5194611549377441, "logits_per_char": -0.7597305774688721, "num_chars": 2}, {"sum_logits": -1.3070034980773926, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": false, "logits_per_token": -1.3070034980773926, "logits_per_char": -0.6535017490386963, "num_chars": 2}, {"sum_logits": -1.2322903871536255, "num_tokens": 1, "num_tokens_all": 351, "is_greedy": true, "logits_per_token": -1.2322903871536255, "logits_per_char": -0.6161451935768127, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 42, "native_id": 42, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4956848621368408, "incorrect_loss_raw": 1.3632001876831055, "correct_loss_per_char": 0.7478424310684204, "incorrect_loss_per_char": 0.6816000938415527, "correct_loss_per_token": 1.4956848621368408, "incorrect_loss_per_token": 1.3632001876831055, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4189622402191162, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4189622402191162, "logits_per_char": -0.7094811201095581, "num_chars": 2}, {"sum_logits": -1.4956848621368408, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4956848621368408, "logits_per_char": -0.7478424310684204, "num_chars": 2}, {"sum_logits": -1.3868379592895508, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.3868379592895508, "logits_per_char": -0.6934189796447754, "num_chars": 2}, {"sum_logits": -1.2838003635406494, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": true, "logits_per_token": -1.2838003635406494, "logits_per_char": -0.6419001817703247, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 43, "native_id": 43, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4748634099960327, "incorrect_loss_raw": 1.3715445597966511, "correct_loss_per_char": 0.7374317049980164, "incorrect_loss_per_char": 0.6857722798983256, "correct_loss_per_token": 1.4748634099960327, "incorrect_loss_per_token": 1.3715445597966511, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4544682502746582, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4544682502746582, "logits_per_char": -0.7272341251373291, "num_chars": 2}, {"sum_logits": -1.4232337474822998, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4232337474822998, "logits_per_char": -0.7116168737411499, "num_chars": 2}, {"sum_logits": -1.4748634099960327, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.4748634099960327, "logits_per_char": -0.7374317049980164, "num_chars": 2}, {"sum_logits": -1.2369316816329956, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.2369316816329956, "logits_per_char": -0.6184658408164978, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 44, "native_id": 44, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5099855661392212, "incorrect_loss_raw": 1.3548030455907185, "correct_loss_per_char": 0.7549927830696106, "incorrect_loss_per_char": 0.6774015227953593, "correct_loss_per_token": 1.5099855661392212, "incorrect_loss_per_token": 1.3548030455907185, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5099855661392212, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.5099855661392212, "logits_per_char": -0.7549927830696106, "num_chars": 2}, {"sum_logits": -1.3793530464172363, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3793530464172363, "logits_per_char": -0.6896765232086182, "num_chars": 2}, {"sum_logits": -1.3566436767578125, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": false, "logits_per_token": -1.3566436767578125, "logits_per_char": -0.6783218383789062, "num_chars": 2}, {"sum_logits": -1.328412413597107, "num_tokens": 1, "num_tokens_all": 376, "is_greedy": true, "logits_per_token": -1.328412413597107, "logits_per_char": -0.6642062067985535, "num_chars": 2}], "label": 0, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 45, "native_id": 45, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4379284381866455, "incorrect_loss_raw": 1.375552733739217, "correct_loss_per_char": 0.7189642190933228, "incorrect_loss_per_char": 0.6877763668696085, "correct_loss_per_token": 1.4379284381866455, "incorrect_loss_per_token": 1.375552733739217, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3525404930114746, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": true, "logits_per_token": -1.3525404930114746, "logits_per_char": -0.6762702465057373, "num_chars": 2}, {"sum_logits": -1.390488624572754, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.390488624572754, "logits_per_char": -0.695244312286377, "num_chars": 2}, {"sum_logits": -1.4379284381866455, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.4379284381866455, "logits_per_char": -0.7189642190933228, "num_chars": 2}, {"sum_logits": -1.3836290836334229, "num_tokens": 1, "num_tokens_all": 370, "is_greedy": false, "logits_per_token": -1.3836290836334229, "logits_per_char": -0.6918145418167114, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 46, "native_id": 46, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3289506435394287, "incorrect_loss_raw": 1.4213038285573323, "correct_loss_per_char": 0.6644753217697144, "incorrect_loss_per_char": 0.7106519142786661, "correct_loss_per_token": 1.3289506435394287, "incorrect_loss_per_token": 1.4213038285573323, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5441606044769287, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.5441606044769287, "logits_per_char": -0.7720803022384644, "num_chars": 2}, {"sum_logits": -1.3289506435394287, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.3289506435394287, "logits_per_char": -0.6644753217697144, "num_chars": 2}, {"sum_logits": -1.379122257232666, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.379122257232666, "logits_per_char": -0.689561128616333, "num_chars": 2}, {"sum_logits": -1.3406286239624023, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3406286239624023, "logits_per_char": -0.6703143119812012, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 47, "native_id": 47, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3399772644042969, "incorrect_loss_raw": 1.4260101318359375, "correct_loss_per_char": 0.6699886322021484, "incorrect_loss_per_char": 0.7130050659179688, "correct_loss_per_token": 1.3399772644042969, "incorrect_loss_per_token": 1.4260101318359375, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.632694959640503, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.632694959640503, "logits_per_char": -0.8163474798202515, "num_chars": 2}, {"sum_logits": -1.3272124528884888, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.3272124528884888, "logits_per_char": -0.6636062264442444, "num_chars": 2}, {"sum_logits": -1.3399772644042969, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.3399772644042969, "logits_per_char": -0.6699886322021484, "num_chars": 2}, {"sum_logits": -1.3181229829788208, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": true, "logits_per_token": -1.3181229829788208, "logits_per_char": -0.6590614914894104, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 48, "native_id": 48, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3005026578903198, "incorrect_loss_raw": 1.43043319384257, "correct_loss_per_char": 0.6502513289451599, "incorrect_loss_per_char": 0.715216596921285, "correct_loss_per_token": 1.3005026578903198, "incorrect_loss_per_token": 1.43043319384257, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3005026578903198, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.3005026578903198, "logits_per_char": -0.6502513289451599, "num_chars": 2}, {"sum_logits": -1.334620714187622, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.334620714187622, "logits_per_char": -0.667310357093811, "num_chars": 2}, {"sum_logits": -1.4631870985031128, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.4631870985031128, "logits_per_char": -0.7315935492515564, "num_chars": 2}, {"sum_logits": -1.493491768836975, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.493491768836975, "logits_per_char": -0.7467458844184875, "num_chars": 2}], "label": 0, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 49, "native_id": 49, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3759609460830688, "incorrect_loss_raw": 1.4015600283940632, "correct_loss_per_char": 0.6879804730415344, "incorrect_loss_per_char": 0.7007800141970316, "correct_loss_per_token": 1.3759609460830688, "incorrect_loss_per_token": 1.4015600283940632, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5070326328277588, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.5070326328277588, "logits_per_char": -0.7535163164138794, "num_chars": 2}, {"sum_logits": -1.3452061414718628, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": true, "logits_per_token": -1.3452061414718628, "logits_per_char": -0.6726030707359314, "num_chars": 2}, {"sum_logits": -1.3759609460830688, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.3759609460830688, "logits_per_char": -0.6879804730415344, "num_chars": 2}, {"sum_logits": -1.3524413108825684, "num_tokens": 1, "num_tokens_all": 390, "is_greedy": false, "logits_per_token": -1.3524413108825684, "logits_per_char": -0.6762206554412842, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 50, "native_id": 50, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.281430959701538, "incorrect_loss_raw": 1.4344633022944133, "correct_loss_per_char": 0.640715479850769, "incorrect_loss_per_char": 0.7172316511472067, "correct_loss_per_token": 1.281430959701538, "incorrect_loss_per_token": 1.4344633022944133, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4596978425979614, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4596978425979614, "logits_per_char": -0.7298489212989807, "num_chars": 2}, {"sum_logits": -1.433915615081787, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.433915615081787, "logits_per_char": -0.7169578075408936, "num_chars": 2}, {"sum_logits": -1.4097764492034912, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": false, "logits_per_token": -1.4097764492034912, "logits_per_char": -0.7048882246017456, "num_chars": 2}, {"sum_logits": -1.281430959701538, "num_tokens": 1, "num_tokens_all": 342, "is_greedy": true, "logits_per_token": -1.281430959701538, "logits_per_char": -0.640715479850769, "num_chars": 2}], "label": 3, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 51, "native_id": 51, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3876689672470093, "incorrect_loss_raw": 1.3946672280629475, "correct_loss_per_char": 0.6938344836235046, "incorrect_loss_per_char": 0.6973336140314738, "correct_loss_per_token": 1.3876689672470093, "incorrect_loss_per_token": 1.3946672280629475, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4599204063415527, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4599204063415527, "logits_per_char": -0.7299602031707764, "num_chars": 2}, {"sum_logits": -1.3277256488800049, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.3277256488800049, "logits_per_char": -0.6638628244400024, "num_chars": 2}, {"sum_logits": -1.3963556289672852, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.3963556289672852, "logits_per_char": -0.6981778144836426, "num_chars": 2}, {"sum_logits": -1.3876689672470093, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.3876689672470093, "logits_per_char": -0.6938344836235046, "num_chars": 2}], "label": 3, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 52, "native_id": 52, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3311041593551636, "incorrect_loss_raw": 1.4194758733113606, "correct_loss_per_char": 0.6655520796775818, "incorrect_loss_per_char": 0.7097379366556803, "correct_loss_per_token": 1.3311041593551636, "incorrect_loss_per_token": 1.4194758733113606, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5730392932891846, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.5730392932891846, "logits_per_char": -0.7865196466445923, "num_chars": 2}, {"sum_logits": -1.3311041593551636, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3311041593551636, "logits_per_char": -0.6655520796775818, "num_chars": 2}, {"sum_logits": -1.3923864364624023, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": false, "logits_per_token": -1.3923864364624023, "logits_per_char": -0.6961932182312012, "num_chars": 2}, {"sum_logits": -1.2930018901824951, "num_tokens": 1, "num_tokens_all": 365, "is_greedy": true, "logits_per_token": -1.2930018901824951, "logits_per_char": -0.6465009450912476, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 53, "native_id": 53, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3456785678863525, "incorrect_loss_raw": 1.4118904272715251, "correct_loss_per_char": 0.6728392839431763, "incorrect_loss_per_char": 0.7059452136357626, "correct_loss_per_token": 1.3456785678863525, "incorrect_loss_per_token": 1.4118904272715251, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3206889629364014, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": true, "logits_per_token": -1.3206889629364014, "logits_per_char": -0.6603444814682007, "num_chars": 2}, {"sum_logits": -1.3456785678863525, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.3456785678863525, "logits_per_char": -0.6728392839431763, "num_chars": 2}, {"sum_logits": -1.4620575904846191, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.4620575904846191, "logits_per_char": -0.7310287952423096, "num_chars": 2}, {"sum_logits": -1.4529247283935547, "num_tokens": 1, "num_tokens_all": 321, "is_greedy": false, "logits_per_token": -1.4529247283935547, "logits_per_char": -0.7264623641967773, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 54, "native_id": 54, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.424447774887085, "incorrect_loss_raw": 1.3822198311487834, "correct_loss_per_char": 0.7122238874435425, "incorrect_loss_per_char": 0.6911099155743917, "correct_loss_per_token": 1.424447774887085, "incorrect_loss_per_token": 1.3822198311487834, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4786186218261719, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.4786186218261719, "logits_per_char": -0.7393093109130859, "num_chars": 2}, {"sum_logits": -1.424447774887085, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.424447774887085, "logits_per_char": -0.7122238874435425, "num_chars": 2}, {"sum_logits": -1.351736068725586, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": false, "logits_per_token": -1.351736068725586, "logits_per_char": -0.675868034362793, "num_chars": 2}, {"sum_logits": -1.3163048028945923, "num_tokens": 1, "num_tokens_all": 371, "is_greedy": true, "logits_per_token": -1.3163048028945923, "logits_per_char": -0.6581524014472961, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 55, "native_id": 55, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.20891535282135, "incorrect_loss_raw": 1.4701717297236125, "correct_loss_per_char": 0.604457676410675, "incorrect_loss_per_char": 0.7350858648618063, "correct_loss_per_token": 1.20891535282135, "incorrect_loss_per_token": 1.4701717297236125, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3620232343673706, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.3620232343673706, "logits_per_char": -0.6810116171836853, "num_chars": 2}, {"sum_logits": -1.5669913291931152, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.5669913291931152, "logits_per_char": -0.7834956645965576, "num_chars": 2}, {"sum_logits": -1.4815006256103516, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": false, "logits_per_token": -1.4815006256103516, "logits_per_char": -0.7407503128051758, "num_chars": 2}, {"sum_logits": -1.20891535282135, "num_tokens": 1, "num_tokens_all": 330, "is_greedy": true, "logits_per_token": -1.20891535282135, "logits_per_char": -0.604457676410675, "num_chars": 2}], "label": 3, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 56, "native_id": 56, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.52428138256073, "incorrect_loss_raw": 1.3511553605397542, "correct_loss_per_char": 0.762140691280365, "incorrect_loss_per_char": 0.6755776802698771, "correct_loss_per_token": 1.52428138256073, "incorrect_loss_per_token": 1.3511553605397542, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.52428138256073, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.52428138256073, "logits_per_char": -0.762140691280365, "num_chars": 2}, {"sum_logits": -1.351934552192688, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.351934552192688, "logits_per_char": -0.675967276096344, "num_chars": 2}, {"sum_logits": -1.368146538734436, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.368146538734436, "logits_per_char": -0.684073269367218, "num_chars": 2}, {"sum_logits": -1.3333849906921387, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.3333849906921387, "logits_per_char": -0.6666924953460693, "num_chars": 2}], "label": 0, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 57, "native_id": 57, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5041191577911377, "incorrect_loss_raw": 1.3625326951344807, "correct_loss_per_char": 0.7520595788955688, "incorrect_loss_per_char": 0.6812663475672404, "correct_loss_per_token": 1.5041191577911377, "incorrect_loss_per_token": 1.3625326951344807, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4162843227386475, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4162843227386475, "logits_per_char": -0.7081421613693237, "num_chars": 2}, {"sum_logits": -1.4237165451049805, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.4237165451049805, "logits_per_char": -0.7118582725524902, "num_chars": 2}, {"sum_logits": -1.5041191577911377, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": false, "logits_per_token": -1.5041191577911377, "logits_per_char": -0.7520595788955688, "num_chars": 2}, {"sum_logits": -1.2475972175598145, "num_tokens": 1, "num_tokens_all": 340, "is_greedy": true, "logits_per_token": -1.2475972175598145, "logits_per_char": -0.6237986087799072, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 58, "native_id": 58, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3964601755142212, "incorrect_loss_raw": 1.395130713780721, "correct_loss_per_char": 0.6982300877571106, "incorrect_loss_per_char": 0.6975653568903605, "correct_loss_per_token": 1.3964601755142212, "incorrect_loss_per_token": 1.395130713780721, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.421704649925232, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.421704649925232, "logits_per_char": -0.710852324962616, "num_chars": 2}, {"sum_logits": -1.494692087173462, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.494692087173462, "logits_per_char": -0.747346043586731, "num_chars": 2}, {"sum_logits": -1.2689954042434692, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.2689954042434692, "logits_per_char": -0.6344977021217346, "num_chars": 2}, {"sum_logits": -1.3964601755142212, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.3964601755142212, "logits_per_char": -0.6982300877571106, "num_chars": 2}], "label": 3, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 59, "native_id": 59, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2905855178833008, "incorrect_loss_raw": 1.4309311707814534, "correct_loss_per_char": 0.6452927589416504, "incorrect_loss_per_char": 0.7154655853907267, "correct_loss_per_token": 1.2905855178833008, "incorrect_loss_per_token": 1.4309311707814534, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4182418584823608, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4182418584823608, "logits_per_char": -0.7091209292411804, "num_chars": 2}, {"sum_logits": -1.3941551446914673, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.3941551446914673, "logits_per_char": -0.6970775723457336, "num_chars": 2}, {"sum_logits": -1.4803965091705322, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4803965091705322, "logits_per_char": -0.7401982545852661, "num_chars": 2}, {"sum_logits": -1.2905855178833008, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.2905855178833008, "logits_per_char": -0.6452927589416504, "num_chars": 2}], "label": 3, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 60, "native_id": 60, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.302152156829834, "incorrect_loss_raw": 1.424132506052653, "correct_loss_per_char": 0.651076078414917, "incorrect_loss_per_char": 0.7120662530263265, "correct_loss_per_token": 1.302152156829834, "incorrect_loss_per_token": 1.424132506052653, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4007667303085327, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.4007667303085327, "logits_per_char": -0.7003833651542664, "num_chars": 2}, {"sum_logits": -1.4317498207092285, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.4317498207092285, "logits_per_char": -0.7158749103546143, "num_chars": 2}, {"sum_logits": -1.4398809671401978, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": false, "logits_per_token": -1.4398809671401978, "logits_per_char": -0.7199404835700989, "num_chars": 2}, {"sum_logits": -1.302152156829834, "num_tokens": 1, "num_tokens_all": 337, "is_greedy": true, "logits_per_token": -1.302152156829834, "logits_per_char": -0.651076078414917, "num_chars": 2}], "label": 3, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 61, "native_id": 61, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4312598705291748, "incorrect_loss_raw": 1.3951601187388103, "correct_loss_per_char": 0.7156299352645874, "incorrect_loss_per_char": 0.6975800593694051, "correct_loss_per_token": 1.4312598705291748, "incorrect_loss_per_token": 1.3951601187388103, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5593225955963135, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.5593225955963135, "logits_per_char": -0.7796612977981567, "num_chars": 2}, {"sum_logits": -1.4855937957763672, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4855937957763672, "logits_per_char": -0.7427968978881836, "num_chars": 2}, {"sum_logits": -1.4312598705291748, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4312598705291748, "logits_per_char": -0.7156299352645874, "num_chars": 2}, {"sum_logits": -1.14056396484375, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.14056396484375, "logits_per_char": -0.570281982421875, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 62, "native_id": 62, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3671926259994507, "incorrect_loss_raw": 1.4011414845784504, "correct_loss_per_char": 0.6835963129997253, "incorrect_loss_per_char": 0.7005707422892252, "correct_loss_per_token": 1.3671926259994507, "incorrect_loss_per_token": 1.4011414845784504, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4609004259109497, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.4609004259109497, "logits_per_char": -0.7304502129554749, "num_chars": 2}, {"sum_logits": -1.3671926259994507, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.3671926259994507, "logits_per_char": -0.6835963129997253, "num_chars": 2}, {"sum_logits": -1.4255353212356567, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.4255353212356567, "logits_per_char": -0.7127676606178284, "num_chars": 2}, {"sum_logits": -1.3169887065887451, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": true, "logits_per_token": -1.3169887065887451, "logits_per_char": -0.6584943532943726, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 63, "native_id": 63, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5851492881774902, "incorrect_loss_raw": 1.3395481904347737, "correct_loss_per_char": 0.7925746440887451, "incorrect_loss_per_char": 0.6697740952173868, "correct_loss_per_token": 1.5851492881774902, "incorrect_loss_per_token": 1.3395481904347737, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2407119274139404, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.2407119274139404, "logits_per_char": -0.6203559637069702, "num_chars": 2}, {"sum_logits": -1.4369733333587646, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4369733333587646, "logits_per_char": -0.7184866666793823, "num_chars": 2}, {"sum_logits": -1.5851492881774902, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.5851492881774902, "logits_per_char": -0.7925746440887451, "num_chars": 2}, {"sum_logits": -1.3409593105316162, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.3409593105316162, "logits_per_char": -0.6704796552658081, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 64, "native_id": 64, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.374846339225769, "incorrect_loss_raw": 1.3995441198349, "correct_loss_per_char": 0.6874231696128845, "incorrect_loss_per_char": 0.69977205991745, "correct_loss_per_token": 1.374846339225769, "incorrect_loss_per_token": 1.3995441198349, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4495353698730469, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.4495353698730469, "logits_per_char": -0.7247676849365234, "num_chars": 2}, {"sum_logits": -1.4472920894622803, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.4472920894622803, "logits_per_char": -0.7236460447311401, "num_chars": 2}, {"sum_logits": -1.374846339225769, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": false, "logits_per_token": -1.374846339225769, "logits_per_char": -0.6874231696128845, "num_chars": 2}, {"sum_logits": -1.3018049001693726, "num_tokens": 1, "num_tokens_all": 361, "is_greedy": true, "logits_per_token": -1.3018049001693726, "logits_per_char": -0.6509024500846863, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 65, "native_id": 65, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.340667963027954, "incorrect_loss_raw": 1.4150243997573853, "correct_loss_per_char": 0.670333981513977, "incorrect_loss_per_char": 0.7075121998786926, "correct_loss_per_token": 1.340667963027954, "incorrect_loss_per_token": 1.4150243997573853, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5200755596160889, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.5200755596160889, "logits_per_char": -0.7600377798080444, "num_chars": 2}, {"sum_logits": -1.340667963027954, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": true, "logits_per_token": -1.340667963027954, "logits_per_char": -0.670333981513977, "num_chars": 2}, {"sum_logits": -1.3750029802322388, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.3750029802322388, "logits_per_char": -0.6875014901161194, "num_chars": 2}, {"sum_logits": -1.3499946594238281, "num_tokens": 1, "num_tokens_all": 375, "is_greedy": false, "logits_per_token": -1.3499946594238281, "logits_per_char": -0.6749973297119141, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 66, "native_id": 66, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2796448469161987, "incorrect_loss_raw": 1.4343043963114421, "correct_loss_per_char": 0.6398224234580994, "incorrect_loss_per_char": 0.7171521981557211, "correct_loss_per_token": 1.2796448469161987, "incorrect_loss_per_token": 1.4343043963114421, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4382573366165161, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4382573366165161, "logits_per_char": -0.7191286683082581, "num_chars": 2}, {"sum_logits": -1.4688067436218262, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.4688067436218262, "logits_per_char": -0.7344033718109131, "num_chars": 2}, {"sum_logits": -1.3958491086959839, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": false, "logits_per_token": -1.3958491086959839, "logits_per_char": -0.6979245543479919, "num_chars": 2}, {"sum_logits": -1.2796448469161987, "num_tokens": 1, "num_tokens_all": 354, "is_greedy": true, "logits_per_token": -1.2796448469161987, "logits_per_char": -0.6398224234580994, "num_chars": 2}], "label": 3, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 67, "native_id": 67, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4068788290023804, "incorrect_loss_raw": 1.3889549175898235, "correct_loss_per_char": 0.7034394145011902, "incorrect_loss_per_char": 0.6944774587949117, "correct_loss_per_token": 1.4068788290023804, "incorrect_loss_per_token": 1.3889549175898235, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4139437675476074, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4139437675476074, "logits_per_char": -0.7069718837738037, "num_chars": 2}, {"sum_logits": -1.3312739133834839, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.3312739133834839, "logits_per_char": -0.6656369566917419, "num_chars": 2}, {"sum_logits": -1.4068788290023804, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4068788290023804, "logits_per_char": -0.7034394145011902, "num_chars": 2}, {"sum_logits": -1.421647071838379, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.421647071838379, "logits_per_char": -0.7108235359191895, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 68, "native_id": 68, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3937675952911377, "incorrect_loss_raw": 1.396345814069112, "correct_loss_per_char": 0.6968837976455688, "incorrect_loss_per_char": 0.698172907034556, "correct_loss_per_token": 1.3937675952911377, "incorrect_loss_per_token": 1.396345814069112, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3327804803848267, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.3327804803848267, "logits_per_char": -0.6663902401924133, "num_chars": 2}, {"sum_logits": -1.322502613067627, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": true, "logits_per_token": -1.322502613067627, "logits_per_char": -0.6612513065338135, "num_chars": 2}, {"sum_logits": -1.5337543487548828, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.5337543487548828, "logits_per_char": -0.7668771743774414, "num_chars": 2}, {"sum_logits": -1.3937675952911377, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.3937675952911377, "logits_per_char": -0.6968837976455688, "num_chars": 2}], "label": 3, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 69, "native_id": 69, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4348045587539673, "incorrect_loss_raw": 1.399053692817688, "correct_loss_per_char": 0.7174022793769836, "incorrect_loss_per_char": 0.699526846408844, "correct_loss_per_token": 1.4348045587539673, "incorrect_loss_per_token": 1.399053692817688, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.561347246170044, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.561347246170044, "logits_per_char": -0.780673623085022, "num_chars": 2}, {"sum_logits": -1.4348045587539673, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.4348045587539673, "logits_per_char": -0.7174022793769836, "num_chars": 2}, {"sum_logits": -1.5114295482635498, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": false, "logits_per_token": -1.5114295482635498, "logits_per_char": -0.7557147741317749, "num_chars": 2}, {"sum_logits": -1.1243842840194702, "num_tokens": 1, "num_tokens_all": 377, "is_greedy": true, "logits_per_token": -1.1243842840194702, "logits_per_char": -0.5621921420097351, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 70, "native_id": 70, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3193047046661377, "incorrect_loss_raw": 1.4204453627268474, "correct_loss_per_char": 0.6596523523330688, "incorrect_loss_per_char": 0.7102226813634237, "correct_loss_per_token": 1.3193047046661377, "incorrect_loss_per_token": 1.4204453627268474, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5459351539611816, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.5459351539611816, "logits_per_char": -0.7729675769805908, "num_chars": 2}, {"sum_logits": -1.3193047046661377, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": true, "logits_per_token": -1.3193047046661377, "logits_per_char": -0.6596523523330688, "num_chars": 2}, {"sum_logits": -1.3227715492248535, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.3227715492248535, "logits_per_char": -0.6613857746124268, "num_chars": 2}, {"sum_logits": -1.3926293849945068, "num_tokens": 1, "num_tokens_all": 396, "is_greedy": false, "logits_per_token": -1.3926293849945068, "logits_per_char": -0.6963146924972534, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 71, "native_id": 71, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4539293050765991, "incorrect_loss_raw": 1.3747801383336384, "correct_loss_per_char": 0.7269646525382996, "incorrect_loss_per_char": 0.6873900691668192, "correct_loss_per_token": 1.4539293050765991, "incorrect_loss_per_token": 1.3747801383336384, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4539293050765991, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.4539293050765991, "logits_per_char": -0.7269646525382996, "num_chars": 2}, {"sum_logits": -1.3482908010482788, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.3482908010482788, "logits_per_char": -0.6741454005241394, "num_chars": 2}, {"sum_logits": -1.4563688039779663, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.4563688039779663, "logits_per_char": -0.7281844019889832, "num_chars": 2}, {"sum_logits": -1.3196808099746704, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": true, "logits_per_token": -1.3196808099746704, "logits_per_char": -0.6598404049873352, "num_chars": 2}], "label": 0, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 72, "native_id": 72, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.254641056060791, "incorrect_loss_raw": 1.4426615238189697, "correct_loss_per_char": 0.6273205280303955, "incorrect_loss_per_char": 0.7213307619094849, "correct_loss_per_token": 1.254641056060791, "incorrect_loss_per_token": 1.4426615238189697, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.413939118385315, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.413939118385315, "logits_per_char": -0.7069695591926575, "num_chars": 2}, {"sum_logits": -1.4840360879898071, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.4840360879898071, "logits_per_char": -0.7420180439949036, "num_chars": 2}, {"sum_logits": -1.430009365081787, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.430009365081787, "logits_per_char": -0.7150046825408936, "num_chars": 2}, {"sum_logits": -1.254641056060791, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": true, "logits_per_token": -1.254641056060791, "logits_per_char": -0.6273205280303955, "num_chars": 2}], "label": 3, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 73, "native_id": 73, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4002169370651245, "incorrect_loss_raw": 1.3939930200576782, "correct_loss_per_char": 0.7001084685325623, "incorrect_loss_per_char": 0.6969965100288391, "correct_loss_per_token": 1.4002169370651245, "incorrect_loss_per_token": 1.3939930200576782, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4002169370651245, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.4002169370651245, "logits_per_char": -0.7001084685325623, "num_chars": 2}, {"sum_logits": -1.2965366840362549, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": true, "logits_per_token": -1.2965366840362549, "logits_per_char": -0.6482683420181274, "num_chars": 2}, {"sum_logits": -1.4890124797821045, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.4890124797821045, "logits_per_char": -0.7445062398910522, "num_chars": 2}, {"sum_logits": -1.3964298963546753, "num_tokens": 1, "num_tokens_all": 399, "is_greedy": false, "logits_per_token": -1.3964298963546753, "logits_per_char": -0.6982149481773376, "num_chars": 2}], "label": 0, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 74, "native_id": 74, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.257271409034729, "incorrect_loss_raw": 1.4439935684204102, "correct_loss_per_char": 0.6286357045173645, "incorrect_loss_per_char": 0.7219967842102051, "correct_loss_per_token": 1.257271409034729, "incorrect_loss_per_token": 1.4439935684204102, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4870247840881348, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4870247840881348, "logits_per_char": -0.7435123920440674, "num_chars": 2}, {"sum_logits": -1.3913884162902832, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.3913884162902832, "logits_per_char": -0.6956942081451416, "num_chars": 2}, {"sum_logits": -1.4535675048828125, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": false, "logits_per_token": -1.4535675048828125, "logits_per_char": -0.7267837524414062, "num_chars": 2}, {"sum_logits": -1.257271409034729, "num_tokens": 1, "num_tokens_all": 357, "is_greedy": true, "logits_per_token": -1.257271409034729, "logits_per_char": -0.6286357045173645, "num_chars": 2}], "label": 3, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 75, "native_id": 75, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4755953550338745, "incorrect_loss_raw": 1.3689340353012085, "correct_loss_per_char": 0.7377976775169373, "incorrect_loss_per_char": 0.6844670176506042, "correct_loss_per_token": 1.4755953550338745, "incorrect_loss_per_token": 1.3689340353012085, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.350499153137207, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.350499153137207, "logits_per_char": -0.6752495765686035, "num_chars": 2}, {"sum_logits": -1.4063252210617065, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4063252210617065, "logits_per_char": -0.7031626105308533, "num_chars": 2}, {"sum_logits": -1.4755953550338745, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.4755953550338745, "logits_per_char": -0.7377976775169373, "num_chars": 2}, {"sum_logits": -1.349977731704712, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.349977731704712, "logits_per_char": -0.674988865852356, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 76, "native_id": 76, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4085084199905396, "incorrect_loss_raw": 1.3871545791625977, "correct_loss_per_char": 0.7042542099952698, "incorrect_loss_per_char": 0.6935772895812988, "correct_loss_per_token": 1.4085084199905396, "incorrect_loss_per_token": 1.3871545791625977, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4085084199905396, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.4085084199905396, "logits_per_char": -0.7042542099952698, "num_chars": 2}, {"sum_logits": -1.434197187423706, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.434197187423706, "logits_per_char": -0.717098593711853, "num_chars": 2}, {"sum_logits": -1.415527105331421, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.415527105331421, "logits_per_char": -0.7077635526657104, "num_chars": 2}, {"sum_logits": -1.311739444732666, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": true, "logits_per_token": -1.311739444732666, "logits_per_char": -0.655869722366333, "num_chars": 2}], "label": 0, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 77, "native_id": 77, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3950653076171875, "incorrect_loss_raw": 1.403741717338562, "correct_loss_per_char": 0.6975326538085938, "incorrect_loss_per_char": 0.701870858669281, "correct_loss_per_token": 1.3950653076171875, "incorrect_loss_per_token": 1.403741717338562, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3950653076171875, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": false, "logits_per_token": -1.3950653076171875, "logits_per_char": -0.6975326538085938, "num_chars": 2}, {"sum_logits": -1.4594858884811401, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": false, "logits_per_token": -1.4594858884811401, "logits_per_char": -0.7297429442405701, "num_chars": 2}, {"sum_logits": -1.5160008668899536, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": false, "logits_per_token": -1.5160008668899536, "logits_per_char": -0.7580004334449768, "num_chars": 2}, {"sum_logits": -1.2357383966445923, "num_tokens": 1, "num_tokens_all": 419, "is_greedy": true, "logits_per_token": -1.2357383966445923, "logits_per_char": -0.6178691983222961, "num_chars": 2}], "label": 0, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 78, "native_id": 78, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4428610801696777, "incorrect_loss_raw": 1.3861937522888184, "correct_loss_per_char": 0.7214305400848389, "incorrect_loss_per_char": 0.6930968761444092, "correct_loss_per_token": 1.4428610801696777, "incorrect_loss_per_token": 1.3861937522888184, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5019128322601318, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.5019128322601318, "logits_per_char": -0.7509564161300659, "num_chars": 2}, {"sum_logits": -1.422453761100769, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.422453761100769, "logits_per_char": -0.7112268805503845, "num_chars": 2}, {"sum_logits": -1.4428610801696777, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.4428610801696777, "logits_per_char": -0.7214305400848389, "num_chars": 2}, {"sum_logits": -1.2342146635055542, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": true, "logits_per_token": -1.2342146635055542, "logits_per_char": -0.6171073317527771, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 79, "native_id": 79, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3610156774520874, "incorrect_loss_raw": 1.4066529671351116, "correct_loss_per_char": 0.6805078387260437, "incorrect_loss_per_char": 0.7033264835675558, "correct_loss_per_token": 1.3610156774520874, "incorrect_loss_per_token": 1.4066529671351116, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5225169658660889, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.5225169658660889, "logits_per_char": -0.7612584829330444, "num_chars": 2}, {"sum_logits": -1.3947315216064453, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.3947315216064453, "logits_per_char": -0.6973657608032227, "num_chars": 2}, {"sum_logits": -1.3027104139328003, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": true, "logits_per_token": -1.3027104139328003, "logits_per_char": -0.6513552069664001, "num_chars": 2}, {"sum_logits": -1.3610156774520874, "num_tokens": 1, "num_tokens_all": 366, "is_greedy": false, "logits_per_token": -1.3610156774520874, "logits_per_char": -0.6805078387260437, "num_chars": 2}], "label": 3, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 80, "native_id": 80, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5300803184509277, "incorrect_loss_raw": 1.362587332725525, "correct_loss_per_char": 0.7650401592254639, "incorrect_loss_per_char": 0.6812936663627625, "correct_loss_per_token": 1.5300803184509277, "incorrect_loss_per_token": 1.362587332725525, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2259231805801392, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": true, "logits_per_token": -1.2259231805801392, "logits_per_char": -0.6129615902900696, "num_chars": 2}, {"sum_logits": -1.5300803184509277, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.5300803184509277, "logits_per_char": -0.7650401592254639, "num_chars": 2}, {"sum_logits": -1.5111362934112549, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.5111362934112549, "logits_per_char": -0.7555681467056274, "num_chars": 2}, {"sum_logits": -1.3507025241851807, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.3507025241851807, "logits_per_char": -0.6753512620925903, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 81, "native_id": 81, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4773914813995361, "incorrect_loss_raw": 1.3706538279851277, "correct_loss_per_char": 0.7386957406997681, "incorrect_loss_per_char": 0.6853269139925638, "correct_loss_per_token": 1.4773914813995361, "incorrect_loss_per_token": 1.3706538279851277, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4844069480895996, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4844069480895996, "logits_per_char": -0.7422034740447998, "num_chars": 2}, {"sum_logits": -1.4773914813995361, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4773914813995361, "logits_per_char": -0.7386957406997681, "num_chars": 2}, {"sum_logits": -1.4014756679534912, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4014756679534912, "logits_per_char": -0.7007378339767456, "num_chars": 2}, {"sum_logits": -1.2260788679122925, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.2260788679122925, "logits_per_char": -0.6130394339561462, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 82, "native_id": 82, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.381076693534851, "incorrect_loss_raw": 1.3963967164357503, "correct_loss_per_char": 0.6905383467674255, "incorrect_loss_per_char": 0.6981983582178751, "correct_loss_per_token": 1.381076693534851, "incorrect_loss_per_token": 1.3963967164357503, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4200353622436523, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.4200353622436523, "logits_per_char": -0.7100176811218262, "num_chars": 2}, {"sum_logits": -1.4258406162261963, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.4258406162261963, "logits_per_char": -0.7129203081130981, "num_chars": 2}, {"sum_logits": -1.381076693534851, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.381076693534851, "logits_per_char": -0.6905383467674255, "num_chars": 2}, {"sum_logits": -1.3433141708374023, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": true, "logits_per_token": -1.3433141708374023, "logits_per_char": -0.6716570854187012, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 83, "native_id": 83, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4181771278381348, "incorrect_loss_raw": 1.3899809122085571, "correct_loss_per_char": 0.7090885639190674, "incorrect_loss_per_char": 0.6949904561042786, "correct_loss_per_token": 1.4181771278381348, "incorrect_loss_per_token": 1.3899809122085571, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5328834056854248, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.5328834056854248, "logits_per_char": -0.7664417028427124, "num_chars": 2}, {"sum_logits": -1.4181771278381348, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.4181771278381348, "logits_per_char": -0.7090885639190674, "num_chars": 2}, {"sum_logits": -1.3570555448532104, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.3570555448532104, "logits_per_char": -0.6785277724266052, "num_chars": 2}, {"sum_logits": -1.2800037860870361, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": true, "logits_per_token": -1.2800037860870361, "logits_per_char": -0.6400018930435181, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 84, "native_id": 84, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.384613037109375, "incorrect_loss_raw": 1.3981503248214722, "correct_loss_per_char": 0.6923065185546875, "incorrect_loss_per_char": 0.6990751624107361, "correct_loss_per_token": 1.384613037109375, "incorrect_loss_per_token": 1.3981503248214722, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4857311248779297, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.4857311248779297, "logits_per_char": -0.7428655624389648, "num_chars": 2}, {"sum_logits": -1.384613037109375, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.384613037109375, "logits_per_char": -0.6923065185546875, "num_chars": 2}, {"sum_logits": -1.3361790180206299, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": true, "logits_per_token": -1.3361790180206299, "logits_per_char": -0.6680895090103149, "num_chars": 2}, {"sum_logits": -1.372540831565857, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.372540831565857, "logits_per_char": -0.6862704157829285, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 85, "native_id": 85, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3732819557189941, "incorrect_loss_raw": 1.4022226730982463, "correct_loss_per_char": 0.6866409778594971, "incorrect_loss_per_char": 0.7011113365491232, "correct_loss_per_token": 1.3732819557189941, "incorrect_loss_per_token": 1.4022226730982463, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.525125503540039, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.525125503540039, "logits_per_char": -0.7625627517700195, "num_chars": 2}, {"sum_logits": -1.3740441799163818, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.3740441799163818, "logits_per_char": -0.6870220899581909, "num_chars": 2}, {"sum_logits": -1.3732819557189941, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.3732819557189941, "logits_per_char": -0.6866409778594971, "num_chars": 2}, {"sum_logits": -1.3074983358383179, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.3074983358383179, "logits_per_char": -0.6537491679191589, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 86, "native_id": 86, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5560740232467651, "incorrect_loss_raw": 1.3495208024978638, "correct_loss_per_char": 0.7780370116233826, "incorrect_loss_per_char": 0.6747604012489319, "correct_loss_per_token": 1.5560740232467651, "incorrect_loss_per_token": 1.3495208024978638, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4646544456481934, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.4646544456481934, "logits_per_char": -0.7323272228240967, "num_chars": 2}, {"sum_logits": -1.281211495399475, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": true, "logits_per_token": -1.281211495399475, "logits_per_char": -0.6406057476997375, "num_chars": 2}, {"sum_logits": -1.5560740232467651, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.5560740232467651, "logits_per_char": -0.7780370116233826, "num_chars": 2}, {"sum_logits": -1.3026964664459229, "num_tokens": 1, "num_tokens_all": 395, "is_greedy": false, "logits_per_token": -1.3026964664459229, "logits_per_char": -0.6513482332229614, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 87, "native_id": 87, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4430649280548096, "incorrect_loss_raw": 1.3754634062449138, "correct_loss_per_char": 0.7215324640274048, "incorrect_loss_per_char": 0.6877317031224569, "correct_loss_per_token": 1.4430649280548096, "incorrect_loss_per_token": 1.3754634062449138, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3798530101776123, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.3798530101776123, "logits_per_char": -0.6899265050888062, "num_chars": 2}, {"sum_logits": -1.4145967960357666, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.4145967960357666, "logits_per_char": -0.7072983980178833, "num_chars": 2}, {"sum_logits": -1.4430649280548096, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.4430649280548096, "logits_per_char": -0.7215324640274048, "num_chars": 2}, {"sum_logits": -1.3319404125213623, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": true, "logits_per_token": -1.3319404125213623, "logits_per_char": -0.6659702062606812, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 88, "native_id": 88, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3409428596496582, "incorrect_loss_raw": 1.4166302680969238, "correct_loss_per_char": 0.6704714298248291, "incorrect_loss_per_char": 0.7083151340484619, "correct_loss_per_token": 1.3409428596496582, "incorrect_loss_per_token": 1.4166302680969238, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5632479190826416, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.5632479190826416, "logits_per_char": -0.7816239595413208, "num_chars": 2}, {"sum_logits": -1.4071846008300781, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.4071846008300781, "logits_per_char": -0.7035923004150391, "num_chars": 2}, {"sum_logits": -1.3409428596496582, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": false, "logits_per_token": -1.3409428596496582, "logits_per_char": -0.6704714298248291, "num_chars": 2}, {"sum_logits": -1.2794582843780518, "num_tokens": 1, "num_tokens_all": 367, "is_greedy": true, "logits_per_token": -1.2794582843780518, "logits_per_char": -0.6397291421890259, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 89, "native_id": 89, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5363613367080688, "incorrect_loss_raw": 1.349589228630066, "correct_loss_per_char": 0.7681806683540344, "incorrect_loss_per_char": 0.674794614315033, "correct_loss_per_token": 1.5363613367080688, "incorrect_loss_per_token": 1.349589228630066, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5363613367080688, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.5363613367080688, "logits_per_char": -0.7681806683540344, "num_chars": 2}, {"sum_logits": -1.3562228679656982, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.3562228679656982, "logits_per_char": -0.6781114339828491, "num_chars": 2}, {"sum_logits": -1.3163365125656128, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": true, "logits_per_token": -1.3163365125656128, "logits_per_char": -0.6581682562828064, "num_chars": 2}, {"sum_logits": -1.3762083053588867, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.3762083053588867, "logits_per_char": -0.6881041526794434, "num_chars": 2}], "label": 0, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 90, "native_id": 90, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3104743957519531, "incorrect_loss_raw": 1.4228119055430095, "correct_loss_per_char": 0.6552371978759766, "incorrect_loss_per_char": 0.7114059527715048, "correct_loss_per_token": 1.3104743957519531, "incorrect_loss_per_token": 1.4228119055430095, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3818178176879883, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.3818178176879883, "logits_per_char": -0.6909089088439941, "num_chars": 2}, {"sum_logits": -1.3812520503997803, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.3812520503997803, "logits_per_char": -0.6906260251998901, "num_chars": 2}, {"sum_logits": -1.5053658485412598, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.5053658485412598, "logits_per_char": -0.7526829242706299, "num_chars": 2}, {"sum_logits": -1.3104743957519531, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": true, "logits_per_token": -1.3104743957519531, "logits_per_char": -0.6552371978759766, "num_chars": 2}], "label": 3, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 91, "native_id": 91, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4354448318481445, "incorrect_loss_raw": 1.3936253388722737, "correct_loss_per_char": 0.7177224159240723, "incorrect_loss_per_char": 0.6968126694361368, "correct_loss_per_token": 1.4354448318481445, "incorrect_loss_per_token": 1.3936253388722737, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6251407861709595, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.6251407861709595, "logits_per_char": -0.8125703930854797, "num_chars": 2}, {"sum_logits": -1.4354448318481445, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.4354448318481445, "logits_per_char": -0.7177224159240723, "num_chars": 2}, {"sum_logits": -1.3810405731201172, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": false, "logits_per_token": -1.3810405731201172, "logits_per_char": -0.6905202865600586, "num_chars": 2}, {"sum_logits": -1.1746946573257446, "num_tokens": 1, "num_tokens_all": 413, "is_greedy": true, "logits_per_token": -1.1746946573257446, "logits_per_char": -0.5873473286628723, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 92, "native_id": 92, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3720020055770874, "incorrect_loss_raw": 1.3998610973358154, "correct_loss_per_char": 0.6860010027885437, "incorrect_loss_per_char": 0.6999305486679077, "correct_loss_per_token": 1.3720020055770874, "incorrect_loss_per_token": 1.3998610973358154, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3721803426742554, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.3721803426742554, "logits_per_char": -0.6860901713371277, "num_chars": 2}, {"sum_logits": -1.3720020055770874, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.3720020055770874, "logits_per_char": -0.6860010027885437, "num_chars": 2}, {"sum_logits": -1.469805359840393, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.469805359840393, "logits_per_char": -0.7349026799201965, "num_chars": 2}, {"sum_logits": -1.3575975894927979, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.3575975894927979, "logits_per_char": -0.6787987947463989, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 93, "native_id": 93, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.376626968383789, "incorrect_loss_raw": 1.3986618916193645, "correct_loss_per_char": 0.6883134841918945, "incorrect_loss_per_char": 0.6993309458096822, "correct_loss_per_token": 1.376626968383789, "incorrect_loss_per_token": 1.3986618916193645, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4904141426086426, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4904141426086426, "logits_per_char": -0.7452070713043213, "num_chars": 2}, {"sum_logits": -1.4159488677978516, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.4159488677978516, "logits_per_char": -0.7079744338989258, "num_chars": 2}, {"sum_logits": -1.2896226644515991, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": true, "logits_per_token": -1.2896226644515991, "logits_per_char": -0.6448113322257996, "num_chars": 2}, {"sum_logits": -1.376626968383789, "num_tokens": 1, "num_tokens_all": 360, "is_greedy": false, "logits_per_token": -1.376626968383789, "logits_per_char": -0.6883134841918945, "num_chars": 2}], "label": 3, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 94, "native_id": 94, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4370901584625244, "incorrect_loss_raw": 1.3807520071665447, "correct_loss_per_char": 0.7185450792312622, "incorrect_loss_per_char": 0.6903760035832723, "correct_loss_per_token": 1.4370901584625244, "incorrect_loss_per_token": 1.3807520071665447, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4370901584625244, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.4370901584625244, "logits_per_char": -0.7185450792312622, "num_chars": 2}, {"sum_logits": -1.5022492408752441, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.5022492408752441, "logits_per_char": -0.7511246204376221, "num_chars": 2}, {"sum_logits": -1.3253902196884155, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.3253902196884155, "logits_per_char": -0.6626951098442078, "num_chars": 2}, {"sum_logits": -1.3146165609359741, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": true, "logits_per_token": -1.3146165609359741, "logits_per_char": -0.6573082804679871, "num_chars": 2}], "label": 0, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 95, "native_id": 95, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4175785779953003, "incorrect_loss_raw": 1.3838114341100056, "correct_loss_per_char": 0.7087892889976501, "incorrect_loss_per_char": 0.6919057170550028, "correct_loss_per_token": 1.4175785779953003, "incorrect_loss_per_token": 1.3838114341100056, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4013547897338867, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4013547897338867, "logits_per_char": -0.7006773948669434, "num_chars": 2}, {"sum_logits": -1.361914038658142, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": true, "logits_per_token": -1.361914038658142, "logits_per_char": -0.680957019329071, "num_chars": 2}, {"sum_logits": -1.4175785779953003, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.4175785779953003, "logits_per_char": -0.7087892889976501, "num_chars": 2}, {"sum_logits": -1.3881654739379883, "num_tokens": 1, "num_tokens_all": 349, "is_greedy": false, "logits_per_token": -1.3881654739379883, "logits_per_char": -0.6940827369689941, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 96, "native_id": 96, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4709925651550293, "incorrect_loss_raw": 1.3706598281860352, "correct_loss_per_char": 0.7354962825775146, "incorrect_loss_per_char": 0.6853299140930176, "correct_loss_per_token": 1.4709925651550293, "incorrect_loss_per_token": 1.3706598281860352, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4709925651550293, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4709925651550293, "logits_per_char": -0.7354962825775146, "num_chars": 2}, {"sum_logits": -1.4650468826293945, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4650468826293945, "logits_per_char": -0.7325234413146973, "num_chars": 2}, {"sum_logits": -1.3989338874816895, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.3989338874816895, "logits_per_char": -0.6994669437408447, "num_chars": 2}, {"sum_logits": -1.2479987144470215, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.2479987144470215, "logits_per_char": -0.6239993572235107, "num_chars": 2}], "label": 0, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 97, "native_id": 97, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4642078876495361, "incorrect_loss_raw": 1.3742584784825642, "correct_loss_per_char": 0.7321039438247681, "incorrect_loss_per_char": 0.6871292392412821, "correct_loss_per_token": 1.4642078876495361, "incorrect_loss_per_token": 1.3742584784825642, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5092365741729736, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.5092365741729736, "logits_per_char": -0.7546182870864868, "num_chars": 2}, {"sum_logits": -1.3441048860549927, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.3441048860549927, "logits_per_char": -0.6720524430274963, "num_chars": 2}, {"sum_logits": -1.4642078876495361, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.4642078876495361, "logits_per_char": -0.7321039438247681, "num_chars": 2}, {"sum_logits": -1.2694339752197266, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": true, "logits_per_token": -1.2694339752197266, "logits_per_char": -0.6347169876098633, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 98, "native_id": 98, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2874724864959717, "incorrect_loss_raw": 1.4313629070917766, "correct_loss_per_char": 0.6437362432479858, "incorrect_loss_per_char": 0.7156814535458883, "correct_loss_per_token": 1.2874724864959717, "incorrect_loss_per_token": 1.4313629070917766, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2874724864959717, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": true, "logits_per_token": -1.2874724864959717, "logits_per_char": -0.6437362432479858, "num_chars": 2}, {"sum_logits": -1.3930150270462036, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.3930150270462036, "logits_per_char": -0.6965075135231018, "num_chars": 2}, {"sum_logits": -1.4767061471939087, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.4767061471939087, "logits_per_char": -0.7383530735969543, "num_chars": 2}, {"sum_logits": -1.4243675470352173, "num_tokens": 1, "num_tokens_all": 343, "is_greedy": false, "logits_per_token": -1.4243675470352173, "logits_per_char": -0.7121837735176086, "num_chars": 2}], "label": 0, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 99, "native_id": 99, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4580116271972656, "incorrect_loss_raw": 1.3819926579793294, "correct_loss_per_char": 0.7290058135986328, "incorrect_loss_per_char": 0.6909963289896647, "correct_loss_per_token": 1.4580116271972656, "incorrect_loss_per_token": 1.3819926579793294, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.412323236465454, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.412323236465454, "logits_per_char": -0.706161618232727, "num_chars": 2}, {"sum_logits": -1.5293962955474854, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.5293962955474854, "logits_per_char": -0.7646981477737427, "num_chars": 2}, {"sum_logits": -1.4580116271972656, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": false, "logits_per_token": -1.4580116271972656, "logits_per_char": -0.7290058135986328, "num_chars": 2}, {"sum_logits": -1.2042584419250488, "num_tokens": 1, "num_tokens_all": 363, "is_greedy": true, "logits_per_token": -1.2042584419250488, "logits_per_char": -0.6021292209625244, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 100, "native_id": 100, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3114559650421143, "incorrect_loss_raw": 1.4217169284820557, "correct_loss_per_char": 0.6557279825210571, "incorrect_loss_per_char": 0.7108584642410278, "correct_loss_per_token": 1.3114559650421143, "incorrect_loss_per_token": 1.4217169284820557, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.543446660041809, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.543446660041809, "logits_per_char": -0.7717233300209045, "num_chars": 2}, {"sum_logits": -1.3849760293960571, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3849760293960571, "logits_per_char": -0.6924880146980286, "num_chars": 2}, {"sum_logits": -1.3367280960083008, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.3367280960083008, "logits_per_char": -0.6683640480041504, "num_chars": 2}, {"sum_logits": -1.3114559650421143, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.3114559650421143, "logits_per_char": -0.6557279825210571, "num_chars": 2}], "label": 3, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 101, "native_id": 101, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3538398742675781, "incorrect_loss_raw": 1.404577414194743, "correct_loss_per_char": 0.6769199371337891, "incorrect_loss_per_char": 0.7022887070973715, "correct_loss_per_token": 1.3538398742675781, "incorrect_loss_per_token": 1.404577414194743, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.479928970336914, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.479928970336914, "logits_per_char": -0.739964485168457, "num_chars": 2}, {"sum_logits": -1.390432357788086, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.390432357788086, "logits_per_char": -0.695216178894043, "num_chars": 2}, {"sum_logits": -1.3433709144592285, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": true, "logits_per_token": -1.3433709144592285, "logits_per_char": -0.6716854572296143, "num_chars": 2}, {"sum_logits": -1.3538398742675781, "num_tokens": 1, "num_tokens_all": 372, "is_greedy": false, "logits_per_token": -1.3538398742675781, "logits_per_char": -0.6769199371337891, "num_chars": 2}], "label": 3, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 102, "native_id": 102, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4367103576660156, "incorrect_loss_raw": 1.3781170050303142, "correct_loss_per_char": 0.7183551788330078, "incorrect_loss_per_char": 0.6890585025151571, "correct_loss_per_token": 1.4367103576660156, "incorrect_loss_per_token": 1.3781170050303142, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.338394284248352, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": true, "logits_per_token": -1.338394284248352, "logits_per_char": -0.669197142124176, "num_chars": 2}, {"sum_logits": -1.4484442472457886, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.4484442472457886, "logits_per_char": -0.7242221236228943, "num_chars": 2}, {"sum_logits": -1.4367103576660156, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.4367103576660156, "logits_per_char": -0.7183551788330078, "num_chars": 2}, {"sum_logits": -1.3475124835968018, "num_tokens": 1, "num_tokens_all": 344, "is_greedy": false, "logits_per_token": -1.3475124835968018, "logits_per_char": -0.6737562417984009, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 103, "native_id": 103, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4508552551269531, "incorrect_loss_raw": 1.3813927968343098, "correct_loss_per_char": 0.7254276275634766, "incorrect_loss_per_char": 0.6906963984171549, "correct_loss_per_token": 1.4508552551269531, "incorrect_loss_per_token": 1.3813927968343098, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5442543029785156, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.5442543029785156, "logits_per_char": -0.7721271514892578, "num_chars": 2}, {"sum_logits": -1.4508552551269531, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.4508552551269531, "logits_per_char": -0.7254276275634766, "num_chars": 2}, {"sum_logits": -1.378764033317566, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": false, "logits_per_token": -1.378764033317566, "logits_per_char": -0.689382016658783, "num_chars": 2}, {"sum_logits": -1.2211600542068481, "num_tokens": 1, "num_tokens_all": 348, "is_greedy": true, "logits_per_token": -1.2211600542068481, "logits_per_char": -0.6105800271034241, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 104, "native_id": 104, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4914686679840088, "incorrect_loss_raw": 1.3600027163823445, "correct_loss_per_char": 0.7457343339920044, "incorrect_loss_per_char": 0.6800013581911722, "correct_loss_per_token": 1.4914686679840088, "incorrect_loss_per_token": 1.3600027163823445, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4914686679840088, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4914686679840088, "logits_per_char": -0.7457343339920044, "num_chars": 2}, {"sum_logits": -1.4168593883514404, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4168593883514404, "logits_per_char": -0.7084296941757202, "num_chars": 2}, {"sum_logits": -1.3654425144195557, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.3654425144195557, "logits_per_char": -0.6827212572097778, "num_chars": 2}, {"sum_logits": -1.2977062463760376, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.2977062463760376, "logits_per_char": -0.6488531231880188, "num_chars": 2}], "label": 0, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 105, "native_id": 105, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.395822286605835, "incorrect_loss_raw": 1.3955620527267456, "correct_loss_per_char": 0.6979111433029175, "incorrect_loss_per_char": 0.6977810263633728, "correct_loss_per_token": 1.395822286605835, "incorrect_loss_per_token": 1.3955620527267456, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5477468967437744, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.5477468967437744, "logits_per_char": -0.7738734483718872, "num_chars": 2}, {"sum_logits": -1.289015531539917, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": true, "logits_per_token": -1.289015531539917, "logits_per_char": -0.6445077657699585, "num_chars": 2}, {"sum_logits": -1.395822286605835, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.395822286605835, "logits_per_char": -0.6979111433029175, "num_chars": 2}, {"sum_logits": -1.3499237298965454, "num_tokens": 1, "num_tokens_all": 387, "is_greedy": false, "logits_per_token": -1.3499237298965454, "logits_per_char": -0.6749618649482727, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 106, "native_id": 106, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4114506244659424, "incorrect_loss_raw": 1.3874467611312866, "correct_loss_per_char": 0.7057253122329712, "incorrect_loss_per_char": 0.6937233805656433, "correct_loss_per_token": 1.4114506244659424, "incorrect_loss_per_token": 1.3874467611312866, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3872020244598389, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.3872020244598389, "logits_per_char": -0.6936010122299194, "num_chars": 2}, {"sum_logits": -1.3772624731063843, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.3772624731063843, "logits_per_char": -0.6886312365531921, "num_chars": 2}, {"sum_logits": -1.4114506244659424, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4114506244659424, "logits_per_char": -0.7057253122329712, "num_chars": 2}, {"sum_logits": -1.3978757858276367, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.3978757858276367, "logits_per_char": -0.6989378929138184, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 107, "native_id": 107, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.344086766242981, "incorrect_loss_raw": 1.4103563229242961, "correct_loss_per_char": 0.6720433831214905, "incorrect_loss_per_char": 0.7051781614621481, "correct_loss_per_token": 1.344086766242981, "incorrect_loss_per_token": 1.4103563229242961, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4658690690994263, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4658690690994263, "logits_per_char": -0.7329345345497131, "num_chars": 2}, {"sum_logits": -1.4605964422225952, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4605964422225952, "logits_per_char": -0.7302982211112976, "num_chars": 2}, {"sum_logits": -1.3046034574508667, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.3046034574508667, "logits_per_char": -0.6523017287254333, "num_chars": 2}, {"sum_logits": -1.344086766242981, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.344086766242981, "logits_per_char": -0.6720433831214905, "num_chars": 2}], "label": 3, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 108, "native_id": 108, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3475254774093628, "incorrect_loss_raw": 1.4117767810821533, "correct_loss_per_char": 0.6737627387046814, "incorrect_loss_per_char": 0.7058883905410767, "correct_loss_per_token": 1.3475254774093628, "incorrect_loss_per_token": 1.4117767810821533, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3709301948547363, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.3709301948547363, "logits_per_char": -0.6854650974273682, "num_chars": 2}, {"sum_logits": -1.3560113906860352, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.3560113906860352, "logits_per_char": -0.6780056953430176, "num_chars": 2}, {"sum_logits": -1.5083887577056885, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": false, "logits_per_token": -1.5083887577056885, "logits_per_char": -0.7541943788528442, "num_chars": 2}, {"sum_logits": -1.3475254774093628, "num_tokens": 1, "num_tokens_all": 332, "is_greedy": true, "logits_per_token": -1.3475254774093628, "logits_per_char": -0.6737627387046814, "num_chars": 2}], "label": 3, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 109, "native_id": 109, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4159106016159058, "incorrect_loss_raw": 1.3864538669586182, "correct_loss_per_char": 0.7079553008079529, "incorrect_loss_per_char": 0.6932269334793091, "correct_loss_per_token": 1.4159106016159058, "incorrect_loss_per_token": 1.3864538669586182, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4159106016159058, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.4159106016159058, "logits_per_char": -0.7079553008079529, "num_chars": 2}, {"sum_logits": -1.3524104356765747, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": true, "logits_per_token": -1.3524104356765747, "logits_per_char": -0.6762052178382874, "num_chars": 2}, {"sum_logits": -1.421365737915039, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.421365737915039, "logits_per_char": -0.7106828689575195, "num_chars": 2}, {"sum_logits": -1.3855854272842407, "num_tokens": 1, "num_tokens_all": 374, "is_greedy": false, "logits_per_token": -1.3855854272842407, "logits_per_char": -0.6927927136421204, "num_chars": 2}], "label": 0, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 110, "native_id": 110, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3789061307907104, "incorrect_loss_raw": 1.3987061182657878, "correct_loss_per_char": 0.6894530653953552, "incorrect_loss_per_char": 0.6993530591328939, "correct_loss_per_token": 1.3789061307907104, "incorrect_loss_per_token": 1.3987061182657878, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.450645089149475, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.450645089149475, "logits_per_char": -0.7253225445747375, "num_chars": 2}, {"sum_logits": -1.4184496402740479, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.4184496402740479, "logits_per_char": -0.7092248201370239, "num_chars": 2}, {"sum_logits": -1.3789061307907104, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": false, "logits_per_token": -1.3789061307907104, "logits_per_char": -0.6894530653953552, "num_chars": 2}, {"sum_logits": -1.3270236253738403, "num_tokens": 1, "num_tokens_all": 373, "is_greedy": true, "logits_per_token": -1.3270236253738403, "logits_per_char": -0.6635118126869202, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 111, "native_id": 111, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3137973546981812, "incorrect_loss_raw": 1.4243181546529133, "correct_loss_per_char": 0.6568986773490906, "incorrect_loss_per_char": 0.7121590773264567, "correct_loss_per_token": 1.3137973546981812, "incorrect_loss_per_token": 1.4243181546529133, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5422616004943848, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.5422616004943848, "logits_per_char": -0.7711308002471924, "num_chars": 2}, {"sum_logits": -1.339667797088623, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.339667797088623, "logits_per_char": -0.6698338985443115, "num_chars": 2}, {"sum_logits": -1.3137973546981812, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": true, "logits_per_token": -1.3137973546981812, "logits_per_char": -0.6568986773490906, "num_chars": 2}, {"sum_logits": -1.3910250663757324, "num_tokens": 1, "num_tokens_all": 368, "is_greedy": false, "logits_per_token": -1.3910250663757324, "logits_per_char": -0.6955125331878662, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 112, "native_id": 112, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1661854982376099, "incorrect_loss_raw": 1.483367919921875, "correct_loss_per_char": 0.5830927491188049, "incorrect_loss_per_char": 0.7416839599609375, "correct_loss_per_token": 1.1661854982376099, "incorrect_loss_per_token": 1.483367919921875, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.574921727180481, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.574921727180481, "logits_per_char": -0.7874608635902405, "num_chars": 2}, {"sum_logits": -1.51437509059906, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.51437509059906, "logits_per_char": -0.75718754529953, "num_chars": 2}, {"sum_logits": -1.360806941986084, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.360806941986084, "logits_per_char": -0.680403470993042, "num_chars": 2}, {"sum_logits": -1.1661854982376099, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": true, "logits_per_token": -1.1661854982376099, "logits_per_char": -0.5830927491188049, "num_chars": 2}], "label": 3, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 113, "native_id": 113, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4735543727874756, "incorrect_loss_raw": 1.3693673610687256, "correct_loss_per_char": 0.7367771863937378, "incorrect_loss_per_char": 0.6846836805343628, "correct_loss_per_token": 1.4735543727874756, "incorrect_loss_per_token": 1.3693673610687256, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4735543727874756, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.4735543727874756, "logits_per_char": -0.7367771863937378, "num_chars": 2}, {"sum_logits": -1.3696928024291992, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.3696928024291992, "logits_per_char": -0.6848464012145996, "num_chars": 2}, {"sum_logits": -1.443652629852295, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.443652629852295, "logits_per_char": -0.7218263149261475, "num_chars": 2}, {"sum_logits": -1.2947566509246826, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": true, "logits_per_token": -1.2947566509246826, "logits_per_char": -0.6473783254623413, "num_chars": 2}], "label": 0, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 114, "native_id": 114, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.57929265499115, "incorrect_loss_raw": 1.3772577047348022, "correct_loss_per_char": 0.789646327495575, "incorrect_loss_per_char": 0.6886288523674011, "correct_loss_per_token": 1.57929265499115, "incorrect_loss_per_token": 1.3772577047348022, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.57929265499115, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.57929265499115, "logits_per_char": -0.789646327495575, "num_chars": 2}, {"sum_logits": -1.6622905731201172, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.6622905731201172, "logits_per_char": -0.8311452865600586, "num_chars": 2}, {"sum_logits": -1.4658534526824951, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": false, "logits_per_token": -1.4658534526824951, "logits_per_char": -0.7329267263412476, "num_chars": 2}, {"sum_logits": -1.0036290884017944, "num_tokens": 1, "num_tokens_all": 356, "is_greedy": true, "logits_per_token": -1.0036290884017944, "logits_per_char": -0.5018145442008972, "num_chars": 2}], "label": 0, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 115, "native_id": 115, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4706449508666992, "incorrect_loss_raw": 1.3780476252237956, "correct_loss_per_char": 0.7353224754333496, "incorrect_loss_per_char": 0.6890238126118978, "correct_loss_per_token": 1.4706449508666992, "incorrect_loss_per_token": 1.3780476252237956, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5401611328125, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.5401611328125, "logits_per_char": -0.77008056640625, "num_chars": 2}, {"sum_logits": -1.4706449508666992, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4706449508666992, "logits_per_char": -0.7353224754333496, "num_chars": 2}, {"sum_logits": -1.3403801918029785, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.3403801918029785, "logits_per_char": -0.6701900959014893, "num_chars": 2}, {"sum_logits": -1.2536015510559082, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.2536015510559082, "logits_per_char": -0.6268007755279541, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 116, "native_id": 116, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4725896120071411, "incorrect_loss_raw": 1.3721407254536946, "correct_loss_per_char": 0.7362948060035706, "incorrect_loss_per_char": 0.6860703627268473, "correct_loss_per_token": 1.4725896120071411, "incorrect_loss_per_token": 1.3721407254536946, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.287427306175232, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": true, "logits_per_token": -1.287427306175232, "logits_per_char": -0.643713653087616, "num_chars": 2}, {"sum_logits": -1.4299776554107666, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.4299776554107666, "logits_per_char": -0.7149888277053833, "num_chars": 2}, {"sum_logits": -1.3990172147750854, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.3990172147750854, "logits_per_char": -0.6995086073875427, "num_chars": 2}, {"sum_logits": -1.4725896120071411, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.4725896120071411, "logits_per_char": -0.7362948060035706, "num_chars": 2}], "label": 3, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 117, "native_id": 117, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5182386636734009, "incorrect_loss_raw": 1.36325470606486, "correct_loss_per_char": 0.7591193318367004, "incorrect_loss_per_char": 0.68162735303243, "correct_loss_per_token": 1.5182386636734009, "incorrect_loss_per_token": 1.36325470606486, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.543527364730835, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.543527364730835, "logits_per_char": -0.7717636823654175, "num_chars": 2}, {"sum_logits": -1.5182386636734009, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.5182386636734009, "logits_per_char": -0.7591193318367004, "num_chars": 2}, {"sum_logits": -1.310014009475708, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": false, "logits_per_token": -1.310014009475708, "logits_per_char": -0.655007004737854, "num_chars": 2}, {"sum_logits": -1.236222743988037, "num_tokens": 1, "num_tokens_all": 341, "is_greedy": true, "logits_per_token": -1.236222743988037, "logits_per_char": -0.6181113719940186, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 118, "native_id": 118, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.314634084701538, "incorrect_loss_raw": 1.426264762878418, "correct_loss_per_char": 0.657317042350769, "incorrect_loss_per_char": 0.713132381439209, "correct_loss_per_token": 1.314634084701538, "incorrect_loss_per_token": 1.426264762878418, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4951844215393066, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4951844215393066, "logits_per_char": -0.7475922107696533, "num_chars": 2}, {"sum_logits": -1.314634084701538, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.314634084701538, "logits_per_char": -0.657317042350769, "num_chars": 2}, {"sum_logits": -1.4760065078735352, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": false, "logits_per_token": -1.4760065078735352, "logits_per_char": -0.7380032539367676, "num_chars": 2}, {"sum_logits": -1.307603359222412, "num_tokens": 1, "num_tokens_all": 364, "is_greedy": true, "logits_per_token": -1.307603359222412, "logits_per_char": -0.653801679611206, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 119, "native_id": 119, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.359426736831665, "incorrect_loss_raw": 1.405613660812378, "correct_loss_per_char": 0.6797133684158325, "incorrect_loss_per_char": 0.702806830406189, "correct_loss_per_token": 1.359426736831665, "incorrect_loss_per_token": 1.405613660812378, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.394337773323059, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.394337773323059, "logits_per_char": -0.6971688866615295, "num_chars": 2}, {"sum_logits": -1.3556991815567017, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.3556991815567017, "logits_per_char": -0.6778495907783508, "num_chars": 2}, {"sum_logits": -1.466804027557373, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.466804027557373, "logits_per_char": -0.7334020137786865, "num_chars": 2}, {"sum_logits": -1.359426736831665, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.359426736831665, "logits_per_char": -0.6797133684158325, "num_chars": 2}], "label": 3, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 120, "native_id": 120, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4104812145233154, "incorrect_loss_raw": 1.389614423116048, "correct_loss_per_char": 0.7052406072616577, "incorrect_loss_per_char": 0.694807211558024, "correct_loss_per_token": 1.4104812145233154, "incorrect_loss_per_token": 1.389614423116048, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4958927631378174, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.4958927631378174, "logits_per_char": -0.7479463815689087, "num_chars": 2}, {"sum_logits": -1.3495020866394043, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.3495020866394043, "logits_per_char": -0.6747510433197021, "num_chars": 2}, {"sum_logits": -1.4104812145233154, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": false, "logits_per_token": -1.4104812145233154, "logits_per_char": -0.7052406072616577, "num_chars": 2}, {"sum_logits": -1.3234484195709229, "num_tokens": 1, "num_tokens_all": 331, "is_greedy": true, "logits_per_token": -1.3234484195709229, "logits_per_char": -0.6617242097854614, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 121, "native_id": 121, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5426263809204102, "incorrect_loss_raw": 1.3548596302668254, "correct_loss_per_char": 0.7713131904602051, "incorrect_loss_per_char": 0.6774298151334127, "correct_loss_per_token": 1.5426263809204102, "incorrect_loss_per_token": 1.3548596302668254, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4854682683944702, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.4854682683944702, "logits_per_char": -0.7427341341972351, "num_chars": 2}, {"sum_logits": -1.5426263809204102, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.5426263809204102, "logits_per_char": -0.7713131904602051, "num_chars": 2}, {"sum_logits": -1.4015165567398071, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": false, "logits_per_token": -1.4015165567398071, "logits_per_char": -0.7007582783699036, "num_chars": 2}, {"sum_logits": -1.1775940656661987, "num_tokens": 1, "num_tokens_all": 352, "is_greedy": true, "logits_per_token": -1.1775940656661987, "logits_per_char": -0.5887970328330994, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 122, "native_id": 122, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6574840545654297, "incorrect_loss_raw": 1.3257326285044353, "correct_loss_per_char": 0.8287420272827148, "incorrect_loss_per_char": 0.6628663142522176, "correct_loss_per_token": 1.6574840545654297, "incorrect_loss_per_token": 1.3257326285044353, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2140554189682007, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": true, "logits_per_token": -1.2140554189682007, "logits_per_char": -0.6070277094841003, "num_chars": 2}, {"sum_logits": -1.2660763263702393, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.2660763263702393, "logits_per_char": -0.6330381631851196, "num_chars": 2}, {"sum_logits": -1.6574840545654297, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.6574840545654297, "logits_per_char": -0.8287420272827148, "num_chars": 2}, {"sum_logits": -1.4970661401748657, "num_tokens": 1, "num_tokens_all": 339, "is_greedy": false, "logits_per_token": -1.4970661401748657, "logits_per_char": -0.7485330700874329, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 123, "native_id": 123, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3879419565200806, "incorrect_loss_raw": 1.3964499632517497, "correct_loss_per_char": 0.6939709782600403, "incorrect_loss_per_char": 0.6982249816258749, "correct_loss_per_token": 1.3879419565200806, "incorrect_loss_per_token": 1.3964499632517497, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3583554029464722, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": true, "logits_per_token": -1.3583554029464722, "logits_per_char": -0.6791777014732361, "num_chars": 2}, {"sum_logits": -1.3879419565200806, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3879419565200806, "logits_per_char": -0.6939709782600403, "num_chars": 2}, {"sum_logits": -1.4654452800750732, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.4654452800750732, "logits_per_char": -0.7327226400375366, "num_chars": 2}, {"sum_logits": -1.3655492067337036, "num_tokens": 1, "num_tokens_all": 346, "is_greedy": false, "logits_per_token": -1.3655492067337036, "logits_per_char": -0.6827746033668518, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 124, "native_id": 124, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3797414302825928, "incorrect_loss_raw": 1.3977922201156616, "correct_loss_per_char": 0.6898707151412964, "incorrect_loss_per_char": 0.6988961100578308, "correct_loss_per_token": 1.3797414302825928, "incorrect_loss_per_token": 1.3977922201156616, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3251923322677612, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": true, "logits_per_token": -1.3251923322677612, "logits_per_char": -0.6625961661338806, "num_chars": 2}, {"sum_logits": -1.4565564393997192, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4565564393997192, "logits_per_char": -0.7282782196998596, "num_chars": 2}, {"sum_logits": -1.4116278886795044, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.4116278886795044, "logits_per_char": -0.7058139443397522, "num_chars": 2}, {"sum_logits": -1.3797414302825928, "num_tokens": 1, "num_tokens_all": 338, "is_greedy": false, "logits_per_token": -1.3797414302825928, "logits_per_char": -0.6898707151412964, "num_chars": 2}], "label": 3, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 125, "native_id": 125, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3089714050292969, "incorrect_loss_raw": 1.4259440104166667, "correct_loss_per_char": 0.6544857025146484, "incorrect_loss_per_char": 0.7129720052083334, "correct_loss_per_token": 1.3089714050292969, "incorrect_loss_per_token": 1.4259440104166667, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4956705570220947, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4956705570220947, "logits_per_char": -0.7478352785110474, "num_chars": 2}, {"sum_logits": -1.3089714050292969, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.3089714050292969, "logits_per_char": -0.6544857025146484, "num_chars": 2}, {"sum_logits": -1.4768303632736206, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": false, "logits_per_token": -1.4768303632736206, "logits_per_char": -0.7384151816368103, "num_chars": 2}, {"sum_logits": -1.3053311109542847, "num_tokens": 1, "num_tokens_all": 378, "is_greedy": true, "logits_per_token": -1.3053311109542847, "logits_per_char": -0.6526655554771423, "num_chars": 2}], "label": 1, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 126, "native_id": 126, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.401370882987976, "incorrect_loss_raw": 1.4029128551483154, "correct_loss_per_char": 0.700685441493988, "incorrect_loss_per_char": 0.7014564275741577, "correct_loss_per_token": 1.401370882987976, "incorrect_loss_per_token": 1.4029128551483154, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5930497646331787, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.5930497646331787, "logits_per_char": -0.7965248823165894, "num_chars": 2}, {"sum_logits": -1.393678903579712, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.393678903579712, "logits_per_char": -0.696839451789856, "num_chars": 2}, {"sum_logits": -1.401370882987976, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": false, "logits_per_token": -1.401370882987976, "logits_per_char": -0.700685441493988, "num_chars": 2}, {"sum_logits": -1.2220098972320557, "num_tokens": 1, "num_tokens_all": 333, "is_greedy": true, "logits_per_token": -1.2220098972320557, "logits_per_char": -0.6110049486160278, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 127, "native_id": 127, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5046932697296143, "incorrect_loss_raw": 1.3666666746139526, "correct_loss_per_char": 0.7523466348648071, "incorrect_loss_per_char": 0.6833333373069763, "correct_loss_per_token": 1.5046932697296143, "incorrect_loss_per_token": 1.3666666746139526, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.456223964691162, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.456223964691162, "logits_per_char": -0.728111982345581, "num_chars": 2}, {"sum_logits": -1.449913501739502, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.449913501739502, "logits_per_char": -0.724956750869751, "num_chars": 2}, {"sum_logits": -1.5046932697296143, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.5046932697296143, "logits_per_char": -0.7523466348648071, "num_chars": 2}, {"sum_logits": -1.1938625574111938, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": true, "logits_per_token": -1.1938625574111938, "logits_per_char": -0.5969312787055969, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 128, "native_id": 128, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3825273513793945, "incorrect_loss_raw": 1.400119423866272, "correct_loss_per_char": 0.6912636756896973, "incorrect_loss_per_char": 0.700059711933136, "correct_loss_per_token": 1.3825273513793945, "incorrect_loss_per_token": 1.400119423866272, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3825273513793945, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.3825273513793945, "logits_per_char": -0.6912636756896973, "num_chars": 2}, {"sum_logits": -1.3326566219329834, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": true, "logits_per_token": -1.3326566219329834, "logits_per_char": -0.6663283109664917, "num_chars": 2}, {"sum_logits": -1.4675174951553345, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.4675174951553345, "logits_per_char": -0.7337587475776672, "num_chars": 2}, {"sum_logits": -1.400184154510498, "num_tokens": 1, "num_tokens_all": 328, "is_greedy": false, "logits_per_token": -1.400184154510498, "logits_per_char": -0.700092077255249, "num_chars": 2}], "label": 0, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 129, "native_id": 129, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2544605731964111, "incorrect_loss_raw": 1.44717538356781, "correct_loss_per_char": 0.6272302865982056, "incorrect_loss_per_char": 0.723587691783905, "correct_loss_per_token": 1.2544605731964111, "incorrect_loss_per_token": 1.44717538356781, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2544605731964111, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": true, "logits_per_token": -1.2544605731964111, "logits_per_char": -0.6272302865982056, "num_chars": 2}, {"sum_logits": -1.362672209739685, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.362672209739685, "logits_per_char": -0.6813361048698425, "num_chars": 2}, {"sum_logits": -1.5780363082885742, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.5780363082885742, "logits_per_char": -0.7890181541442871, "num_chars": 2}, {"sum_logits": -1.400817632675171, "num_tokens": 1, "num_tokens_all": 334, "is_greedy": false, "logits_per_token": -1.400817632675171, "logits_per_char": -0.7004088163375854, "num_chars": 2}], "label": 0, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 130, "native_id": 130, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3188788890838623, "incorrect_loss_raw": 1.4161967436472576, "correct_loss_per_char": 0.6594394445419312, "incorrect_loss_per_char": 0.7080983718236288, "correct_loss_per_token": 1.3188788890838623, "incorrect_loss_per_token": 1.4161967436472576, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3188788890838623, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.3188788890838623, "logits_per_char": -0.6594394445419312, "num_chars": 2}, {"sum_logits": -1.4364731311798096, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4364731311798096, "logits_per_char": -0.7182365655899048, "num_chars": 2}, {"sum_logits": -1.400139331817627, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.400139331817627, "logits_per_char": -0.7000696659088135, "num_chars": 2}, {"sum_logits": -1.411977767944336, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.411977767944336, "logits_per_char": -0.705988883972168, "num_chars": 2}], "label": 0, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 131, "native_id": 131, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.294899344444275, "incorrect_loss_raw": 1.4342877864837646, "correct_loss_per_char": 0.6474496722221375, "incorrect_loss_per_char": 0.7171438932418823, "correct_loss_per_token": 1.294899344444275, "incorrect_loss_per_token": 1.4342877864837646, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3976092338562012, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.3976092338562012, "logits_per_char": -0.6988046169281006, "num_chars": 2}, {"sum_logits": -1.5887165069580078, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.5887165069580078, "logits_per_char": -0.7943582534790039, "num_chars": 2}, {"sum_logits": -1.316537618637085, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": false, "logits_per_token": -1.316537618637085, "logits_per_char": -0.6582688093185425, "num_chars": 2}, {"sum_logits": -1.294899344444275, "num_tokens": 1, "num_tokens_all": 329, "is_greedy": true, "logits_per_token": -1.294899344444275, "logits_per_char": -0.6474496722221375, "num_chars": 2}], "label": 3, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 132, "native_id": 132, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5455130338668823, "incorrect_loss_raw": 1.3456427653630574, "correct_loss_per_char": 0.7727565169334412, "incorrect_loss_per_char": 0.6728213826815287, "correct_loss_per_token": 1.5455130338668823, "incorrect_loss_per_token": 1.3456427653630574, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5455130338668823, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.5455130338668823, "logits_per_char": -0.7727565169334412, "num_chars": 2}, {"sum_logits": -1.3751883506774902, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3751883506774902, "logits_per_char": -0.6875941753387451, "num_chars": 2}, {"sum_logits": -1.3621010780334473, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": false, "logits_per_token": -1.3621010780334473, "logits_per_char": -0.6810505390167236, "num_chars": 2}, {"sum_logits": -1.2996388673782349, "num_tokens": 1, "num_tokens_all": 362, "is_greedy": true, "logits_per_token": -1.2996388673782349, "logits_per_char": -0.6498194336891174, "num_chars": 2}], "label": 0, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 133, "native_id": 133, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.41090726852417, "incorrect_loss_raw": 1.3950018882751465, "correct_loss_per_char": 0.705453634262085, "incorrect_loss_per_char": 0.6975009441375732, "correct_loss_per_token": 1.41090726852417, "incorrect_loss_per_token": 1.3950018882751465, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4933844804763794, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.4933844804763794, "logits_per_char": -0.7466922402381897, "num_chars": 2}, {"sum_logits": -1.434645414352417, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.434645414352417, "logits_per_char": -0.7173227071762085, "num_chars": 2}, {"sum_logits": -1.41090726852417, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": false, "logits_per_token": -1.41090726852417, "logits_per_char": -0.705453634262085, "num_chars": 2}, {"sum_logits": -1.256975769996643, "num_tokens": 1, "num_tokens_all": 335, "is_greedy": true, "logits_per_token": -1.256975769996643, "logits_per_char": -0.6284878849983215, "num_chars": 2}], "label": 2, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 134, "native_id": 134, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.34188711643219, "incorrect_loss_raw": 1.4110456705093384, "correct_loss_per_char": 0.670943558216095, "incorrect_loss_per_char": 0.7055228352546692, "correct_loss_per_token": 1.34188711643219, "incorrect_loss_per_token": 1.4110456705093384, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4761172533035278, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.4761172533035278, "logits_per_char": -0.7380586266517639, "num_chars": 2}, {"sum_logits": -1.401594877243042, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.401594877243042, "logits_per_char": -0.700797438621521, "num_chars": 2}, {"sum_logits": -1.3554248809814453, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": false, "logits_per_token": -1.3554248809814453, "logits_per_char": -0.6777124404907227, "num_chars": 2}, {"sum_logits": -1.34188711643219, "num_tokens": 1, "num_tokens_all": 358, "is_greedy": true, "logits_per_token": -1.34188711643219, "logits_per_char": -0.670943558216095, "num_chars": 2}], "label": 3, "task_hash": "ba9ed92a6ef8f2c40aa5551bfc77b5e7", "model_hash": "03418cf8091a9882619950ffb07429a5"}