{"doc_id": 0, "native_id": 3187, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3903560638427734, "incorrect_loss_raw": 0.31406545639038086, "correct_loss_per_char": 0.34758901596069336, "incorrect_loss_per_char": 0.10468848546346028, "correct_loss_per_token": 1.3903560638427734, "incorrect_loss_per_token": 0.31406545639038086, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3903560638427734, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.3903560638427734, "logits_per_char": -0.34758901596069336, "num_chars": 4}, {"sum_logits": -0.31406545639038086, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": true, "logits_per_token": -0.31406545639038086, "logits_per_char": -0.10468848546346028, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 1, "native_id": 1805, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.41683000326156616, "incorrect_loss_raw": 1.2768173217773438, "correct_loss_per_char": 0.13894333442052206, "incorrect_loss_per_char": 0.31920433044433594, "correct_loss_per_token": 0.41683000326156616, "incorrect_loss_per_token": 1.2768173217773438, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2768173217773438, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": false, "logits_per_token": -1.2768173217773438, "logits_per_char": -0.31920433044433594, "num_chars": 4}, {"sum_logits": -0.41683000326156616, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": true, "logits_per_token": -0.41683000326156616, "logits_per_char": -0.13894333442052206, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 2, "native_id": 478, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6210074424743652, "incorrect_loss_raw": 0.8464442491531372, "correct_loss_per_char": 0.20700248082478842, "incorrect_loss_per_char": 0.2116110622882843, "correct_loss_per_token": 0.6210074424743652, "incorrect_loss_per_token": 0.8464442491531372, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8464442491531372, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -0.8464442491531372, "logits_per_char": -0.2116110622882843, "num_chars": 4}, {"sum_logits": -0.6210074424743652, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": true, "logits_per_token": -0.6210074424743652, "logits_per_char": -0.20700248082478842, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 3, "native_id": 30, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5962608456611633, "incorrect_loss_raw": 0.8930549621582031, "correct_loss_per_char": 0.14906521141529083, "incorrect_loss_per_char": 0.2976849873860677, "correct_loss_per_token": 0.5962608456611633, "incorrect_loss_per_token": 0.8930549621582031, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5962608456611633, "num_tokens": 1, "num_tokens_all": 948, "is_greedy": true, "logits_per_token": -0.5962608456611633, "logits_per_char": -0.14906521141529083, "num_chars": 4}, {"sum_logits": -0.8930549621582031, "num_tokens": 1, "num_tokens_all": 948, "is_greedy": false, "logits_per_token": -0.8930549621582031, "logits_per_char": -0.2976849873860677, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 4, "native_id": 371, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5843005776405334, "incorrect_loss_raw": 0.8642643094062805, "correct_loss_per_char": 0.14607514441013336, "incorrect_loss_per_char": 0.2880881031354268, "correct_loss_per_token": 0.5843005776405334, "incorrect_loss_per_token": 0.8642643094062805, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5843005776405334, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": true, "logits_per_token": -0.5843005776405334, "logits_per_char": -0.14607514441013336, "num_chars": 4}, {"sum_logits": -0.8642643094062805, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": false, "logits_per_token": -0.8642643094062805, "logits_per_char": -0.2880881031354268, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 5, "native_id": 2384, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8538198471069336, "incorrect_loss_raw": 0.5952135324478149, "correct_loss_per_char": 0.2134549617767334, "incorrect_loss_per_char": 0.19840451081593832, "correct_loss_per_token": 0.8538198471069336, "incorrect_loss_per_token": 0.5952135324478149, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8538198471069336, "num_tokens": 1, "num_tokens_all": 887, "is_greedy": false, "logits_per_token": -0.8538198471069336, "logits_per_char": -0.2134549617767334, "num_chars": 4}, {"sum_logits": -0.5952135324478149, "num_tokens": 1, "num_tokens_all": 887, "is_greedy": true, "logits_per_token": -0.5952135324478149, "logits_per_char": -0.19840451081593832, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 6, "native_id": 143, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6056828498840332, "incorrect_loss_raw": 0.8726833462715149, "correct_loss_per_char": 0.1514207124710083, "incorrect_loss_per_char": 0.29089444875717163, "correct_loss_per_token": 0.6056828498840332, "incorrect_loss_per_token": 0.8726833462715149, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6056828498840332, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -0.6056828498840332, "logits_per_char": -0.1514207124710083, "num_chars": 4}, {"sum_logits": -0.8726833462715149, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -0.8726833462715149, "logits_per_char": -0.29089444875717163, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 7, "native_id": 2750, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4657641649246216, "incorrect_loss_raw": 1.079838752746582, "correct_loss_per_char": 0.1164410412311554, "incorrect_loss_per_char": 0.35994625091552734, "correct_loss_per_token": 0.4657641649246216, "incorrect_loss_per_token": 1.079838752746582, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4657641649246216, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": true, "logits_per_token": -0.4657641649246216, "logits_per_char": -0.1164410412311554, "num_chars": 4}, {"sum_logits": -1.079838752746582, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.079838752746582, "logits_per_char": -0.35994625091552734, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 8, "native_id": 2838, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6319189071655273, "incorrect_loss_raw": 0.8202139139175415, "correct_loss_per_char": 0.21063963572184244, "incorrect_loss_per_char": 0.20505347847938538, "correct_loss_per_token": 0.6319189071655273, "incorrect_loss_per_token": 0.8202139139175415, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8202139139175415, "num_tokens": 1, "num_tokens_all": 992, "is_greedy": false, "logits_per_token": -0.8202139139175415, "logits_per_char": -0.20505347847938538, "num_chars": 4}, {"sum_logits": -0.6319189071655273, "num_tokens": 1, "num_tokens_all": 992, "is_greedy": true, "logits_per_token": -0.6319189071655273, "logits_per_char": -0.21063963572184244, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 9, "native_id": 343, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0301830768585205, "incorrect_loss_raw": 0.5420588850975037, "correct_loss_per_char": 0.2575457692146301, "incorrect_loss_per_char": 0.18068629503250122, "correct_loss_per_token": 1.0301830768585205, "incorrect_loss_per_token": 0.5420588850975037, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0301830768585205, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": false, "logits_per_token": -1.0301830768585205, "logits_per_char": -0.2575457692146301, "num_chars": 4}, {"sum_logits": -0.5420588850975037, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": true, "logits_per_token": -0.5420588850975037, "logits_per_char": -0.18068629503250122, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 10, "native_id": 403, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.18991385400295258, "incorrect_loss_raw": 1.8886353969573975, "correct_loss_per_char": 0.06330461800098419, "incorrect_loss_per_char": 0.47215884923934937, "correct_loss_per_token": 0.18991385400295258, "incorrect_loss_per_token": 1.8886353969573975, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.8886353969573975, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": false, "logits_per_token": -1.8886353969573975, "logits_per_char": -0.47215884923934937, "num_chars": 4}, {"sum_logits": -0.18991385400295258, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": true, "logits_per_token": -0.18991385400295258, "logits_per_char": -0.06330461800098419, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 11, "native_id": 3139, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8782207369804382, "incorrect_loss_raw": 0.5974756479263306, "correct_loss_per_char": 0.21955518424510956, "incorrect_loss_per_char": 0.19915854930877686, "correct_loss_per_token": 0.8782207369804382, "incorrect_loss_per_token": 0.5974756479263306, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8782207369804382, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -0.8782207369804382, "logits_per_char": -0.21955518424510956, "num_chars": 4}, {"sum_logits": -0.5974756479263306, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": true, "logits_per_token": -0.5974756479263306, "logits_per_char": -0.19915854930877686, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 12, "native_id": 1452, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.480978399515152, "incorrect_loss_raw": 1.089640498161316, "correct_loss_per_char": 0.120244599878788, "incorrect_loss_per_char": 0.3632134993871053, "correct_loss_per_token": 0.480978399515152, "incorrect_loss_per_token": 1.089640498161316, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.480978399515152, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": true, "logits_per_token": -0.480978399515152, "logits_per_char": -0.120244599878788, "num_chars": 4}, {"sum_logits": -1.089640498161316, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": false, "logits_per_token": -1.089640498161316, "logits_per_char": -0.3632134993871053, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 13, "native_id": 969, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.745479166507721, "incorrect_loss_raw": 0.7507926225662231, "correct_loss_per_char": 0.18636979162693024, "incorrect_loss_per_char": 0.2502642075220744, "correct_loss_per_token": 0.745479166507721, "incorrect_loss_per_token": 0.7507926225662231, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.745479166507721, "num_tokens": 1, "num_tokens_all": 869, "is_greedy": true, "logits_per_token": -0.745479166507721, "logits_per_char": -0.18636979162693024, "num_chars": 4}, {"sum_logits": -0.7507926225662231, "num_tokens": 1, "num_tokens_all": 869, "is_greedy": false, "logits_per_token": -0.7507926225662231, "logits_per_char": -0.2502642075220744, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 14, "native_id": 71, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5403327941894531, "incorrect_loss_raw": 0.2786718010902405, "correct_loss_per_char": 0.3850831985473633, "incorrect_loss_per_char": 0.0928906003634135, "correct_loss_per_token": 1.5403327941894531, "incorrect_loss_per_token": 0.2786718010902405, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5403327941894531, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.5403327941894531, "logits_per_char": -0.3850831985473633, "num_chars": 4}, {"sum_logits": -0.2786718010902405, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": true, "logits_per_token": -0.2786718010902405, "logits_per_char": -0.0928906003634135, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 15, "native_id": 126, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23592938482761383, "incorrect_loss_raw": 1.8486857414245605, "correct_loss_per_char": 0.07864312827587128, "incorrect_loss_per_char": 0.46217143535614014, "correct_loss_per_token": 0.23592938482761383, "incorrect_loss_per_token": 1.8486857414245605, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.8486857414245605, "num_tokens": 1, "num_tokens_all": 855, "is_greedy": false, "logits_per_token": -1.8486857414245605, "logits_per_char": -0.46217143535614014, "num_chars": 4}, {"sum_logits": -0.23592938482761383, "num_tokens": 1, "num_tokens_all": 855, "is_greedy": true, "logits_per_token": -0.23592938482761383, "logits_per_char": -0.07864312827587128, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 16, "native_id": 3230, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0812556743621826, "incorrect_loss_raw": 0.4459047317504883, "correct_loss_per_char": 0.27031391859054565, "incorrect_loss_per_char": 0.1486349105834961, "correct_loss_per_token": 1.0812556743621826, "incorrect_loss_per_token": 0.4459047317504883, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0812556743621826, "num_tokens": 1, "num_tokens_all": 869, "is_greedy": false, "logits_per_token": -1.0812556743621826, "logits_per_char": -0.27031391859054565, "num_chars": 4}, {"sum_logits": -0.4459047317504883, "num_tokens": 1, "num_tokens_all": 869, "is_greedy": true, "logits_per_token": -0.4459047317504883, "logits_per_char": -0.1486349105834961, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 17, "native_id": 64, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0542882680892944, "incorrect_loss_raw": 0.6504790782928467, "correct_loss_per_char": 0.2635720670223236, "incorrect_loss_per_char": 0.21682635943094888, "correct_loss_per_token": 1.0542882680892944, "incorrect_loss_per_token": 0.6504790782928467, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0542882680892944, "num_tokens": 1, "num_tokens_all": 856, "is_greedy": false, "logits_per_token": -1.0542882680892944, "logits_per_char": -0.2635720670223236, "num_chars": 4}, {"sum_logits": -0.6504790782928467, "num_tokens": 1, "num_tokens_all": 856, "is_greedy": true, "logits_per_token": -0.6504790782928467, "logits_per_char": -0.21682635943094888, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 18, "native_id": 1417, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5025708675384521, "incorrect_loss_raw": 1.0758998394012451, "correct_loss_per_char": 0.12564271688461304, "incorrect_loss_per_char": 0.35863327980041504, "correct_loss_per_token": 0.5025708675384521, "incorrect_loss_per_token": 1.0758998394012451, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5025708675384521, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": true, "logits_per_token": -0.5025708675384521, "logits_per_char": -0.12564271688461304, "num_chars": 4}, {"sum_logits": -1.0758998394012451, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": false, "logits_per_token": -1.0758998394012451, "logits_per_char": -0.35863327980041504, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 19, "native_id": 2655, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0470234155654907, "incorrect_loss_raw": 0.46422988176345825, "correct_loss_per_char": 0.2617558538913727, "incorrect_loss_per_char": 0.15474329392115274, "correct_loss_per_token": 1.0470234155654907, "incorrect_loss_per_token": 0.46422988176345825, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0470234155654907, "num_tokens": 1, "num_tokens_all": 857, "is_greedy": false, "logits_per_token": -1.0470234155654907, "logits_per_char": -0.2617558538913727, "num_chars": 4}, {"sum_logits": -0.46422988176345825, "num_tokens": 1, "num_tokens_all": 857, "is_greedy": true, "logits_per_token": -0.46422988176345825, "logits_per_char": -0.15474329392115274, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 20, "native_id": 2552, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9002416133880615, "incorrect_loss_raw": 0.577521800994873, "correct_loss_per_char": 0.22506040334701538, "incorrect_loss_per_char": 0.19250726699829102, "correct_loss_per_token": 0.9002416133880615, "incorrect_loss_per_token": 0.577521800994873, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9002416133880615, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -0.9002416133880615, "logits_per_char": -0.22506040334701538, "num_chars": 4}, {"sum_logits": -0.577521800994873, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": true, "logits_per_token": -0.577521800994873, "logits_per_char": -0.19250726699829102, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 21, "native_id": 1983, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2508273124694824, "incorrect_loss_raw": 0.4227178990840912, "correct_loss_per_char": 0.3127068281173706, "incorrect_loss_per_char": 0.14090596636136374, "correct_loss_per_token": 1.2508273124694824, "incorrect_loss_per_token": 0.4227178990840912, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2508273124694824, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.2508273124694824, "logits_per_char": -0.3127068281173706, "num_chars": 4}, {"sum_logits": -0.4227178990840912, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": true, "logits_per_token": -0.4227178990840912, "logits_per_char": -0.14090596636136374, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 22, "native_id": 2522, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5365921854972839, "incorrect_loss_raw": 0.9391891956329346, "correct_loss_per_char": 0.13414804637432098, "incorrect_loss_per_char": 0.3130630652109782, "correct_loss_per_token": 0.5365921854972839, "incorrect_loss_per_token": 0.9391891956329346, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5365921854972839, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": true, "logits_per_token": -0.5365921854972839, "logits_per_char": -0.13414804637432098, "num_chars": 4}, {"sum_logits": -0.9391891956329346, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": false, "logits_per_token": -0.9391891956329346, "logits_per_char": -0.3130630652109782, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 23, "native_id": 1898, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4667080640792847, "incorrect_loss_raw": 0.35517022013664246, "correct_loss_per_char": 0.36667701601982117, "incorrect_loss_per_char": 0.11839007337888081, "correct_loss_per_token": 1.4667080640792847, "incorrect_loss_per_token": 0.35517022013664246, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4667080640792847, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -1.4667080640792847, "logits_per_char": -0.36667701601982117, "num_chars": 4}, {"sum_logits": -0.35517022013664246, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": true, "logits_per_token": -0.35517022013664246, "logits_per_char": -0.11839007337888081, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 24, "native_id": 608, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5872540473937988, "incorrect_loss_raw": 0.25300538539886475, "correct_loss_per_char": 0.3968135118484497, "incorrect_loss_per_char": 0.08433512846628825, "correct_loss_per_token": 1.5872540473937988, "incorrect_loss_per_token": 0.25300538539886475, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5872540473937988, "num_tokens": 1, "num_tokens_all": 1015, "is_greedy": false, "logits_per_token": -1.5872540473937988, "logits_per_char": -0.3968135118484497, "num_chars": 4}, {"sum_logits": -0.25300538539886475, "num_tokens": 1, "num_tokens_all": 1015, "is_greedy": true, "logits_per_token": -0.25300538539886475, "logits_per_char": -0.08433512846628825, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 25, "native_id": 373, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4907849431037903, "incorrect_loss_raw": 1.0425690412521362, "correct_loss_per_char": 0.16359498103459677, "incorrect_loss_per_char": 0.26064226031303406, "correct_loss_per_token": 0.4907849431037903, "incorrect_loss_per_token": 1.0425690412521362, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0425690412521362, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": false, "logits_per_token": -1.0425690412521362, "logits_per_char": -0.26064226031303406, "num_chars": 4}, {"sum_logits": -0.4907849431037903, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": true, "logits_per_token": -0.4907849431037903, "logits_per_char": -0.16359498103459677, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 26, "native_id": 749, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0965994596481323, "incorrect_loss_raw": 0.4753938913345337, "correct_loss_per_char": 0.2741498649120331, "incorrect_loss_per_char": 0.15846463044484457, "correct_loss_per_token": 1.0965994596481323, "incorrect_loss_per_token": 0.4753938913345337, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0965994596481323, "num_tokens": 1, "num_tokens_all": 862, "is_greedy": false, "logits_per_token": -1.0965994596481323, "logits_per_char": -0.2741498649120331, "num_chars": 4}, {"sum_logits": -0.4753938913345337, "num_tokens": 1, "num_tokens_all": 862, "is_greedy": true, "logits_per_token": -0.4753938913345337, "logits_per_char": -0.15846463044484457, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 27, "native_id": 2922, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.126929521560669, "incorrect_loss_raw": 0.44310706853866577, "correct_loss_per_char": 0.28173238039016724, "incorrect_loss_per_char": 0.14770235617955527, "correct_loss_per_token": 1.126929521560669, "incorrect_loss_per_token": 0.44310706853866577, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.126929521560669, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -1.126929521560669, "logits_per_char": -0.28173238039016724, "num_chars": 4}, {"sum_logits": -0.44310706853866577, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": true, "logits_per_token": -0.44310706853866577, "logits_per_char": -0.14770235617955527, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 28, "native_id": 468, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4046380817890167, "incorrect_loss_raw": 1.1680094003677368, "correct_loss_per_char": 0.10115952044725418, "incorrect_loss_per_char": 0.3893364667892456, "correct_loss_per_token": 0.4046380817890167, "incorrect_loss_per_token": 1.1680094003677368, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4046380817890167, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": true, "logits_per_token": -0.4046380817890167, "logits_per_char": -0.10115952044725418, "num_chars": 4}, {"sum_logits": -1.1680094003677368, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": false, "logits_per_token": -1.1680094003677368, "logits_per_char": -0.3893364667892456, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 29, "native_id": 59, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1717554330825806, "incorrect_loss_raw": 0.4023854732513428, "correct_loss_per_char": 0.29293885827064514, "incorrect_loss_per_char": 0.13412849108378092, "correct_loss_per_token": 1.1717554330825806, "incorrect_loss_per_token": 0.4023854732513428, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1717554330825806, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": false, "logits_per_token": -1.1717554330825806, "logits_per_char": -0.29293885827064514, "num_chars": 4}, {"sum_logits": -0.4023854732513428, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": true, "logits_per_token": -0.4023854732513428, "logits_per_char": -0.13412849108378092, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 30, "native_id": 2060, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8807329535484314, "incorrect_loss_raw": 0.6806193590164185, "correct_loss_per_char": 0.2935776511828105, "incorrect_loss_per_char": 0.17015483975410461, "correct_loss_per_token": 0.8807329535484314, "incorrect_loss_per_token": 0.6806193590164185, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6806193590164185, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": true, "logits_per_token": -0.6806193590164185, "logits_per_char": -0.17015483975410461, "num_chars": 4}, {"sum_logits": -0.8807329535484314, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": false, "logits_per_token": -0.8807329535484314, "logits_per_char": -0.2935776511828105, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 31, "native_id": 1993, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6087661385536194, "incorrect_loss_raw": 0.9303579926490784, "correct_loss_per_char": 0.15219153463840485, "incorrect_loss_per_char": 0.3101193308830261, "correct_loss_per_token": 0.6087661385536194, "incorrect_loss_per_token": 0.9303579926490784, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6087661385536194, "num_tokens": 1, "num_tokens_all": 851, "is_greedy": true, "logits_per_token": -0.6087661385536194, "logits_per_char": -0.15219153463840485, "num_chars": 4}, {"sum_logits": -0.9303579926490784, "num_tokens": 1, "num_tokens_all": 851, "is_greedy": false, "logits_per_token": -0.9303579926490784, "logits_per_char": -0.3101193308830261, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 32, "native_id": 1023, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.439536452293396, "incorrect_loss_raw": 1.0783321857452393, "correct_loss_per_char": 0.109884113073349, "incorrect_loss_per_char": 0.35944406191507977, "correct_loss_per_token": 0.439536452293396, "incorrect_loss_per_token": 1.0783321857452393, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.439536452293396, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": true, "logits_per_token": -0.439536452293396, "logits_per_char": -0.109884113073349, "num_chars": 4}, {"sum_logits": -1.0783321857452393, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": false, "logits_per_token": -1.0783321857452393, "logits_per_char": -0.35944406191507977, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 33, "native_id": 264, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.153549075126648, "incorrect_loss_raw": 0.40954890847206116, "correct_loss_per_char": 0.288387268781662, "incorrect_loss_per_char": 0.13651630282402039, "correct_loss_per_token": 1.153549075126648, "incorrect_loss_per_token": 0.40954890847206116, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.153549075126648, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": false, "logits_per_token": -1.153549075126648, "logits_per_char": -0.288387268781662, "num_chars": 4}, {"sum_logits": -0.40954890847206116, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": true, "logits_per_token": -0.40954890847206116, "logits_per_char": -0.13651630282402039, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 34, "native_id": 2733, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6124899983406067, "incorrect_loss_raw": 0.8818219900131226, "correct_loss_per_char": 0.20416333278020224, "incorrect_loss_per_char": 0.22045549750328064, "correct_loss_per_token": 0.6124899983406067, "incorrect_loss_per_token": 0.8818219900131226, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8818219900131226, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": false, "logits_per_token": -0.8818219900131226, "logits_per_char": -0.22045549750328064, "num_chars": 4}, {"sum_logits": -0.6124899983406067, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": true, "logits_per_token": -0.6124899983406067, "logits_per_char": -0.20416333278020224, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 35, "native_id": 2216, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8455460667610168, "incorrect_loss_raw": 0.6271982789039612, "correct_loss_per_char": 0.28184868892033893, "incorrect_loss_per_char": 0.1567995697259903, "correct_loss_per_token": 0.8455460667610168, "incorrect_loss_per_token": 0.6271982789039612, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6271982789039612, "num_tokens": 1, "num_tokens_all": 1131, "is_greedy": true, "logits_per_token": -0.6271982789039612, "logits_per_char": -0.1567995697259903, "num_chars": 4}, {"sum_logits": -0.8455460667610168, "num_tokens": 1, "num_tokens_all": 1131, "is_greedy": false, "logits_per_token": -0.8455460667610168, "logits_per_char": -0.28184868892033893, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 36, "native_id": 1908, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5318212509155273, "incorrect_loss_raw": 0.9523489475250244, "correct_loss_per_char": 0.13295531272888184, "incorrect_loss_per_char": 0.3174496491750081, "correct_loss_per_token": 0.5318212509155273, "incorrect_loss_per_token": 0.9523489475250244, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5318212509155273, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": true, "logits_per_token": -0.5318212509155273, "logits_per_char": -0.13295531272888184, "num_chars": 4}, {"sum_logits": -0.9523489475250244, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": false, "logits_per_token": -0.9523489475250244, "logits_per_char": -0.3174496491750081, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 37, "native_id": 280, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8570078015327454, "incorrect_loss_raw": 0.6149948239326477, "correct_loss_per_char": 0.21425195038318634, "incorrect_loss_per_char": 0.2049982746442159, "correct_loss_per_token": 0.8570078015327454, "incorrect_loss_per_token": 0.6149948239326477, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8570078015327454, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -0.8570078015327454, "logits_per_char": -0.21425195038318634, "num_chars": 4}, {"sum_logits": -0.6149948239326477, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": true, "logits_per_token": -0.6149948239326477, "logits_per_char": -0.2049982746442159, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 38, "native_id": 2463, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2515249252319336, "incorrect_loss_raw": 0.3587450385093689, "correct_loss_per_char": 0.3128812313079834, "incorrect_loss_per_char": 0.11958167950312297, "correct_loss_per_token": 1.2515249252319336, "incorrect_loss_per_token": 0.3587450385093689, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2515249252319336, "num_tokens": 1, "num_tokens_all": 889, "is_greedy": false, "logits_per_token": -1.2515249252319336, "logits_per_char": -0.3128812313079834, "num_chars": 4}, {"sum_logits": -0.3587450385093689, "num_tokens": 1, "num_tokens_all": 889, "is_greedy": true, "logits_per_token": -0.3587450385093689, "logits_per_char": -0.11958167950312297, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 39, "native_id": 2765, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7109465599060059, "incorrect_loss_raw": 0.2298761010169983, "correct_loss_per_char": 0.42773663997650146, "incorrect_loss_per_char": 0.07662536700566609, "correct_loss_per_token": 1.7109465599060059, "incorrect_loss_per_token": 0.2298761010169983, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7109465599060059, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": false, "logits_per_token": -1.7109465599060059, "logits_per_char": -0.42773663997650146, "num_chars": 4}, {"sum_logits": -0.2298761010169983, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": true, "logits_per_token": -0.2298761010169983, "logits_per_char": -0.07662536700566609, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 40, "native_id": 364, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7162277102470398, "incorrect_loss_raw": 0.757581353187561, "correct_loss_per_char": 0.17905692756175995, "incorrect_loss_per_char": 0.252527117729187, "correct_loss_per_token": 0.7162277102470398, "incorrect_loss_per_token": 0.757581353187561, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7162277102470398, "num_tokens": 1, "num_tokens_all": 860, "is_greedy": true, "logits_per_token": -0.7162277102470398, "logits_per_char": -0.17905692756175995, "num_chars": 4}, {"sum_logits": -0.757581353187561, "num_tokens": 1, "num_tokens_all": 860, "is_greedy": false, "logits_per_token": -0.757581353187561, "logits_per_char": -0.252527117729187, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 41, "native_id": 2109, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9188125729560852, "incorrect_loss_raw": 0.5851571559906006, "correct_loss_per_char": 0.2297031432390213, "incorrect_loss_per_char": 0.1950523853302002, "correct_loss_per_token": 0.9188125729560852, "incorrect_loss_per_token": 0.5851571559906006, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9188125729560852, "num_tokens": 1, "num_tokens_all": 994, "is_greedy": false, "logits_per_token": -0.9188125729560852, "logits_per_char": -0.2297031432390213, "num_chars": 4}, {"sum_logits": -0.5851571559906006, "num_tokens": 1, "num_tokens_all": 994, "is_greedy": true, "logits_per_token": -0.5851571559906006, "logits_per_char": -0.1950523853302002, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 42, "native_id": 2371, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6559273600578308, "incorrect_loss_raw": 0.8234481811523438, "correct_loss_per_char": 0.1639818400144577, "incorrect_loss_per_char": 0.27448272705078125, "correct_loss_per_token": 0.6559273600578308, "incorrect_loss_per_token": 0.8234481811523438, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6559273600578308, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": true, "logits_per_token": -0.6559273600578308, "logits_per_char": -0.1639818400144577, "num_chars": 4}, {"sum_logits": -0.8234481811523438, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -0.8234481811523438, "logits_per_char": -0.27448272705078125, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 43, "native_id": 188, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6246724128723145, "incorrect_loss_raw": 0.2798272371292114, "correct_loss_per_char": 0.4061681032180786, "incorrect_loss_per_char": 0.09327574570973714, "correct_loss_per_token": 1.6246724128723145, "incorrect_loss_per_token": 0.2798272371292114, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6246724128723145, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": false, "logits_per_token": -1.6246724128723145, "logits_per_char": -0.4061681032180786, "num_chars": 4}, {"sum_logits": -0.2798272371292114, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": true, "logits_per_token": -0.2798272371292114, "logits_per_char": -0.09327574570973714, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 44, "native_id": 1104, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3422391712665558, "incorrect_loss_raw": 1.33476984500885, "correct_loss_per_char": 0.1140797237555186, "incorrect_loss_per_char": 0.3336924612522125, "correct_loss_per_token": 0.3422391712665558, "incorrect_loss_per_token": 1.33476984500885, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.33476984500885, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -1.33476984500885, "logits_per_char": -0.3336924612522125, "num_chars": 4}, {"sum_logits": -0.3422391712665558, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": true, "logits_per_token": -0.3422391712665558, "logits_per_char": -0.1140797237555186, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 45, "native_id": 2279, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7079389095306396, "incorrect_loss_raw": 0.7402549982070923, "correct_loss_per_char": 0.1769847273826599, "incorrect_loss_per_char": 0.24675166606903076, "correct_loss_per_token": 0.7079389095306396, "incorrect_loss_per_token": 0.7402549982070923, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7079389095306396, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": true, "logits_per_token": -0.7079389095306396, "logits_per_char": -0.1769847273826599, "num_chars": 4}, {"sum_logits": -0.7402549982070923, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": false, "logits_per_token": -0.7402549982070923, "logits_per_char": -0.24675166606903076, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 46, "native_id": 258, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6865130662918091, "incorrect_loss_raw": 0.7870759963989258, "correct_loss_per_char": 0.22883768876393637, "incorrect_loss_per_char": 0.19676899909973145, "correct_loss_per_token": 0.6865130662918091, "incorrect_loss_per_token": 0.7870759963989258, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7870759963989258, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -0.7870759963989258, "logits_per_char": -0.19676899909973145, "num_chars": 4}, {"sum_logits": -0.6865130662918091, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": true, "logits_per_token": -0.6865130662918091, "logits_per_char": -0.22883768876393637, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 47, "native_id": 2640, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5195348262786865, "incorrect_loss_raw": 1.0179246664047241, "correct_loss_per_char": 0.17317827542622885, "incorrect_loss_per_char": 0.25448116660118103, "correct_loss_per_token": 0.5195348262786865, "incorrect_loss_per_token": 1.0179246664047241, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0179246664047241, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": false, "logits_per_token": -1.0179246664047241, "logits_per_char": -0.25448116660118103, "num_chars": 4}, {"sum_logits": -0.5195348262786865, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": true, "logits_per_token": -0.5195348262786865, "logits_per_char": -0.17317827542622885, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 48, "native_id": 1238, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9722627997398376, "incorrect_loss_raw": 0.5142123103141785, "correct_loss_per_char": 0.2430656999349594, "incorrect_loss_per_char": 0.17140410343805948, "correct_loss_per_token": 0.9722627997398376, "incorrect_loss_per_token": 0.5142123103141785, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9722627997398376, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": false, "logits_per_token": -0.9722627997398376, "logits_per_char": -0.2430656999349594, "num_chars": 4}, {"sum_logits": -0.5142123103141785, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": true, "logits_per_token": -0.5142123103141785, "logits_per_char": -0.17140410343805948, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 49, "native_id": 1970, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6574324369430542, "incorrect_loss_raw": 0.7764395475387573, "correct_loss_per_char": 0.21914414564768472, "incorrect_loss_per_char": 0.19410988688468933, "correct_loss_per_token": 0.6574324369430542, "incorrect_loss_per_token": 0.7764395475387573, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7764395475387573, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": false, "logits_per_token": -0.7764395475387573, "logits_per_char": -0.19410988688468933, "num_chars": 4}, {"sum_logits": -0.6574324369430542, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": true, "logits_per_token": -0.6574324369430542, "logits_per_char": -0.21914414564768472, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 50, "native_id": 1455, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25019505620002747, "incorrect_loss_raw": 1.6393795013427734, "correct_loss_per_char": 0.08339835206667583, "incorrect_loss_per_char": 0.40984487533569336, "correct_loss_per_token": 0.25019505620002747, "incorrect_loss_per_token": 1.6393795013427734, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6393795013427734, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": false, "logits_per_token": -1.6393795013427734, "logits_per_char": -0.40984487533569336, "num_chars": 4}, {"sum_logits": -0.25019505620002747, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": true, "logits_per_token": -0.25019505620002747, "logits_per_char": -0.08339835206667583, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 51, "native_id": 1091, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6502825021743774, "incorrect_loss_raw": 0.7917231321334839, "correct_loss_per_char": 0.21676083405812582, "incorrect_loss_per_char": 0.19793078303337097, "correct_loss_per_token": 0.6502825021743774, "incorrect_loss_per_token": 0.7917231321334839, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7917231321334839, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": false, "logits_per_token": -0.7917231321334839, "logits_per_char": -0.19793078303337097, "num_chars": 4}, {"sum_logits": -0.6502825021743774, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": true, "logits_per_token": -0.6502825021743774, "logits_per_char": -0.21676083405812582, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 52, "native_id": 1020, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3662520945072174, "incorrect_loss_raw": 1.356650948524475, "correct_loss_per_char": 0.12208403150240581, "incorrect_loss_per_char": 0.3391627371311188, "correct_loss_per_token": 0.3662520945072174, "incorrect_loss_per_token": 1.356650948524475, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.356650948524475, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": false, "logits_per_token": -1.356650948524475, "logits_per_char": -0.3391627371311188, "num_chars": 4}, {"sum_logits": -0.3662520945072174, "num_tokens": 1, "num_tokens_all": 1073, "is_greedy": true, "logits_per_token": -0.3662520945072174, "logits_per_char": -0.12208403150240581, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 53, "native_id": 2684, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.44113805890083313, "incorrect_loss_raw": 1.1142460107803345, "correct_loss_per_char": 0.14704601963361105, "incorrect_loss_per_char": 0.2785615026950836, "correct_loss_per_token": 0.44113805890083313, "incorrect_loss_per_token": 1.1142460107803345, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1142460107803345, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": false, "logits_per_token": -1.1142460107803345, "logits_per_char": -0.2785615026950836, "num_chars": 4}, {"sum_logits": -0.44113805890083313, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": true, "logits_per_token": -0.44113805890083313, "logits_per_char": -0.14704601963361105, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 54, "native_id": 819, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.1798458099365234, "incorrect_loss_raw": 0.16452082991600037, "correct_loss_per_char": 0.5449614524841309, "incorrect_loss_per_char": 0.054840276638666786, "correct_loss_per_token": 2.1798458099365234, "incorrect_loss_per_token": 0.16452082991600037, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -2.1798458099365234, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": false, "logits_per_token": -2.1798458099365234, "logits_per_char": -0.5449614524841309, "num_chars": 4}, {"sum_logits": -0.16452082991600037, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": true, "logits_per_token": -0.16452082991600037, "logits_per_char": -0.054840276638666786, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 55, "native_id": 1857, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6074432134628296, "incorrect_loss_raw": 0.8935602903366089, "correct_loss_per_char": 0.1518608033657074, "incorrect_loss_per_char": 0.29785343011220294, "correct_loss_per_token": 0.6074432134628296, "incorrect_loss_per_token": 0.8935602903366089, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6074432134628296, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": true, "logits_per_token": -0.6074432134628296, "logits_per_char": -0.1518608033657074, "num_chars": 4}, {"sum_logits": -0.8935602903366089, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": false, "logits_per_token": -0.8935602903366089, "logits_per_char": -0.29785343011220294, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 56, "native_id": 2171, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5673055648803711, "incorrect_loss_raw": 0.9281737804412842, "correct_loss_per_char": 0.1891018549601237, "incorrect_loss_per_char": 0.23204344511032104, "correct_loss_per_token": 0.5673055648803711, "incorrect_loss_per_token": 0.9281737804412842, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9281737804412842, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": false, "logits_per_token": -0.9281737804412842, "logits_per_char": -0.23204344511032104, "num_chars": 4}, {"sum_logits": -0.5673055648803711, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": true, "logits_per_token": -0.5673055648803711, "logits_per_char": -0.1891018549601237, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 57, "native_id": 2725, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0918667316436768, "incorrect_loss_raw": 0.50432288646698, "correct_loss_per_char": 0.2729666829109192, "incorrect_loss_per_char": 0.16810762882232666, "correct_loss_per_token": 1.0918667316436768, "incorrect_loss_per_token": 0.50432288646698, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0918667316436768, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": false, "logits_per_token": -1.0918667316436768, "logits_per_char": -0.2729666829109192, "num_chars": 4}, {"sum_logits": -0.50432288646698, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": true, "logits_per_token": -0.50432288646698, "logits_per_char": -0.16810762882232666, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 58, "native_id": 79, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.36555707454681396, "incorrect_loss_raw": 1.2782891988754272, "correct_loss_per_char": 0.12185235818227132, "incorrect_loss_per_char": 0.3195722997188568, "correct_loss_per_token": 0.36555707454681396, "incorrect_loss_per_token": 1.2782891988754272, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2782891988754272, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": false, "logits_per_token": -1.2782891988754272, "logits_per_char": -0.3195722997188568, "num_chars": 4}, {"sum_logits": -0.36555707454681396, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": true, "logits_per_token": -0.36555707454681396, "logits_per_char": -0.12185235818227132, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 59, "native_id": 2081, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6886530518531799, "incorrect_loss_raw": 0.7657096982002258, "correct_loss_per_char": 0.17216326296329498, "incorrect_loss_per_char": 0.25523656606674194, "correct_loss_per_token": 0.6886530518531799, "incorrect_loss_per_token": 0.7657096982002258, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6886530518531799, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": true, "logits_per_token": -0.6886530518531799, "logits_per_char": -0.17216326296329498, "num_chars": 4}, {"sum_logits": -0.7657096982002258, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": false, "logits_per_token": -0.7657096982002258, "logits_per_char": -0.25523656606674194, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 60, "native_id": 289, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.650038480758667, "incorrect_loss_raw": 0.9057948589324951, "correct_loss_per_char": 0.21667949358622232, "incorrect_loss_per_char": 0.22644871473312378, "correct_loss_per_token": 0.650038480758667, "incorrect_loss_per_token": 0.9057948589324951, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9057948589324951, "num_tokens": 1, "num_tokens_all": 1111, "is_greedy": false, "logits_per_token": -0.9057948589324951, "logits_per_char": -0.22644871473312378, "num_chars": 4}, {"sum_logits": -0.650038480758667, "num_tokens": 1, "num_tokens_all": 1111, "is_greedy": true, "logits_per_token": -0.650038480758667, "logits_per_char": -0.21667949358622232, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 61, "native_id": 23, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5791119933128357, "incorrect_loss_raw": 0.9194968342781067, "correct_loss_per_char": 0.14477799832820892, "incorrect_loss_per_char": 0.3064989447593689, "correct_loss_per_token": 0.5791119933128357, "incorrect_loss_per_token": 0.9194968342781067, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5791119933128357, "num_tokens": 1, "num_tokens_all": 887, "is_greedy": true, "logits_per_token": -0.5791119933128357, "logits_per_char": -0.14477799832820892, "num_chars": 4}, {"sum_logits": -0.9194968342781067, "num_tokens": 1, "num_tokens_all": 887, "is_greedy": false, "logits_per_token": -0.9194968342781067, "logits_per_char": -0.3064989447593689, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 62, "native_id": 1366, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5526452660560608, "incorrect_loss_raw": 0.9169782400131226, "correct_loss_per_char": 0.1842150886853536, "incorrect_loss_per_char": 0.22924456000328064, "correct_loss_per_token": 0.5526452660560608, "incorrect_loss_per_token": 0.9169782400131226, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9169782400131226, "num_tokens": 1, "num_tokens_all": 849, "is_greedy": false, "logits_per_token": -0.9169782400131226, "logits_per_char": -0.22924456000328064, "num_chars": 4}, {"sum_logits": -0.5526452660560608, "num_tokens": 1, "num_tokens_all": 849, "is_greedy": true, "logits_per_token": -0.5526452660560608, "logits_per_char": -0.1842150886853536, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 63, "native_id": 588, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7629968523979187, "incorrect_loss_raw": 0.6774106025695801, "correct_loss_per_char": 0.2543322841326396, "incorrect_loss_per_char": 0.16935265064239502, "correct_loss_per_token": 0.7629968523979187, "incorrect_loss_per_token": 0.6774106025695801, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6774106025695801, "num_tokens": 1, "num_tokens_all": 1056, "is_greedy": true, "logits_per_token": -0.6774106025695801, "logits_per_char": -0.16935265064239502, "num_chars": 4}, {"sum_logits": -0.7629968523979187, "num_tokens": 1, "num_tokens_all": 1056, "is_greedy": false, "logits_per_token": -0.7629968523979187, "logits_per_char": -0.2543322841326396, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 64, "native_id": 2908, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.588043749332428, "incorrect_loss_raw": 0.8844346404075623, "correct_loss_per_char": 0.19601458311080933, "incorrect_loss_per_char": 0.22110866010189056, "correct_loss_per_token": 0.588043749332428, "incorrect_loss_per_token": 0.8844346404075623, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8844346404075623, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -0.8844346404075623, "logits_per_char": -0.22110866010189056, "num_chars": 4}, {"sum_logits": -0.588043749332428, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": true, "logits_per_token": -0.588043749332428, "logits_per_char": -0.19601458311080933, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 65, "native_id": 1936, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8610772490501404, "incorrect_loss_raw": 0.6595261096954346, "correct_loss_per_char": 0.2152693122625351, "incorrect_loss_per_char": 0.21984203656514487, "correct_loss_per_token": 0.8610772490501404, "incorrect_loss_per_token": 0.6595261096954346, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8610772490501404, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": false, "logits_per_token": -0.8610772490501404, "logits_per_char": -0.2152693122625351, "num_chars": 4}, {"sum_logits": -0.6595261096954346, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": true, "logits_per_token": -0.6595261096954346, "logits_per_char": -0.21984203656514487, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 66, "native_id": 2692, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6872488856315613, "incorrect_loss_raw": 0.7936050891876221, "correct_loss_per_char": 0.17181222140789032, "incorrect_loss_per_char": 0.26453502972920734, "correct_loss_per_token": 0.6872488856315613, "incorrect_loss_per_token": 0.7936050891876221, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6872488856315613, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": true, "logits_per_token": -0.6872488856315613, "logits_per_char": -0.17181222140789032, "num_chars": 4}, {"sum_logits": -0.7936050891876221, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": false, "logits_per_token": -0.7936050891876221, "logits_per_char": -0.26453502972920734, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 67, "native_id": 1545, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8829950094223022, "incorrect_loss_raw": 0.5992491245269775, "correct_loss_per_char": 0.22074875235557556, "incorrect_loss_per_char": 0.19974970817565918, "correct_loss_per_token": 0.8829950094223022, "incorrect_loss_per_token": 0.5992491245269775, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8829950094223022, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": false, "logits_per_token": -0.8829950094223022, "logits_per_char": -0.22074875235557556, "num_chars": 4}, {"sum_logits": -0.5992491245269775, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": true, "logits_per_token": -0.5992491245269775, "logits_per_char": -0.19974970817565918, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 68, "native_id": 684, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5088107585906982, "incorrect_loss_raw": 0.9712965488433838, "correct_loss_per_char": 0.16960358619689941, "incorrect_loss_per_char": 0.24282413721084595, "correct_loss_per_token": 0.5088107585906982, "incorrect_loss_per_token": 0.9712965488433838, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9712965488433838, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": false, "logits_per_token": -0.9712965488433838, "logits_per_char": -0.24282413721084595, "num_chars": 4}, {"sum_logits": -0.5088107585906982, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": true, "logits_per_token": -0.5088107585906982, "logits_per_char": -0.16960358619689941, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 69, "native_id": 221, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4876447021961212, "incorrect_loss_raw": 1.0672813653945923, "correct_loss_per_char": 0.16254823406537375, "incorrect_loss_per_char": 0.26682034134864807, "correct_loss_per_token": 0.4876447021961212, "incorrect_loss_per_token": 1.0672813653945923, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0672813653945923, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": false, "logits_per_token": -1.0672813653945923, "logits_per_char": -0.26682034134864807, "num_chars": 4}, {"sum_logits": -0.4876447021961212, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": true, "logits_per_token": -0.4876447021961212, "logits_per_char": -0.16254823406537375, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 70, "native_id": 312, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2605834901332855, "incorrect_loss_raw": 1.6022765636444092, "correct_loss_per_char": 0.08686116337776184, "incorrect_loss_per_char": 0.4005691409111023, "correct_loss_per_token": 0.2605834901332855, "incorrect_loss_per_token": 1.6022765636444092, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6022765636444092, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": false, "logits_per_token": -1.6022765636444092, "logits_per_char": -0.4005691409111023, "num_chars": 4}, {"sum_logits": -0.2605834901332855, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": true, "logits_per_token": -0.2605834901332855, "logits_per_char": -0.08686116337776184, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 71, "native_id": 2406, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.230811595916748, "incorrect_loss_raw": 0.3912098705768585, "correct_loss_per_char": 0.307702898979187, "incorrect_loss_per_char": 0.13040329019228616, "correct_loss_per_token": 1.230811595916748, "incorrect_loss_per_token": 0.3912098705768585, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.230811595916748, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -1.230811595916748, "logits_per_char": -0.307702898979187, "num_chars": 4}, {"sum_logits": -0.3912098705768585, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": true, "logits_per_token": -0.3912098705768585, "logits_per_char": -0.13040329019228616, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 72, "native_id": 2033, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4936060309410095, "incorrect_loss_raw": 0.9864814281463623, "correct_loss_per_char": 0.12340150773525238, "incorrect_loss_per_char": 0.3288271427154541, "correct_loss_per_token": 0.4936060309410095, "incorrect_loss_per_token": 0.9864814281463623, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4936060309410095, "num_tokens": 1, "num_tokens_all": 897, "is_greedy": true, "logits_per_token": -0.4936060309410095, "logits_per_char": -0.12340150773525238, "num_chars": 4}, {"sum_logits": -0.9864814281463623, "num_tokens": 1, "num_tokens_all": 897, "is_greedy": false, "logits_per_token": -0.9864814281463623, "logits_per_char": -0.3288271427154541, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 73, "native_id": 671, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.35765939950942993, "incorrect_loss_raw": 1.3059675693511963, "correct_loss_per_char": 0.08941484987735748, "incorrect_loss_per_char": 0.43532252311706543, "correct_loss_per_token": 0.35765939950942993, "incorrect_loss_per_token": 1.3059675693511963, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.35765939950942993, "num_tokens": 1, "num_tokens_all": 1058, "is_greedy": true, "logits_per_token": -0.35765939950942993, "logits_per_char": -0.08941484987735748, "num_chars": 4}, {"sum_logits": -1.3059675693511963, "num_tokens": 1, "num_tokens_all": 1058, "is_greedy": false, "logits_per_token": -1.3059675693511963, "logits_per_char": -0.43532252311706543, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 74, "native_id": 308, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.32576432824134827, "incorrect_loss_raw": 1.3450874090194702, "correct_loss_per_char": 0.10858810941378276, "incorrect_loss_per_char": 0.33627185225486755, "correct_loss_per_token": 0.32576432824134827, "incorrect_loss_per_token": 1.3450874090194702, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3450874090194702, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.3450874090194702, "logits_per_char": -0.33627185225486755, "num_chars": 4}, {"sum_logits": -0.32576432824134827, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -0.32576432824134827, "logits_per_char": -0.10858810941378276, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 75, "native_id": 2282, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3749453127384186, "incorrect_loss_raw": 1.2578116655349731, "correct_loss_per_char": 0.1249817709128062, "incorrect_loss_per_char": 0.3144529163837433, "correct_loss_per_token": 0.3749453127384186, "incorrect_loss_per_token": 1.2578116655349731, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2578116655349731, "num_tokens": 1, "num_tokens_all": 857, "is_greedy": false, "logits_per_token": -1.2578116655349731, "logits_per_char": -0.3144529163837433, "num_chars": 4}, {"sum_logits": -0.3749453127384186, "num_tokens": 1, "num_tokens_all": 857, "is_greedy": true, "logits_per_token": -0.3749453127384186, "logits_per_char": -0.1249817709128062, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 76, "native_id": 881, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8415786027908325, "incorrect_loss_raw": 0.6814905405044556, "correct_loss_per_char": 0.21039465069770813, "incorrect_loss_per_char": 0.2271635135014852, "correct_loss_per_token": 0.8415786027908325, "incorrect_loss_per_token": 0.6814905405044556, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8415786027908325, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": false, "logits_per_token": -0.8415786027908325, "logits_per_char": -0.21039465069770813, "num_chars": 4}, {"sum_logits": -0.6814905405044556, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": true, "logits_per_token": -0.6814905405044556, "logits_per_char": -0.2271635135014852, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 77, "native_id": 590, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3098881244659424, "incorrect_loss_raw": 0.3401300013065338, "correct_loss_per_char": 0.3274720311164856, "incorrect_loss_per_char": 0.11337666710217793, "correct_loss_per_token": 1.3098881244659424, "incorrect_loss_per_token": 0.3401300013065338, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3098881244659424, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.3098881244659424, "logits_per_char": -0.3274720311164856, "num_chars": 4}, {"sum_logits": -0.3401300013065338, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.3401300013065338, "logits_per_char": -0.11337666710217793, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 78, "native_id": 111, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.48276740312576294, "incorrect_loss_raw": 1.1107892990112305, "correct_loss_per_char": 0.12069185078144073, "incorrect_loss_per_char": 0.37026309967041016, "correct_loss_per_token": 0.48276740312576294, "incorrect_loss_per_token": 1.1107892990112305, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.48276740312576294, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": true, "logits_per_token": -0.48276740312576294, "logits_per_char": -0.12069185078144073, "num_chars": 4}, {"sum_logits": -1.1107892990112305, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.1107892990112305, "logits_per_char": -0.37026309967041016, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 79, "native_id": 1418, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9878749847412109, "incorrect_loss_raw": 0.521080732345581, "correct_loss_per_char": 0.24696874618530273, "incorrect_loss_per_char": 0.173693577448527, "correct_loss_per_token": 0.9878749847412109, "incorrect_loss_per_token": 0.521080732345581, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9878749847412109, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": false, "logits_per_token": -0.9878749847412109, "logits_per_char": -0.24696874618530273, "num_chars": 4}, {"sum_logits": -0.521080732345581, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": true, "logits_per_token": -0.521080732345581, "logits_per_char": -0.173693577448527, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 80, "native_id": 3157, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3836812674999237, "incorrect_loss_raw": 1.2424111366271973, "correct_loss_per_char": 0.09592031687498093, "incorrect_loss_per_char": 0.4141370455423991, "correct_loss_per_token": 0.3836812674999237, "incorrect_loss_per_token": 1.2424111366271973, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3836812674999237, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.3836812674999237, "logits_per_char": -0.09592031687498093, "num_chars": 4}, {"sum_logits": -1.2424111366271973, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.2424111366271973, "logits_per_char": -0.4141370455423991, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 81, "native_id": 454, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7272424101829529, "incorrect_loss_raw": 0.726404070854187, "correct_loss_per_char": 0.18181060254573822, "incorrect_loss_per_char": 0.242134690284729, "correct_loss_per_token": 0.7272424101829529, "incorrect_loss_per_token": 0.726404070854187, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7272424101829529, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": false, "logits_per_token": -0.7272424101829529, "logits_per_char": -0.18181060254573822, "num_chars": 4}, {"sum_logits": -0.726404070854187, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": true, "logits_per_token": -0.726404070854187, "logits_per_char": -0.242134690284729, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 82, "native_id": 2169, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2369050234556198, "incorrect_loss_raw": 1.6646193265914917, "correct_loss_per_char": 0.07896834115187328, "incorrect_loss_per_char": 0.4161548316478729, "correct_loss_per_token": 0.2369050234556198, "incorrect_loss_per_token": 1.6646193265914917, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6646193265914917, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.6646193265914917, "logits_per_char": -0.4161548316478729, "num_chars": 4}, {"sum_logits": -0.2369050234556198, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.2369050234556198, "logits_per_char": -0.07896834115187328, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 83, "native_id": 578, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.093262195587158, "incorrect_loss_raw": 0.18221628665924072, "correct_loss_per_char": 0.6977540651957194, "incorrect_loss_per_char": 0.04555407166481018, "correct_loss_per_token": 2.093262195587158, "incorrect_loss_per_token": 0.18221628665924072, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.18221628665924072, "num_tokens": 1, "num_tokens_all": 897, "is_greedy": true, "logits_per_token": -0.18221628665924072, "logits_per_char": -0.04555407166481018, "num_chars": 4}, {"sum_logits": -2.093262195587158, "num_tokens": 1, "num_tokens_all": 897, "is_greedy": false, "logits_per_token": -2.093262195587158, "logits_per_char": -0.6977540651957194, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 84, "native_id": 2746, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5059942603111267, "incorrect_loss_raw": 0.9944880604743958, "correct_loss_per_char": 0.12649856507778168, "incorrect_loss_per_char": 0.3314960201581319, "correct_loss_per_token": 0.5059942603111267, "incorrect_loss_per_token": 0.9944880604743958, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5059942603111267, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": true, "logits_per_token": -0.5059942603111267, "logits_per_char": -0.12649856507778168, "num_chars": 4}, {"sum_logits": -0.9944880604743958, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": false, "logits_per_token": -0.9944880604743958, "logits_per_char": -0.3314960201581319, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 85, "native_id": 1250, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.182930827140808, "incorrect_loss_raw": 0.39389458298683167, "correct_loss_per_char": 0.295732706785202, "incorrect_loss_per_char": 0.13129819432894388, "correct_loss_per_token": 1.182930827140808, "incorrect_loss_per_token": 0.39389458298683167, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.182930827140808, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -1.182930827140808, "logits_per_char": -0.295732706785202, "num_chars": 4}, {"sum_logits": -0.39389458298683167, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.39389458298683167, "logits_per_char": -0.13129819432894388, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 86, "native_id": 1860, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3941032588481903, "incorrect_loss_raw": 1.2178086042404175, "correct_loss_per_char": 0.09852581471204758, "incorrect_loss_per_char": 0.4059362014134725, "correct_loss_per_token": 0.3941032588481903, "incorrect_loss_per_token": 1.2178086042404175, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3941032588481903, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": true, "logits_per_token": -0.3941032588481903, "logits_per_char": -0.09852581471204758, "num_chars": 4}, {"sum_logits": -1.2178086042404175, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": false, "logits_per_token": -1.2178086042404175, "logits_per_char": -0.4059362014134725, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 87, "native_id": 162, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.541815996170044, "incorrect_loss_raw": 1.0644700527191162, "correct_loss_per_char": 0.135453999042511, "incorrect_loss_per_char": 0.35482335090637207, "correct_loss_per_token": 0.541815996170044, "incorrect_loss_per_token": 1.0644700527191162, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.541815996170044, "num_tokens": 1, "num_tokens_all": 850, "is_greedy": true, "logits_per_token": -0.541815996170044, "logits_per_char": -0.135453999042511, "num_chars": 4}, {"sum_logits": -1.0644700527191162, "num_tokens": 1, "num_tokens_all": 850, "is_greedy": false, "logits_per_token": -1.0644700527191162, "logits_per_char": -0.35482335090637207, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 88, "native_id": 1704, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5610809326171875, "incorrect_loss_raw": 0.30136018991470337, "correct_loss_per_char": 0.3902702331542969, "incorrect_loss_per_char": 0.10045339663823445, "correct_loss_per_token": 1.5610809326171875, "incorrect_loss_per_token": 0.30136018991470337, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5610809326171875, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": false, "logits_per_token": -1.5610809326171875, "logits_per_char": -0.3902702331542969, "num_chars": 4}, {"sum_logits": -0.30136018991470337, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": true, "logits_per_token": -0.30136018991470337, "logits_per_char": -0.10045339663823445, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 89, "native_id": 1133, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8068769574165344, "incorrect_loss_raw": 0.6215472221374512, "correct_loss_per_char": 0.2017192393541336, "incorrect_loss_per_char": 0.2071824073791504, "correct_loss_per_token": 0.8068769574165344, "incorrect_loss_per_token": 0.6215472221374512, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8068769574165344, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": false, "logits_per_token": -0.8068769574165344, "logits_per_char": -0.2017192393541336, "num_chars": 4}, {"sum_logits": -0.6215472221374512, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": true, "logits_per_token": -0.6215472221374512, "logits_per_char": -0.2071824073791504, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 90, "native_id": 2713, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4086620807647705, "incorrect_loss_raw": 0.3213902711868286, "correct_loss_per_char": 0.3521655201911926, "incorrect_loss_per_char": 0.10713009039560954, "correct_loss_per_token": 1.4086620807647705, "incorrect_loss_per_token": 0.3213902711868286, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4086620807647705, "num_tokens": 1, "num_tokens_all": 1008, "is_greedy": false, "logits_per_token": -1.4086620807647705, "logits_per_char": -0.3521655201911926, "num_chars": 4}, {"sum_logits": -0.3213902711868286, "num_tokens": 1, "num_tokens_all": 1008, "is_greedy": true, "logits_per_token": -0.3213902711868286, "logits_per_char": -0.10713009039560954, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 91, "native_id": 164, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26753368973731995, "incorrect_loss_raw": 1.5339456796646118, "correct_loss_per_char": 0.08917789657910664, "incorrect_loss_per_char": 0.38348641991615295, "correct_loss_per_token": 0.26753368973731995, "incorrect_loss_per_token": 1.5339456796646118, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5339456796646118, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -1.5339456796646118, "logits_per_char": -0.38348641991615295, "num_chars": 4}, {"sum_logits": -0.26753368973731995, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": true, "logits_per_token": -0.26753368973731995, "logits_per_char": -0.08917789657910664, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 92, "native_id": 726, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1206862926483154, "incorrect_loss_raw": 0.5071056485176086, "correct_loss_per_char": 0.28017157316207886, "incorrect_loss_per_char": 0.16903521617253622, "correct_loss_per_token": 1.1206862926483154, "incorrect_loss_per_token": 0.5071056485176086, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1206862926483154, "num_tokens": 1, "num_tokens_all": 842, "is_greedy": false, "logits_per_token": -1.1206862926483154, "logits_per_char": -0.28017157316207886, "num_chars": 4}, {"sum_logits": -0.5071056485176086, "num_tokens": 1, "num_tokens_all": 842, "is_greedy": true, "logits_per_token": -0.5071056485176086, "logits_per_char": -0.16903521617253622, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 93, "native_id": 1112, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0607565641403198, "incorrect_loss_raw": 0.45065978169441223, "correct_loss_per_char": 0.26518914103507996, "incorrect_loss_per_char": 0.15021992723147073, "correct_loss_per_token": 1.0607565641403198, "incorrect_loss_per_token": 0.45065978169441223, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0607565641403198, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.0607565641403198, "logits_per_char": -0.26518914103507996, "num_chars": 4}, {"sum_logits": -0.45065978169441223, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": true, "logits_per_token": -0.45065978169441223, "logits_per_char": -0.15021992723147073, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 94, "native_id": 633, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7718942165374756, "incorrect_loss_raw": 0.7412021160125732, "correct_loss_per_char": 0.2572980721791585, "incorrect_loss_per_char": 0.1853005290031433, "correct_loss_per_token": 0.7718942165374756, "incorrect_loss_per_token": 0.7412021160125732, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7412021160125732, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": true, "logits_per_token": -0.7412021160125732, "logits_per_char": -0.1853005290031433, "num_chars": 4}, {"sum_logits": -0.7718942165374756, "num_tokens": 1, "num_tokens_all": 1070, "is_greedy": false, "logits_per_token": -0.7718942165374756, "logits_per_char": -0.2572980721791585, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 95, "native_id": 1229, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8958373665809631, "incorrect_loss_raw": 0.5619755387306213, "correct_loss_per_char": 0.22395934164524078, "incorrect_loss_per_char": 0.18732517957687378, "correct_loss_per_token": 0.8958373665809631, "incorrect_loss_per_token": 0.5619755387306213, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8958373665809631, "num_tokens": 1, "num_tokens_all": 901, "is_greedy": false, "logits_per_token": -0.8958373665809631, "logits_per_char": -0.22395934164524078, "num_chars": 4}, {"sum_logits": -0.5619755387306213, "num_tokens": 1, "num_tokens_all": 901, "is_greedy": true, "logits_per_token": -0.5619755387306213, "logits_per_char": -0.18732517957687378, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 96, "native_id": 3175, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6669990420341492, "incorrect_loss_raw": 0.7598795890808105, "correct_loss_per_char": 0.1667497605085373, "incorrect_loss_per_char": 0.2532931963602702, "correct_loss_per_token": 0.6669990420341492, "incorrect_loss_per_token": 0.7598795890808105, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6669990420341492, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": true, "logits_per_token": -0.6669990420341492, "logits_per_char": -0.1667497605085373, "num_chars": 4}, {"sum_logits": -0.7598795890808105, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -0.7598795890808105, "logits_per_char": -0.2532931963602702, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 97, "native_id": 1902, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.34206095337867737, "incorrect_loss_raw": 1.4581271409988403, "correct_loss_per_char": 0.11402031779289246, "incorrect_loss_per_char": 0.3645317852497101, "correct_loss_per_token": 0.34206095337867737, "incorrect_loss_per_token": 1.4581271409988403, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4581271409988403, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": false, "logits_per_token": -1.4581271409988403, "logits_per_char": -0.3645317852497101, "num_chars": 4}, {"sum_logits": -0.34206095337867737, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": true, "logits_per_token": -0.34206095337867737, "logits_per_char": -0.11402031779289246, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 98, "native_id": 168, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4347979426383972, "incorrect_loss_raw": 1.1238508224487305, "correct_loss_per_char": 0.14493264754613241, "incorrect_loss_per_char": 0.2809627056121826, "correct_loss_per_token": 0.4347979426383972, "incorrect_loss_per_token": 1.1238508224487305, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1238508224487305, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.1238508224487305, "logits_per_char": -0.2809627056121826, "num_chars": 4}, {"sum_logits": -0.4347979426383972, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": true, "logits_per_token": -0.4347979426383972, "logits_per_char": -0.14493264754613241, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 99, "native_id": 2306, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3460088074207306, "incorrect_loss_raw": 1.5478036403656006, "correct_loss_per_char": 0.08650220185518265, "incorrect_loss_per_char": 0.5159345467885336, "correct_loss_per_token": 0.3460088074207306, "incorrect_loss_per_token": 1.5478036403656006, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3460088074207306, "num_tokens": 1, "num_tokens_all": 835, "is_greedy": true, "logits_per_token": -0.3460088074207306, "logits_per_char": -0.08650220185518265, "num_chars": 4}, {"sum_logits": -1.5478036403656006, "num_tokens": 1, "num_tokens_all": 835, "is_greedy": false, "logits_per_token": -1.5478036403656006, "logits_per_char": -0.5159345467885336, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 100, "native_id": 1581, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0191881656646729, "incorrect_loss_raw": 0.4937582015991211, "correct_loss_per_char": 0.2547970414161682, "incorrect_loss_per_char": 0.16458606719970703, "correct_loss_per_token": 1.0191881656646729, "incorrect_loss_per_token": 0.4937582015991211, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0191881656646729, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": false, "logits_per_token": -1.0191881656646729, "logits_per_char": -0.2547970414161682, "num_chars": 4}, {"sum_logits": -0.4937582015991211, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": true, "logits_per_token": -0.4937582015991211, "logits_per_char": -0.16458606719970703, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 101, "native_id": 3130, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5813691020011902, "incorrect_loss_raw": 0.8712286353111267, "correct_loss_per_char": 0.19378970066706339, "incorrect_loss_per_char": 0.21780715882778168, "correct_loss_per_token": 0.5813691020011902, "incorrect_loss_per_token": 0.8712286353111267, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8712286353111267, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": false, "logits_per_token": -0.8712286353111267, "logits_per_char": -0.21780715882778168, "num_chars": 4}, {"sum_logits": -0.5813691020011902, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": true, "logits_per_token": -0.5813691020011902, "logits_per_char": -0.19378970066706339, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 102, "native_id": 1431, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6759895086288452, "incorrect_loss_raw": 0.22418661415576935, "correct_loss_per_char": 0.4189973771572113, "incorrect_loss_per_char": 0.07472887138525645, "correct_loss_per_token": 1.6759895086288452, "incorrect_loss_per_token": 0.22418661415576935, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6759895086288452, "num_tokens": 1, "num_tokens_all": 887, "is_greedy": false, "logits_per_token": -1.6759895086288452, "logits_per_char": -0.4189973771572113, "num_chars": 4}, {"sum_logits": -0.22418661415576935, "num_tokens": 1, "num_tokens_all": 887, "is_greedy": true, "logits_per_token": -0.22418661415576935, "logits_per_char": -0.07472887138525645, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 103, "native_id": 2031, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7189520597457886, "incorrect_loss_raw": 0.8281916379928589, "correct_loss_per_char": 0.17973801493644714, "incorrect_loss_per_char": 0.27606387933095294, "correct_loss_per_token": 0.7189520597457886, "incorrect_loss_per_token": 0.8281916379928589, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7189520597457886, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": true, "logits_per_token": -0.7189520597457886, "logits_per_char": -0.17973801493644714, "num_chars": 4}, {"sum_logits": -0.8281916379928589, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": false, "logits_per_token": -0.8281916379928589, "logits_per_char": -0.27606387933095294, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 104, "native_id": 1399, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7047160267829895, "incorrect_loss_raw": 0.8376792669296265, "correct_loss_per_char": 0.2349053422609965, "incorrect_loss_per_char": 0.20941981673240662, "correct_loss_per_token": 0.7047160267829895, "incorrect_loss_per_token": 0.8376792669296265, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8376792669296265, "num_tokens": 1, "num_tokens_all": 875, "is_greedy": false, "logits_per_token": -0.8376792669296265, "logits_per_char": -0.20941981673240662, "num_chars": 4}, {"sum_logits": -0.7047160267829895, "num_tokens": 1, "num_tokens_all": 875, "is_greedy": true, "logits_per_token": -0.7047160267829895, "logits_per_char": -0.2349053422609965, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 105, "native_id": 2387, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1388649940490723, "incorrect_loss_raw": 0.4815676212310791, "correct_loss_per_char": 0.3796216646830241, "incorrect_loss_per_char": 0.12039190530776978, "correct_loss_per_token": 1.1388649940490723, "incorrect_loss_per_token": 0.4815676212310791, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4815676212310791, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.4815676212310791, "logits_per_char": -0.12039190530776978, "num_chars": 4}, {"sum_logits": -1.1388649940490723, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -1.1388649940490723, "logits_per_char": -0.3796216646830241, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 106, "native_id": 1917, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5019738078117371, "incorrect_loss_raw": 0.9890528917312622, "correct_loss_per_char": 0.12549345195293427, "incorrect_loss_per_char": 0.3296842972437541, "correct_loss_per_token": 0.5019738078117371, "incorrect_loss_per_token": 0.9890528917312622, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5019738078117371, "num_tokens": 1, "num_tokens_all": 894, "is_greedy": true, "logits_per_token": -0.5019738078117371, "logits_per_char": -0.12549345195293427, "num_chars": 4}, {"sum_logits": -0.9890528917312622, "num_tokens": 1, "num_tokens_all": 894, "is_greedy": false, "logits_per_token": -0.9890528917312622, "logits_per_char": -0.3296842972437541, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 107, "native_id": 1949, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9003143310546875, "incorrect_loss_raw": 0.5696232318878174, "correct_loss_per_char": 0.3001047770182292, "incorrect_loss_per_char": 0.14240580797195435, "correct_loss_per_token": 0.9003143310546875, "incorrect_loss_per_token": 0.5696232318878174, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5696232318878174, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": true, "logits_per_token": -0.5696232318878174, "logits_per_char": -0.14240580797195435, "num_chars": 4}, {"sum_logits": -0.9003143310546875, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": false, "logits_per_token": -0.9003143310546875, "logits_per_char": -0.3001047770182292, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 108, "native_id": 185, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5676702260971069, "incorrect_loss_raw": 0.9040296673774719, "correct_loss_per_char": 0.14191755652427673, "incorrect_loss_per_char": 0.3013432224591573, "correct_loss_per_token": 0.5676702260971069, "incorrect_loss_per_token": 0.9040296673774719, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5676702260971069, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": true, "logits_per_token": -0.5676702260971069, "logits_per_char": -0.14191755652427673, "num_chars": 4}, {"sum_logits": -0.9040296673774719, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": false, "logits_per_token": -0.9040296673774719, "logits_per_char": -0.3013432224591573, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 109, "native_id": 1928, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6912171840667725, "incorrect_loss_raw": 0.7513855695724487, "correct_loss_per_char": 0.17280429601669312, "incorrect_loss_per_char": 0.2504618565241496, "correct_loss_per_token": 0.6912171840667725, "incorrect_loss_per_token": 0.7513855695724487, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6912171840667725, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": true, "logits_per_token": -0.6912171840667725, "logits_per_char": -0.17280429601669312, "num_chars": 4}, {"sum_logits": -0.7513855695724487, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": false, "logits_per_token": -0.7513855695724487, "logits_per_char": -0.2504618565241496, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 110, "native_id": 2436, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4690033197402954, "incorrect_loss_raw": 0.29905590415000916, "correct_loss_per_char": 0.36725082993507385, "incorrect_loss_per_char": 0.09968530138333638, "correct_loss_per_token": 1.4690033197402954, "incorrect_loss_per_token": 0.29905590415000916, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4690033197402954, "num_tokens": 1, "num_tokens_all": 837, "is_greedy": false, "logits_per_token": -1.4690033197402954, "logits_per_char": -0.36725082993507385, "num_chars": 4}, {"sum_logits": -0.29905590415000916, "num_tokens": 1, "num_tokens_all": 837, "is_greedy": true, "logits_per_token": -0.29905590415000916, "logits_per_char": -0.09968530138333638, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 111, "native_id": 696, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9185104370117188, "incorrect_loss_raw": 0.6035606861114502, "correct_loss_per_char": 0.2296276092529297, "incorrect_loss_per_char": 0.2011868953704834, "correct_loss_per_token": 0.9185104370117188, "incorrect_loss_per_token": 0.6035606861114502, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9185104370117188, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -0.9185104370117188, "logits_per_char": -0.2296276092529297, "num_chars": 4}, {"sum_logits": -0.6035606861114502, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": true, "logits_per_token": -0.6035606861114502, "logits_per_char": -0.2011868953704834, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 112, "native_id": 1800, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9652281403541565, "incorrect_loss_raw": 0.5457778573036194, "correct_loss_per_char": 0.24130703508853912, "incorrect_loss_per_char": 0.1819259524345398, "correct_loss_per_token": 0.9652281403541565, "incorrect_loss_per_token": 0.5457778573036194, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9652281403541565, "num_tokens": 1, "num_tokens_all": 1004, "is_greedy": false, "logits_per_token": -0.9652281403541565, "logits_per_char": -0.24130703508853912, "num_chars": 4}, {"sum_logits": -0.5457778573036194, "num_tokens": 1, "num_tokens_all": 1004, "is_greedy": true, "logits_per_token": -0.5457778573036194, "logits_per_char": -0.1819259524345398, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 113, "native_id": 3004, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3329521715641022, "incorrect_loss_raw": 1.4179706573486328, "correct_loss_per_char": 0.11098405718803406, "incorrect_loss_per_char": 0.3544926643371582, "correct_loss_per_token": 0.3329521715641022, "incorrect_loss_per_token": 1.4179706573486328, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4179706573486328, "num_tokens": 1, "num_tokens_all": 856, "is_greedy": false, "logits_per_token": -1.4179706573486328, "logits_per_char": -0.3544926643371582, "num_chars": 4}, {"sum_logits": -0.3329521715641022, "num_tokens": 1, "num_tokens_all": 856, "is_greedy": true, "logits_per_token": -0.3329521715641022, "logits_per_char": -0.11098405718803406, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 114, "native_id": 2126, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9340864419937134, "incorrect_loss_raw": 0.5862812399864197, "correct_loss_per_char": 0.23352161049842834, "incorrect_loss_per_char": 0.19542707999547324, "correct_loss_per_token": 0.9340864419937134, "incorrect_loss_per_token": 0.5862812399864197, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9340864419937134, "num_tokens": 1, "num_tokens_all": 1037, "is_greedy": false, "logits_per_token": -0.9340864419937134, "logits_per_char": -0.23352161049842834, "num_chars": 4}, {"sum_logits": -0.5862812399864197, "num_tokens": 1, "num_tokens_all": 1037, "is_greedy": true, "logits_per_token": -0.5862812399864197, "logits_per_char": -0.19542707999547324, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 115, "native_id": 1793, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4618214964866638, "incorrect_loss_raw": 1.1188795566558838, "correct_loss_per_char": 0.11545537412166595, "incorrect_loss_per_char": 0.37295985221862793, "correct_loss_per_token": 0.4618214964866638, "incorrect_loss_per_token": 1.1188795566558838, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4618214964866638, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": true, "logits_per_token": -0.4618214964866638, "logits_per_char": -0.11545537412166595, "num_chars": 4}, {"sum_logits": -1.1188795566558838, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": false, "logits_per_token": -1.1188795566558838, "logits_per_char": -0.37295985221862793, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 116, "native_id": 1211, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0698281526565552, "incorrect_loss_raw": 0.581876814365387, "correct_loss_per_char": 0.35660938421885174, "incorrect_loss_per_char": 0.14546920359134674, "correct_loss_per_token": 1.0698281526565552, "incorrect_loss_per_token": 0.581876814365387, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.581876814365387, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.581876814365387, "logits_per_char": -0.14546920359134674, "num_chars": 4}, {"sum_logits": -1.0698281526565552, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.0698281526565552, "logits_per_char": -0.35660938421885174, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 117, "native_id": 1126, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6396675109863281, "incorrect_loss_raw": 0.8754814863204956, "correct_loss_per_char": 0.21322250366210938, "incorrect_loss_per_char": 0.2188703715801239, "correct_loss_per_token": 0.6396675109863281, "incorrect_loss_per_token": 0.8754814863204956, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8754814863204956, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": false, "logits_per_token": -0.8754814863204956, "logits_per_char": -0.2188703715801239, "num_chars": 4}, {"sum_logits": -0.6396675109863281, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": true, "logits_per_token": -0.6396675109863281, "logits_per_char": -0.21322250366210938, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 118, "native_id": 507, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5958650708198547, "incorrect_loss_raw": 0.9307847023010254, "correct_loss_per_char": 0.1986216902732849, "incorrect_loss_per_char": 0.23269617557525635, "correct_loss_per_token": 0.5958650708198547, "incorrect_loss_per_token": 0.9307847023010254, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9307847023010254, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -0.9307847023010254, "logits_per_char": -0.23269617557525635, "num_chars": 4}, {"sum_logits": -0.5958650708198547, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": true, "logits_per_token": -0.5958650708198547, "logits_per_char": -0.1986216902732849, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 119, "native_id": 760, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8595107793807983, "incorrect_loss_raw": 0.7513591051101685, "correct_loss_per_char": 0.21487769484519958, "incorrect_loss_per_char": 0.25045303503672284, "correct_loss_per_token": 0.8595107793807983, "incorrect_loss_per_token": 0.7513591051101685, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8595107793807983, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": false, "logits_per_token": -0.8595107793807983, "logits_per_char": -0.21487769484519958, "num_chars": 4}, {"sum_logits": -0.7513591051101685, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": true, "logits_per_token": -0.7513591051101685, "logits_per_char": -0.25045303503672284, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 120, "native_id": 1705, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0819308757781982, "incorrect_loss_raw": 0.5157816410064697, "correct_loss_per_char": 0.27048271894454956, "incorrect_loss_per_char": 0.17192721366882324, "correct_loss_per_token": 1.0819308757781982, "incorrect_loss_per_token": 0.5157816410064697, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0819308757781982, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": false, "logits_per_token": -1.0819308757781982, "logits_per_char": -0.27048271894454956, "num_chars": 4}, {"sum_logits": -0.5157816410064697, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": true, "logits_per_token": -0.5157816410064697, "logits_per_char": -0.17192721366882324, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 121, "native_id": 1786, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9621375799179077, "incorrect_loss_raw": 0.5907471179962158, "correct_loss_per_char": 0.24053439497947693, "incorrect_loss_per_char": 0.19691570599873862, "correct_loss_per_token": 0.9621375799179077, "incorrect_loss_per_token": 0.5907471179962158, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9621375799179077, "num_tokens": 1, "num_tokens_all": 863, "is_greedy": false, "logits_per_token": -0.9621375799179077, "logits_per_char": -0.24053439497947693, "num_chars": 4}, {"sum_logits": -0.5907471179962158, "num_tokens": 1, "num_tokens_all": 863, "is_greedy": true, "logits_per_token": -0.5907471179962158, "logits_per_char": -0.19691570599873862, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 122, "native_id": 489, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.396584689617157, "incorrect_loss_raw": 1.2607206106185913, "correct_loss_per_char": 0.13219489653905234, "incorrect_loss_per_char": 0.3151801526546478, "correct_loss_per_token": 0.396584689617157, "incorrect_loss_per_token": 1.2607206106185913, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2607206106185913, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.2607206106185913, "logits_per_char": -0.3151801526546478, "num_chars": 4}, {"sum_logits": -0.396584689617157, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -0.396584689617157, "logits_per_char": -0.13219489653905234, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 123, "native_id": 2170, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8118069767951965, "incorrect_loss_raw": 0.6856076121330261, "correct_loss_per_char": 0.27060232559839886, "incorrect_loss_per_char": 0.17140190303325653, "correct_loss_per_token": 0.8118069767951965, "incorrect_loss_per_token": 0.6856076121330261, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6856076121330261, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -0.6856076121330261, "logits_per_char": -0.17140190303325653, "num_chars": 4}, {"sum_logits": -0.8118069767951965, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -0.8118069767951965, "logits_per_char": -0.27060232559839886, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 124, "native_id": 422, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2686503231525421, "incorrect_loss_raw": 1.5558125972747803, "correct_loss_per_char": 0.08955010771751404, "incorrect_loss_per_char": 0.38895314931869507, "correct_loss_per_token": 0.2686503231525421, "incorrect_loss_per_token": 1.5558125972747803, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5558125972747803, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": false, "logits_per_token": -1.5558125972747803, "logits_per_char": -0.38895314931869507, "num_chars": 4}, {"sum_logits": -0.2686503231525421, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": true, "logits_per_token": -0.2686503231525421, "logits_per_char": -0.08955010771751404, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 125, "native_id": 1987, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1264545917510986, "incorrect_loss_raw": 0.41979578137397766, "correct_loss_per_char": 0.28161364793777466, "incorrect_loss_per_char": 0.1399319271246592, "correct_loss_per_token": 1.1264545917510986, "incorrect_loss_per_token": 0.41979578137397766, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1264545917510986, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": false, "logits_per_token": -1.1264545917510986, "logits_per_char": -0.28161364793777466, "num_chars": 4}, {"sum_logits": -0.41979578137397766, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": true, "logits_per_token": -0.41979578137397766, "logits_per_char": -0.1399319271246592, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 126, "native_id": 1543, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6054129600524902, "incorrect_loss_raw": 0.8552595376968384, "correct_loss_per_char": 0.20180432001749674, "incorrect_loss_per_char": 0.2138148844242096, "correct_loss_per_token": 0.6054129600524902, "incorrect_loss_per_token": 0.8552595376968384, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8552595376968384, "num_tokens": 1, "num_tokens_all": 1012, "is_greedy": false, "logits_per_token": -0.8552595376968384, "logits_per_char": -0.2138148844242096, "num_chars": 4}, {"sum_logits": -0.6054129600524902, "num_tokens": 1, "num_tokens_all": 1012, "is_greedy": true, "logits_per_token": -0.6054129600524902, "logits_per_char": -0.20180432001749674, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 127, "native_id": 2688, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4336809813976288, "incorrect_loss_raw": 1.1047563552856445, "correct_loss_per_char": 0.14456032713254294, "incorrect_loss_per_char": 0.27618908882141113, "correct_loss_per_token": 0.4336809813976288, "incorrect_loss_per_token": 1.1047563552856445, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1047563552856445, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.1047563552856445, "logits_per_char": -0.27618908882141113, "num_chars": 4}, {"sum_logits": -0.4336809813976288, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": true, "logits_per_token": -0.4336809813976288, "logits_per_char": -0.14456032713254294, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 128, "native_id": 1046, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8479976654052734, "incorrect_loss_raw": 0.6266862154006958, "correct_loss_per_char": 0.21199941635131836, "incorrect_loss_per_char": 0.20889540513356528, "correct_loss_per_token": 0.8479976654052734, "incorrect_loss_per_token": 0.6266862154006958, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8479976654052734, "num_tokens": 1, "num_tokens_all": 864, "is_greedy": false, "logits_per_token": -0.8479976654052734, "logits_per_char": -0.21199941635131836, "num_chars": 4}, {"sum_logits": -0.6266862154006958, "num_tokens": 1, "num_tokens_all": 864, "is_greedy": true, "logits_per_token": -0.6266862154006958, "logits_per_char": -0.20889540513356528, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 129, "native_id": 2625, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4928191900253296, "incorrect_loss_raw": 0.9964786767959595, "correct_loss_per_char": 0.16427306334177652, "incorrect_loss_per_char": 0.24911966919898987, "correct_loss_per_token": 0.4928191900253296, "incorrect_loss_per_token": 0.9964786767959595, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9964786767959595, "num_tokens": 1, "num_tokens_all": 1046, "is_greedy": false, "logits_per_token": -0.9964786767959595, "logits_per_char": -0.24911966919898987, "num_chars": 4}, {"sum_logits": -0.4928191900253296, "num_tokens": 1, "num_tokens_all": 1046, "is_greedy": true, "logits_per_token": -0.4928191900253296, "logits_per_char": -0.16427306334177652, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 130, "native_id": 784, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3664129078388214, "incorrect_loss_raw": 1.289329171180725, "correct_loss_per_char": 0.1221376359462738, "incorrect_loss_per_char": 0.3223322927951813, "correct_loss_per_token": 0.3664129078388214, "incorrect_loss_per_token": 1.289329171180725, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.289329171180725, "num_tokens": 1, "num_tokens_all": 1009, "is_greedy": false, "logits_per_token": -1.289329171180725, "logits_per_char": -0.3223322927951813, "num_chars": 4}, {"sum_logits": -0.3664129078388214, "num_tokens": 1, "num_tokens_all": 1009, "is_greedy": true, "logits_per_token": -0.3664129078388214, "logits_per_char": -0.1221376359462738, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 131, "native_id": 1414, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9639041423797607, "incorrect_loss_raw": 0.5447477102279663, "correct_loss_per_char": 0.24097603559494019, "incorrect_loss_per_char": 0.18158257007598877, "correct_loss_per_token": 0.9639041423797607, "incorrect_loss_per_token": 0.5447477102279663, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9639041423797607, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": false, "logits_per_token": -0.9639041423797607, "logits_per_char": -0.24097603559494019, "num_chars": 4}, {"sum_logits": -0.5447477102279663, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": true, "logits_per_token": -0.5447477102279663, "logits_per_char": -0.18158257007598877, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 132, "native_id": 443, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.573858916759491, "incorrect_loss_raw": 0.9923427104949951, "correct_loss_per_char": 0.14346472918987274, "incorrect_loss_per_char": 0.3307809034983317, "correct_loss_per_token": 0.573858916759491, "incorrect_loss_per_token": 0.9923427104949951, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.573858916759491, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": true, "logits_per_token": -0.573858916759491, "logits_per_char": -0.14346472918987274, "num_chars": 4}, {"sum_logits": -0.9923427104949951, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": false, "logits_per_token": -0.9923427104949951, "logits_per_char": -0.3307809034983317, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 133, "native_id": 2878, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1845563650131226, "incorrect_loss_raw": 0.4311871826648712, "correct_loss_per_char": 0.29613909125328064, "incorrect_loss_per_char": 0.1437290608882904, "correct_loss_per_token": 1.1845563650131226, "incorrect_loss_per_token": 0.4311871826648712, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1845563650131226, "num_tokens": 1, "num_tokens_all": 1202, "is_greedy": false, "logits_per_token": -1.1845563650131226, "logits_per_char": -0.29613909125328064, "num_chars": 4}, {"sum_logits": -0.4311871826648712, "num_tokens": 1, "num_tokens_all": 1202, "is_greedy": true, "logits_per_token": -0.4311871826648712, "logits_per_char": -0.1437290608882904, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 134, "native_id": 2867, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4997805058956146, "incorrect_loss_raw": 1.0401418209075928, "correct_loss_per_char": 0.16659350196520487, "incorrect_loss_per_char": 0.2600354552268982, "correct_loss_per_token": 0.4997805058956146, "incorrect_loss_per_token": 1.0401418209075928, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0401418209075928, "num_tokens": 1, "num_tokens_all": 885, "is_greedy": false, "logits_per_token": -1.0401418209075928, "logits_per_char": -0.2600354552268982, "num_chars": 4}, {"sum_logits": -0.4997805058956146, "num_tokens": 1, "num_tokens_all": 885, "is_greedy": true, "logits_per_token": -0.4997805058956146, "logits_per_char": -0.16659350196520487, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 135, "native_id": 643, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7638823390007019, "incorrect_loss_raw": 0.692507266998291, "correct_loss_per_char": 0.19097058475017548, "incorrect_loss_per_char": 0.23083575566609701, "correct_loss_per_token": 0.7638823390007019, "incorrect_loss_per_token": 0.692507266998291, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7638823390007019, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": false, "logits_per_token": -0.7638823390007019, "logits_per_char": -0.19097058475017548, "num_chars": 4}, {"sum_logits": -0.692507266998291, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": true, "logits_per_token": -0.692507266998291, "logits_per_char": -0.23083575566609701, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 136, "native_id": 2377, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4979325532913208, "incorrect_loss_raw": 1.114034652709961, "correct_loss_per_char": 0.1244831383228302, "incorrect_loss_per_char": 0.3713448842366536, "correct_loss_per_token": 0.4979325532913208, "incorrect_loss_per_token": 1.114034652709961, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4979325532913208, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": true, "logits_per_token": -0.4979325532913208, "logits_per_char": -0.1244831383228302, "num_chars": 4}, {"sum_logits": -1.114034652709961, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": false, "logits_per_token": -1.114034652709961, "logits_per_char": -0.3713448842366536, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 137, "native_id": 1103, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.38491106033325195, "incorrect_loss_raw": 1.2873648405075073, "correct_loss_per_char": 0.12830368677775064, "incorrect_loss_per_char": 0.32184121012687683, "correct_loss_per_token": 0.38491106033325195, "incorrect_loss_per_token": 1.2873648405075073, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2873648405075073, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": false, "logits_per_token": -1.2873648405075073, "logits_per_char": -0.32184121012687683, "num_chars": 4}, {"sum_logits": -0.38491106033325195, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": true, "logits_per_token": -0.38491106033325195, "logits_per_char": -0.12830368677775064, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 138, "native_id": 634, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4739835560321808, "incorrect_loss_raw": 1.064915418624878, "correct_loss_per_char": 0.1579945186773936, "incorrect_loss_per_char": 0.2662288546562195, "correct_loss_per_token": 0.4739835560321808, "incorrect_loss_per_token": 1.064915418624878, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.064915418624878, "num_tokens": 1, "num_tokens_all": 1026, "is_greedy": false, "logits_per_token": -1.064915418624878, "logits_per_char": -0.2662288546562195, "num_chars": 4}, {"sum_logits": -0.4739835560321808, "num_tokens": 1, "num_tokens_all": 1026, "is_greedy": true, "logits_per_token": -0.4739835560321808, "logits_per_char": -0.1579945186773936, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 139, "native_id": 2949, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9861138463020325, "incorrect_loss_raw": 0.5603093504905701, "correct_loss_per_char": 0.24652846157550812, "incorrect_loss_per_char": 0.1867697834968567, "correct_loss_per_token": 0.9861138463020325, "incorrect_loss_per_token": 0.5603093504905701, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9861138463020325, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": false, "logits_per_token": -0.9861138463020325, "logits_per_char": -0.24652846157550812, "num_chars": 4}, {"sum_logits": -0.5603093504905701, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": true, "logits_per_token": -0.5603093504905701, "logits_per_char": -0.1867697834968567, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 140, "native_id": 1325, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.30194923281669617, "incorrect_loss_raw": 1.5185651779174805, "correct_loss_per_char": 0.10064974427223206, "incorrect_loss_per_char": 0.3796412944793701, "correct_loss_per_token": 0.30194923281669617, "incorrect_loss_per_token": 1.5185651779174805, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5185651779174805, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": false, "logits_per_token": -1.5185651779174805, "logits_per_char": -0.3796412944793701, "num_chars": 4}, {"sum_logits": -0.30194923281669617, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": true, "logits_per_token": -0.30194923281669617, "logits_per_char": -0.10064974427223206, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 141, "native_id": 1829, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7244546413421631, "incorrect_loss_raw": 0.7174191474914551, "correct_loss_per_char": 0.18111366033554077, "incorrect_loss_per_char": 0.23913971583048502, "correct_loss_per_token": 0.7244546413421631, "incorrect_loss_per_token": 0.7174191474914551, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7244546413421631, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": false, "logits_per_token": -0.7244546413421631, "logits_per_char": -0.18111366033554077, "num_chars": 4}, {"sum_logits": -0.7174191474914551, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": true, "logits_per_token": -0.7174191474914551, "logits_per_char": -0.23913971583048502, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 142, "native_id": 2951, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5769140720367432, "incorrect_loss_raw": 0.9303750991821289, "correct_loss_per_char": 0.1442285180091858, "incorrect_loss_per_char": 0.31012503306070965, "correct_loss_per_token": 0.5769140720367432, "incorrect_loss_per_token": 0.9303750991821289, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5769140720367432, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": true, "logits_per_token": -0.5769140720367432, "logits_per_char": -0.1442285180091858, "num_chars": 4}, {"sum_logits": -0.9303750991821289, "num_tokens": 1, "num_tokens_all": 1092, "is_greedy": false, "logits_per_token": -0.9303750991821289, "logits_per_char": -0.31012503306070965, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 143, "native_id": 3209, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5727471113204956, "incorrect_loss_raw": 0.9912676811218262, "correct_loss_per_char": 0.1431867778301239, "incorrect_loss_per_char": 0.3304225603739421, "correct_loss_per_token": 0.5727471113204956, "incorrect_loss_per_token": 0.9912676811218262, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5727471113204956, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": true, "logits_per_token": -0.5727471113204956, "logits_per_char": -0.1431867778301239, "num_chars": 4}, {"sum_logits": -0.9912676811218262, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": false, "logits_per_token": -0.9912676811218262, "logits_per_char": -0.3304225603739421, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 144, "native_id": 321, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9527298808097839, "incorrect_loss_raw": 0.521494448184967, "correct_loss_per_char": 0.23818247020244598, "incorrect_loss_per_char": 0.17383148272832236, "correct_loss_per_token": 0.9527298808097839, "incorrect_loss_per_token": 0.521494448184967, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9527298808097839, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": false, "logits_per_token": -0.9527298808097839, "logits_per_char": -0.23818247020244598, "num_chars": 4}, {"sum_logits": -0.521494448184967, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": true, "logits_per_token": -0.521494448184967, "logits_per_char": -0.17383148272832236, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 145, "native_id": 1618, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5477266311645508, "incorrect_loss_raw": 1.0005934238433838, "correct_loss_per_char": 0.18257554372151694, "incorrect_loss_per_char": 0.25014835596084595, "correct_loss_per_token": 0.5477266311645508, "incorrect_loss_per_token": 1.0005934238433838, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0005934238433838, "num_tokens": 1, "num_tokens_all": 875, "is_greedy": false, "logits_per_token": -1.0005934238433838, "logits_per_char": -0.25014835596084595, "num_chars": 4}, {"sum_logits": -0.5477266311645508, "num_tokens": 1, "num_tokens_all": 875, "is_greedy": true, "logits_per_token": -0.5477266311645508, "logits_per_char": -0.18257554372151694, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 146, "native_id": 877, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9272712469100952, "incorrect_loss_raw": 0.5334062576293945, "correct_loss_per_char": 0.2318178117275238, "incorrect_loss_per_char": 0.17780208587646484, "correct_loss_per_token": 0.9272712469100952, "incorrect_loss_per_token": 0.5334062576293945, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9272712469100952, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": false, "logits_per_token": -0.9272712469100952, "logits_per_char": -0.2318178117275238, "num_chars": 4}, {"sum_logits": -0.5334062576293945, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": true, "logits_per_token": -0.5334062576293945, "logits_per_char": -0.17780208587646484, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 147, "native_id": 195, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1040477752685547, "incorrect_loss_raw": 0.4609415531158447, "correct_loss_per_char": 0.27601194381713867, "incorrect_loss_per_char": 0.15364718437194824, "correct_loss_per_token": 1.1040477752685547, "incorrect_loss_per_token": 0.4609415531158447, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1040477752685547, "num_tokens": 1, "num_tokens_all": 1270, "is_greedy": false, "logits_per_token": -1.1040477752685547, "logits_per_char": -0.27601194381713867, "num_chars": 4}, {"sum_logits": -0.4609415531158447, "num_tokens": 1, "num_tokens_all": 1270, "is_greedy": true, "logits_per_token": -0.4609415531158447, "logits_per_char": -0.15364718437194824, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 148, "native_id": 1172, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.470043420791626, "incorrect_loss_raw": 1.0713995695114136, "correct_loss_per_char": 0.15668114026387533, "incorrect_loss_per_char": 0.2678498923778534, "correct_loss_per_token": 0.470043420791626, "incorrect_loss_per_token": 1.0713995695114136, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0713995695114136, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": false, "logits_per_token": -1.0713995695114136, "logits_per_char": -0.2678498923778534, "num_chars": 4}, {"sum_logits": -0.470043420791626, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": true, "logits_per_token": -0.470043420791626, "logits_per_char": -0.15668114026387533, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 149, "native_id": 155, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9605197906494141, "incorrect_loss_raw": 0.5455634593963623, "correct_loss_per_char": 0.24012994766235352, "incorrect_loss_per_char": 0.1818544864654541, "correct_loss_per_token": 0.9605197906494141, "incorrect_loss_per_token": 0.5455634593963623, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9605197906494141, "num_tokens": 1, "num_tokens_all": 992, "is_greedy": false, "logits_per_token": -0.9605197906494141, "logits_per_char": -0.24012994766235352, "num_chars": 4}, {"sum_logits": -0.5455634593963623, "num_tokens": 1, "num_tokens_all": 992, "is_greedy": true, "logits_per_token": -0.5455634593963623, "logits_per_char": -0.1818544864654541, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 150, "native_id": 898, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1316165924072266, "incorrect_loss_raw": 0.593187689781189, "correct_loss_per_char": 0.28290414810180664, "incorrect_loss_per_char": 0.197729229927063, "correct_loss_per_token": 1.1316165924072266, "incorrect_loss_per_token": 0.593187689781189, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1316165924072266, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": false, "logits_per_token": -1.1316165924072266, "logits_per_char": -0.28290414810180664, "num_chars": 4}, {"sum_logits": -0.593187689781189, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": true, "logits_per_token": -0.593187689781189, "logits_per_char": -0.197729229927063, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 151, "native_id": 2075, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.346574068069458, "incorrect_loss_raw": 1.283718466758728, "correct_loss_per_char": 0.11552468935648601, "incorrect_loss_per_char": 0.320929616689682, "correct_loss_per_token": 0.346574068069458, "incorrect_loss_per_token": 1.283718466758728, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.283718466758728, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": false, "logits_per_token": -1.283718466758728, "logits_per_char": -0.320929616689682, "num_chars": 4}, {"sum_logits": -0.346574068069458, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": true, "logits_per_token": -0.346574068069458, "logits_per_char": -0.11552468935648601, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 152, "native_id": 359, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6013224124908447, "incorrect_loss_raw": 0.8608440160751343, "correct_loss_per_char": 0.2004408041636149, "incorrect_loss_per_char": 0.21521100401878357, "correct_loss_per_token": 0.6013224124908447, "incorrect_loss_per_token": 0.8608440160751343, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8608440160751343, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -0.8608440160751343, "logits_per_char": -0.21521100401878357, "num_chars": 4}, {"sum_logits": -0.6013224124908447, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": true, "logits_per_token": -0.6013224124908447, "logits_per_char": -0.2004408041636149, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 153, "native_id": 2864, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0656054019927979, "incorrect_loss_raw": 0.46016308665275574, "correct_loss_per_char": 0.26640135049819946, "incorrect_loss_per_char": 0.15338769555091858, "correct_loss_per_token": 1.0656054019927979, "incorrect_loss_per_token": 0.46016308665275574, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0656054019927979, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": false, "logits_per_token": -1.0656054019927979, "logits_per_char": -0.26640135049819946, "num_chars": 4}, {"sum_logits": -0.46016308665275574, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": true, "logits_per_token": -0.46016308665275574, "logits_per_char": -0.15338769555091858, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 154, "native_id": 1298, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.205349087715149, "incorrect_loss_raw": 0.40919890999794006, "correct_loss_per_char": 0.30133727192878723, "incorrect_loss_per_char": 0.13639963666598, "correct_loss_per_token": 1.205349087715149, "incorrect_loss_per_token": 0.40919890999794006, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.205349087715149, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": false, "logits_per_token": -1.205349087715149, "logits_per_char": -0.30133727192878723, "num_chars": 4}, {"sum_logits": -0.40919890999794006, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": true, "logits_per_token": -0.40919890999794006, "logits_per_char": -0.13639963666598, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 155, "native_id": 1251, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7640126943588257, "incorrect_loss_raw": 0.6735663414001465, "correct_loss_per_char": 0.19100317358970642, "incorrect_loss_per_char": 0.22452211380004883, "correct_loss_per_token": 0.7640126943588257, "incorrect_loss_per_token": 0.6735663414001465, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7640126943588257, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": false, "logits_per_token": -0.7640126943588257, "logits_per_char": -0.19100317358970642, "num_chars": 4}, {"sum_logits": -0.6735663414001465, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": true, "logits_per_token": -0.6735663414001465, "logits_per_char": -0.22452211380004883, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 156, "native_id": 1887, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5950027108192444, "incorrect_loss_raw": 0.8677997589111328, "correct_loss_per_char": 0.1487506777048111, "incorrect_loss_per_char": 0.28926658630371094, "correct_loss_per_token": 0.5950027108192444, "incorrect_loss_per_token": 0.8677997589111328, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5950027108192444, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.5950027108192444, "logits_per_char": -0.1487506777048111, "num_chars": 4}, {"sum_logits": -0.8677997589111328, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -0.8677997589111328, "logits_per_char": -0.28926658630371094, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 157, "native_id": 271, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.807668924331665, "incorrect_loss_raw": 0.670609712600708, "correct_loss_per_char": 0.2692229747772217, "incorrect_loss_per_char": 0.167652428150177, "correct_loss_per_token": 0.807668924331665, "incorrect_loss_per_token": 0.670609712600708, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.670609712600708, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": true, "logits_per_token": -0.670609712600708, "logits_per_char": -0.167652428150177, "num_chars": 4}, {"sum_logits": -0.807668924331665, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": false, "logits_per_token": -0.807668924331665, "logits_per_char": -0.2692229747772217, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 158, "native_id": 2396, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6190438866615295, "incorrect_loss_raw": 0.8434093594551086, "correct_loss_per_char": 0.15476097166538239, "incorrect_loss_per_char": 0.2811364531517029, "correct_loss_per_token": 0.6190438866615295, "incorrect_loss_per_token": 0.8434093594551086, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6190438866615295, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": true, "logits_per_token": -0.6190438866615295, "logits_per_char": -0.15476097166538239, "num_chars": 4}, {"sum_logits": -0.8434093594551086, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": false, "logits_per_token": -0.8434093594551086, "logits_per_char": -0.2811364531517029, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 159, "native_id": 1054, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8363884091377258, "incorrect_loss_raw": 0.6065269112586975, "correct_loss_per_char": 0.20909710228443146, "incorrect_loss_per_char": 0.2021756370862325, "correct_loss_per_token": 0.8363884091377258, "incorrect_loss_per_token": 0.6065269112586975, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8363884091377258, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": false, "logits_per_token": -0.8363884091377258, "logits_per_char": -0.20909710228443146, "num_chars": 4}, {"sum_logits": -0.6065269112586975, "num_tokens": 1, "num_tokens_all": 1049, "is_greedy": true, "logits_per_token": -0.6065269112586975, "logits_per_char": -0.2021756370862325, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 160, "native_id": 299, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3085684776306152, "incorrect_loss_raw": 0.4233209192752838, "correct_loss_per_char": 0.3271421194076538, "incorrect_loss_per_char": 0.14110697309176126, "correct_loss_per_token": 1.3085684776306152, "incorrect_loss_per_token": 0.4233209192752838, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3085684776306152, "num_tokens": 1, "num_tokens_all": 850, "is_greedy": false, "logits_per_token": -1.3085684776306152, "logits_per_char": -0.3271421194076538, "num_chars": 4}, {"sum_logits": -0.4233209192752838, "num_tokens": 1, "num_tokens_all": 850, "is_greedy": true, "logits_per_token": -0.4233209192752838, "logits_per_char": -0.14110697309176126, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 161, "native_id": 2821, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5536377429962158, "incorrect_loss_raw": 0.9593722820281982, "correct_loss_per_char": 0.13840943574905396, "incorrect_loss_per_char": 0.3197907606760661, "correct_loss_per_token": 0.5536377429962158, "incorrect_loss_per_token": 0.9593722820281982, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5536377429962158, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": true, "logits_per_token": -0.5536377429962158, "logits_per_char": -0.13840943574905396, "num_chars": 4}, {"sum_logits": -0.9593722820281982, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": false, "logits_per_token": -0.9593722820281982, "logits_per_char": -0.3197907606760661, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 162, "native_id": 1746, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.47835075855255127, "incorrect_loss_raw": 1.0954258441925049, "correct_loss_per_char": 0.11958768963813782, "incorrect_loss_per_char": 0.3651419480641683, "correct_loss_per_token": 0.47835075855255127, "incorrect_loss_per_token": 1.0954258441925049, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.47835075855255127, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": true, "logits_per_token": -0.47835075855255127, "logits_per_char": -0.11958768963813782, "num_chars": 4}, {"sum_logits": -1.0954258441925049, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": false, "logits_per_token": -1.0954258441925049, "logits_per_char": -0.3651419480641683, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 163, "native_id": 826, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5069265961647034, "incorrect_loss_raw": 1.0996474027633667, "correct_loss_per_char": 0.12673164904117584, "incorrect_loss_per_char": 0.36654913425445557, "correct_loss_per_token": 0.5069265961647034, "incorrect_loss_per_token": 1.0996474027633667, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5069265961647034, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": true, "logits_per_token": -0.5069265961647034, "logits_per_char": -0.12673164904117584, "num_chars": 4}, {"sum_logits": -1.0996474027633667, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": false, "logits_per_token": -1.0996474027633667, "logits_per_char": -0.36654913425445557, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 164, "native_id": 414, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2952064573764801, "incorrect_loss_raw": 1.603326678276062, "correct_loss_per_char": 0.0984021524588267, "incorrect_loss_per_char": 0.4008316695690155, "correct_loss_per_token": 0.2952064573764801, "incorrect_loss_per_token": 1.603326678276062, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.603326678276062, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": false, "logits_per_token": -1.603326678276062, "logits_per_char": -0.4008316695690155, "num_chars": 4}, {"sum_logits": -0.2952064573764801, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": true, "logits_per_token": -0.2952064573764801, "logits_per_char": -0.0984021524588267, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 165, "native_id": 1624, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0797630548477173, "incorrect_loss_raw": 0.44824129343032837, "correct_loss_per_char": 0.2699407637119293, "incorrect_loss_per_char": 0.14941376447677612, "correct_loss_per_token": 1.0797630548477173, "incorrect_loss_per_token": 0.44824129343032837, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0797630548477173, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.0797630548477173, "logits_per_char": -0.2699407637119293, "num_chars": 4}, {"sum_logits": -0.44824129343032837, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -0.44824129343032837, "logits_per_char": -0.14941376447677612, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 166, "native_id": 797, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.32934069633483887, "incorrect_loss_raw": 1.3455361127853394, "correct_loss_per_char": 0.10978023211161296, "incorrect_loss_per_char": 0.33638402819633484, "correct_loss_per_token": 0.32934069633483887, "incorrect_loss_per_token": 1.3455361127853394, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3455361127853394, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": false, "logits_per_token": -1.3455361127853394, "logits_per_char": -0.33638402819633484, "num_chars": 4}, {"sum_logits": -0.32934069633483887, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": true, "logits_per_token": -0.32934069633483887, "logits_per_char": -0.10978023211161296, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 167, "native_id": 2887, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.488958477973938, "incorrect_loss_raw": 1.056185245513916, "correct_loss_per_char": 0.1222396194934845, "incorrect_loss_per_char": 0.35206174850463867, "correct_loss_per_token": 0.488958477973938, "incorrect_loss_per_token": 1.056185245513916, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.488958477973938, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": true, "logits_per_token": -0.488958477973938, "logits_per_char": -0.1222396194934845, "num_chars": 4}, {"sum_logits": -1.056185245513916, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": false, "logits_per_token": -1.056185245513916, "logits_per_char": -0.35206174850463867, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 168, "native_id": 1882, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4831584393978119, "incorrect_loss_raw": 1.016178846359253, "correct_loss_per_char": 0.16105281313260397, "incorrect_loss_per_char": 0.25404471158981323, "correct_loss_per_token": 0.4831584393978119, "incorrect_loss_per_token": 1.016178846359253, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.016178846359253, "num_tokens": 1, "num_tokens_all": 874, "is_greedy": false, "logits_per_token": -1.016178846359253, "logits_per_char": -0.25404471158981323, "num_chars": 4}, {"sum_logits": -0.4831584393978119, "num_tokens": 1, "num_tokens_all": 874, "is_greedy": true, "logits_per_token": -0.4831584393978119, "logits_per_char": -0.16105281313260397, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 169, "native_id": 2050, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5577889084815979, "incorrect_loss_raw": 0.9543248414993286, "correct_loss_per_char": 0.18592963616053262, "incorrect_loss_per_char": 0.23858121037483215, "correct_loss_per_token": 0.5577889084815979, "incorrect_loss_per_token": 0.9543248414993286, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9543248414993286, "num_tokens": 1, "num_tokens_all": 1130, "is_greedy": false, "logits_per_token": -0.9543248414993286, "logits_per_char": -0.23858121037483215, "num_chars": 4}, {"sum_logits": -0.5577889084815979, "num_tokens": 1, "num_tokens_all": 1130, "is_greedy": true, "logits_per_token": -0.5577889084815979, "logits_per_char": -0.18592963616053262, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 170, "native_id": 967, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2908713817596436, "incorrect_loss_raw": 0.5699016451835632, "correct_loss_per_char": 0.3227178454399109, "incorrect_loss_per_char": 0.18996721506118774, "correct_loss_per_token": 1.2908713817596436, "incorrect_loss_per_token": 0.5699016451835632, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2908713817596436, "num_tokens": 1, "num_tokens_all": 1042, "is_greedy": false, "logits_per_token": -1.2908713817596436, "logits_per_char": -0.3227178454399109, "num_chars": 4}, {"sum_logits": -0.5699016451835632, "num_tokens": 1, "num_tokens_all": 1042, "is_greedy": true, "logits_per_token": -0.5699016451835632, "logits_per_char": -0.18996721506118774, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 171, "native_id": 1479, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4991956949234009, "incorrect_loss_raw": 1.0440332889556885, "correct_loss_per_char": 0.12479892373085022, "incorrect_loss_per_char": 0.3480110963185628, "correct_loss_per_token": 0.4991956949234009, "incorrect_loss_per_token": 1.0440332889556885, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4991956949234009, "num_tokens": 1, "num_tokens_all": 887, "is_greedy": true, "logits_per_token": -0.4991956949234009, "logits_per_char": -0.12479892373085022, "num_chars": 4}, {"sum_logits": -1.0440332889556885, "num_tokens": 1, "num_tokens_all": 887, "is_greedy": false, "logits_per_token": -1.0440332889556885, "logits_per_char": -0.3480110963185628, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 172, "native_id": 840, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5192917585372925, "incorrect_loss_raw": 1.1460387706756592, "correct_loss_per_char": 0.17309725284576416, "incorrect_loss_per_char": 0.2865096926689148, "correct_loss_per_token": 0.5192917585372925, "incorrect_loss_per_token": 1.1460387706756592, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1460387706756592, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": false, "logits_per_token": -1.1460387706756592, "logits_per_char": -0.2865096926689148, "num_chars": 4}, {"sum_logits": -0.5192917585372925, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": true, "logits_per_token": -0.5192917585372925, "logits_per_char": -0.17309725284576416, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 173, "native_id": 3228, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7826120257377625, "incorrect_loss_raw": 0.7161519527435303, "correct_loss_per_char": 0.1956530064344406, "incorrect_loss_per_char": 0.23871731758117676, "correct_loss_per_token": 0.7826120257377625, "incorrect_loss_per_token": 0.7161519527435303, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7826120257377625, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": false, "logits_per_token": -0.7826120257377625, "logits_per_char": -0.1956530064344406, "num_chars": 4}, {"sum_logits": -0.7161519527435303, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": true, "logits_per_token": -0.7161519527435303, "logits_per_char": -0.23871731758117676, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 174, "native_id": 2877, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3777954578399658, "incorrect_loss_raw": 1.250575304031372, "correct_loss_per_char": 0.09444886445999146, "incorrect_loss_per_char": 0.416858434677124, "correct_loss_per_token": 0.3777954578399658, "incorrect_loss_per_token": 1.250575304031372, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3777954578399658, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": true, "logits_per_token": -0.3777954578399658, "logits_per_char": -0.09444886445999146, "num_chars": 4}, {"sum_logits": -1.250575304031372, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": false, "logits_per_token": -1.250575304031372, "logits_per_char": -0.416858434677124, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 175, "native_id": 1725, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6734845638275146, "incorrect_loss_raw": 0.7591229677200317, "correct_loss_per_char": 0.22449485460917154, "incorrect_loss_per_char": 0.18978074193000793, "correct_loss_per_token": 0.6734845638275146, "incorrect_loss_per_token": 0.7591229677200317, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7591229677200317, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": false, "logits_per_token": -0.7591229677200317, "logits_per_char": -0.18978074193000793, "num_chars": 4}, {"sum_logits": -0.6734845638275146, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": true, "logits_per_token": -0.6734845638275146, "logits_per_char": -0.22449485460917154, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 176, "native_id": 715, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0493898391723633, "incorrect_loss_raw": 0.5657996535301208, "correct_loss_per_char": 0.2623474597930908, "incorrect_loss_per_char": 0.18859988451004028, "correct_loss_per_token": 1.0493898391723633, "incorrect_loss_per_token": 0.5657996535301208, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0493898391723633, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.0493898391723633, "logits_per_char": -0.2623474597930908, "num_chars": 4}, {"sum_logits": -0.5657996535301208, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": true, "logits_per_token": -0.5657996535301208, "logits_per_char": -0.18859988451004028, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 177, "native_id": 2394, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0676555633544922, "incorrect_loss_raw": 0.4691009521484375, "correct_loss_per_char": 0.26691389083862305, "incorrect_loss_per_char": 0.15636698404947916, "correct_loss_per_token": 1.0676555633544922, "incorrect_loss_per_token": 0.4691009521484375, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0676555633544922, "num_tokens": 1, "num_tokens_all": 889, "is_greedy": false, "logits_per_token": -1.0676555633544922, "logits_per_char": -0.26691389083862305, "num_chars": 4}, {"sum_logits": -0.4691009521484375, "num_tokens": 1, "num_tokens_all": 889, "is_greedy": true, "logits_per_token": -0.4691009521484375, "logits_per_char": -0.15636698404947916, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 178, "native_id": 832, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.30745983123779297, "incorrect_loss_raw": 1.5901095867156982, "correct_loss_per_char": 0.10248661041259766, "incorrect_loss_per_char": 0.39752739667892456, "correct_loss_per_token": 0.30745983123779297, "incorrect_loss_per_token": 1.5901095867156982, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5901095867156982, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": false, "logits_per_token": -1.5901095867156982, "logits_per_char": -0.39752739667892456, "num_chars": 4}, {"sum_logits": -0.30745983123779297, "num_tokens": 1, "num_tokens_all": 1102, "is_greedy": true, "logits_per_token": -0.30745983123779297, "logits_per_char": -0.10248661041259766, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 179, "native_id": 1236, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0722020864486694, "incorrect_loss_raw": 0.4957042932510376, "correct_loss_per_char": 0.35740069548288983, "incorrect_loss_per_char": 0.1239260733127594, "correct_loss_per_token": 1.0722020864486694, "incorrect_loss_per_token": 0.4957042932510376, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4957042932510376, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": true, "logits_per_token": -0.4957042932510376, "logits_per_char": -0.1239260733127594, "num_chars": 4}, {"sum_logits": -1.0722020864486694, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": false, "logits_per_token": -1.0722020864486694, "logits_per_char": -0.35740069548288983, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 180, "native_id": 247, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8271899223327637, "incorrect_loss_raw": 0.2159910351037979, "correct_loss_per_char": 0.4567974805831909, "incorrect_loss_per_char": 0.07199701170126598, "correct_loss_per_token": 1.8271899223327637, "incorrect_loss_per_token": 0.2159910351037979, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.8271899223327637, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": false, "logits_per_token": -1.8271899223327637, "logits_per_char": -0.4567974805831909, "num_chars": 4}, {"sum_logits": -0.2159910351037979, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": true, "logits_per_token": -0.2159910351037979, "logits_per_char": -0.07199701170126598, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 181, "native_id": 1443, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5422999262809753, "incorrect_loss_raw": 0.9737991690635681, "correct_loss_per_char": 0.13557498157024384, "incorrect_loss_per_char": 0.3245997230211894, "correct_loss_per_token": 0.5422999262809753, "incorrect_loss_per_token": 0.9737991690635681, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5422999262809753, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": true, "logits_per_token": -0.5422999262809753, "logits_per_char": -0.13557498157024384, "num_chars": 4}, {"sum_logits": -0.9737991690635681, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -0.9737991690635681, "logits_per_char": -0.3245997230211894, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 182, "native_id": 2188, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5730212330818176, "incorrect_loss_raw": 0.8989773988723755, "correct_loss_per_char": 0.1432553082704544, "incorrect_loss_per_char": 0.2996591329574585, "correct_loss_per_token": 0.5730212330818176, "incorrect_loss_per_token": 0.8989773988723755, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5730212330818176, "num_tokens": 1, "num_tokens_all": 901, "is_greedy": true, "logits_per_token": -0.5730212330818176, "logits_per_char": -0.1432553082704544, "num_chars": 4}, {"sum_logits": -0.8989773988723755, "num_tokens": 1, "num_tokens_all": 901, "is_greedy": false, "logits_per_token": -0.8989773988723755, "logits_per_char": -0.2996591329574585, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 183, "native_id": 626, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8191468119621277, "incorrect_loss_raw": 0.8189709782600403, "correct_loss_per_char": 0.20478670299053192, "incorrect_loss_per_char": 0.2729903260866801, "correct_loss_per_token": 0.8191468119621277, "incorrect_loss_per_token": 0.8189709782600403, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8191468119621277, "num_tokens": 1, "num_tokens_all": 1009, "is_greedy": false, "logits_per_token": -0.8191468119621277, "logits_per_char": -0.20478670299053192, "num_chars": 4}, {"sum_logits": -0.8189709782600403, "num_tokens": 1, "num_tokens_all": 1009, "is_greedy": true, "logits_per_token": -0.8189709782600403, "logits_per_char": -0.2729903260866801, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 184, "native_id": 2046, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.665145754814148, "incorrect_loss_raw": 0.8774400949478149, "correct_loss_per_char": 0.166286438703537, "incorrect_loss_per_char": 0.29248003164927167, "correct_loss_per_token": 0.665145754814148, "incorrect_loss_per_token": 0.8774400949478149, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.665145754814148, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": true, "logits_per_token": -0.665145754814148, "logits_per_char": -0.166286438703537, "num_chars": 4}, {"sum_logits": -0.8774400949478149, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -0.8774400949478149, "logits_per_char": -0.29248003164927167, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 185, "native_id": 2248, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4090648591518402, "incorrect_loss_raw": 1.2651385068893433, "correct_loss_per_char": 0.10226621478796005, "incorrect_loss_per_char": 0.42171283562978107, "correct_loss_per_token": 0.4090648591518402, "incorrect_loss_per_token": 1.2651385068893433, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4090648591518402, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": true, "logits_per_token": -0.4090648591518402, "logits_per_char": -0.10226621478796005, "num_chars": 4}, {"sum_logits": -1.2651385068893433, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": false, "logits_per_token": -1.2651385068893433, "logits_per_char": -0.42171283562978107, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 186, "native_id": 1935, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9893782138824463, "incorrect_loss_raw": 0.49912089109420776, "correct_loss_per_char": 0.24734455347061157, "incorrect_loss_per_char": 0.16637363036473593, "correct_loss_per_token": 0.9893782138824463, "incorrect_loss_per_token": 0.49912089109420776, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9893782138824463, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": false, "logits_per_token": -0.9893782138824463, "logits_per_char": -0.24734455347061157, "num_chars": 4}, {"sum_logits": -0.49912089109420776, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": true, "logits_per_token": -0.49912089109420776, "logits_per_char": -0.16637363036473593, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 187, "native_id": 1367, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6571303009986877, "incorrect_loss_raw": 0.8086563944816589, "correct_loss_per_char": 0.16428257524967194, "incorrect_loss_per_char": 0.2695521314938863, "correct_loss_per_token": 0.6571303009986877, "incorrect_loss_per_token": 0.8086563944816589, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6571303009986877, "num_tokens": 1, "num_tokens_all": 872, "is_greedy": true, "logits_per_token": -0.6571303009986877, "logits_per_char": -0.16428257524967194, "num_chars": 4}, {"sum_logits": -0.8086563944816589, "num_tokens": 1, "num_tokens_all": 872, "is_greedy": false, "logits_per_token": -0.8086563944816589, "logits_per_char": -0.2695521314938863, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 188, "native_id": 568, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4454367458820343, "incorrect_loss_raw": 1.0864324569702148, "correct_loss_per_char": 0.14847891529401144, "incorrect_loss_per_char": 0.2716081142425537, "correct_loss_per_token": 0.4454367458820343, "incorrect_loss_per_token": 1.0864324569702148, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0864324569702148, "num_tokens": 1, "num_tokens_all": 995, "is_greedy": false, "logits_per_token": -1.0864324569702148, "logits_per_char": -0.2716081142425537, "num_chars": 4}, {"sum_logits": -0.4454367458820343, "num_tokens": 1, "num_tokens_all": 995, "is_greedy": true, "logits_per_token": -0.4454367458820343, "logits_per_char": -0.14847891529401144, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 189, "native_id": 536, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8835903406143188, "incorrect_loss_raw": 0.6122431755065918, "correct_loss_per_char": 0.29453011353810626, "incorrect_loss_per_char": 0.15306079387664795, "correct_loss_per_token": 0.8835903406143188, "incorrect_loss_per_token": 0.6122431755065918, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6122431755065918, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.6122431755065918, "logits_per_char": -0.15306079387664795, "num_chars": 4}, {"sum_logits": -0.8835903406143188, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -0.8835903406143188, "logits_per_char": -0.29453011353810626, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 190, "native_id": 196, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9185670614242554, "incorrect_loss_raw": 0.623327374458313, "correct_loss_per_char": 0.30618902047475177, "incorrect_loss_per_char": 0.15583184361457825, "correct_loss_per_token": 0.9185670614242554, "incorrect_loss_per_token": 0.623327374458313, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.623327374458313, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": true, "logits_per_token": -0.623327374458313, "logits_per_char": -0.15583184361457825, "num_chars": 4}, {"sum_logits": -0.9185670614242554, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": false, "logits_per_token": -0.9185670614242554, "logits_per_char": -0.30618902047475177, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 191, "native_id": 2557, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7639560699462891, "incorrect_loss_raw": 0.7133424282073975, "correct_loss_per_char": 0.19098901748657227, "incorrect_loss_per_char": 0.23778080940246582, "correct_loss_per_token": 0.7639560699462891, "incorrect_loss_per_token": 0.7133424282073975, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7639560699462891, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": false, "logits_per_token": -0.7639560699462891, "logits_per_char": -0.19098901748657227, "num_chars": 4}, {"sum_logits": -0.7133424282073975, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": true, "logits_per_token": -0.7133424282073975, "logits_per_char": -0.23778080940246582, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 192, "native_id": 676, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.30346551537513733, "incorrect_loss_raw": 1.6191644668579102, "correct_loss_per_char": 0.10115517179171245, "incorrect_loss_per_char": 0.40479111671447754, "correct_loss_per_token": 0.30346551537513733, "incorrect_loss_per_token": 1.6191644668579102, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6191644668579102, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": false, "logits_per_token": -1.6191644668579102, "logits_per_char": -0.40479111671447754, "num_chars": 4}, {"sum_logits": -0.30346551537513733, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": true, "logits_per_token": -0.30346551537513733, "logits_per_char": -0.10115517179171245, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 193, "native_id": 593, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5148248076438904, "incorrect_loss_raw": 1.0038721561431885, "correct_loss_per_char": 0.17160826921463013, "incorrect_loss_per_char": 0.2509680390357971, "correct_loss_per_token": 0.5148248076438904, "incorrect_loss_per_token": 1.0038721561431885, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0038721561431885, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": false, "logits_per_token": -1.0038721561431885, "logits_per_char": -0.2509680390357971, "num_chars": 4}, {"sum_logits": -0.5148248076438904, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": true, "logits_per_token": -0.5148248076438904, "logits_per_char": -0.17160826921463013, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 194, "native_id": 2236, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5358010530471802, "incorrect_loss_raw": 0.9538822174072266, "correct_loss_per_char": 0.17860035101572672, "incorrect_loss_per_char": 0.23847055435180664, "correct_loss_per_token": 0.5358010530471802, "incorrect_loss_per_token": 0.9538822174072266, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9538822174072266, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": false, "logits_per_token": -0.9538822174072266, "logits_per_char": -0.23847055435180664, "num_chars": 4}, {"sum_logits": -0.5358010530471802, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": true, "logits_per_token": -0.5358010530471802, "logits_per_char": -0.17860035101572672, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 195, "native_id": 285, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7548140287399292, "incorrect_loss_raw": 0.6871293783187866, "correct_loss_per_char": 0.1887035071849823, "incorrect_loss_per_char": 0.2290431261062622, "correct_loss_per_token": 0.7548140287399292, "incorrect_loss_per_token": 0.6871293783187866, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7548140287399292, "num_tokens": 1, "num_tokens_all": 862, "is_greedy": false, "logits_per_token": -0.7548140287399292, "logits_per_char": -0.1887035071849823, "num_chars": 4}, {"sum_logits": -0.6871293783187866, "num_tokens": 1, "num_tokens_all": 862, "is_greedy": true, "logits_per_token": -0.6871293783187866, "logits_per_char": -0.2290431261062622, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 196, "native_id": 2923, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7348135113716125, "incorrect_loss_raw": 0.7095862030982971, "correct_loss_per_char": 0.18370337784290314, "incorrect_loss_per_char": 0.23652873436609903, "correct_loss_per_token": 0.7348135113716125, "incorrect_loss_per_token": 0.7095862030982971, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7348135113716125, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -0.7348135113716125, "logits_per_char": -0.18370337784290314, "num_chars": 4}, {"sum_logits": -0.7095862030982971, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": true, "logits_per_token": -0.7095862030982971, "logits_per_char": -0.23652873436609903, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 197, "native_id": 1332, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.423470377922058, "incorrect_loss_raw": 0.3040439784526825, "correct_loss_per_char": 0.3558675944805145, "incorrect_loss_per_char": 0.10134799281756084, "correct_loss_per_token": 1.423470377922058, "incorrect_loss_per_token": 0.3040439784526825, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.423470377922058, "num_tokens": 1, "num_tokens_all": 886, "is_greedy": false, "logits_per_token": -1.423470377922058, "logits_per_char": -0.3558675944805145, "num_chars": 4}, {"sum_logits": -0.3040439784526825, "num_tokens": 1, "num_tokens_all": 886, "is_greedy": true, "logits_per_token": -0.3040439784526825, "logits_per_char": -0.10134799281756084, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 198, "native_id": 700, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4756724834442139, "incorrect_loss_raw": 0.3138745427131653, "correct_loss_per_char": 0.36891812086105347, "incorrect_loss_per_char": 0.1046248475710551, "correct_loss_per_token": 1.4756724834442139, "incorrect_loss_per_token": 0.3138745427131653, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4756724834442139, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -1.4756724834442139, "logits_per_char": -0.36891812086105347, "num_chars": 4}, {"sum_logits": -0.3138745427131653, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": true, "logits_per_token": -0.3138745427131653, "logits_per_char": -0.1046248475710551, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 199, "native_id": 6, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6657575368881226, "incorrect_loss_raw": 0.8152071833610535, "correct_loss_per_char": 0.16643938422203064, "incorrect_loss_per_char": 0.2717357277870178, "correct_loss_per_token": 0.6657575368881226, "incorrect_loss_per_token": 0.8152071833610535, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6657575368881226, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -0.6657575368881226, "logits_per_char": -0.16643938422203064, "num_chars": 4}, {"sum_logits": -0.8152071833610535, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -0.8152071833610535, "logits_per_char": -0.2717357277870178, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 200, "native_id": 2737, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5907094478607178, "incorrect_loss_raw": 0.8426298499107361, "correct_loss_per_char": 0.19690314928690592, "incorrect_loss_per_char": 0.21065746247768402, "correct_loss_per_token": 0.5907094478607178, "incorrect_loss_per_token": 0.8426298499107361, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8426298499107361, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": false, "logits_per_token": -0.8426298499107361, "logits_per_char": -0.21065746247768402, "num_chars": 4}, {"sum_logits": -0.5907094478607178, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": true, "logits_per_token": -0.5907094478607178, "logits_per_char": -0.19690314928690592, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 201, "native_id": 2763, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4577803313732147, "incorrect_loss_raw": 1.0713491439819336, "correct_loss_per_char": 0.11444508284330368, "incorrect_loss_per_char": 0.3571163813273112, "correct_loss_per_token": 0.4577803313732147, "incorrect_loss_per_token": 1.0713491439819336, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4577803313732147, "num_tokens": 1, "num_tokens_all": 1012, "is_greedy": true, "logits_per_token": -0.4577803313732147, "logits_per_char": -0.11444508284330368, "num_chars": 4}, {"sum_logits": -1.0713491439819336, "num_tokens": 1, "num_tokens_all": 1012, "is_greedy": false, "logits_per_token": -1.0713491439819336, "logits_per_char": -0.3571163813273112, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 202, "native_id": 249, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5242563486099243, "incorrect_loss_raw": 0.9739212989807129, "correct_loss_per_char": 0.13106408715248108, "incorrect_loss_per_char": 0.324640432993571, "correct_loss_per_token": 0.5242563486099243, "incorrect_loss_per_token": 0.9739212989807129, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5242563486099243, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -0.5242563486099243, "logits_per_char": -0.13106408715248108, "num_chars": 4}, {"sum_logits": -0.9739212989807129, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -0.9739212989807129, "logits_per_char": -0.324640432993571, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 203, "native_id": 2614, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.44122785329818726, "incorrect_loss_raw": 1.1397478580474854, "correct_loss_per_char": 0.11030696332454681, "incorrect_loss_per_char": 0.3799159526824951, "correct_loss_per_token": 0.44122785329818726, "incorrect_loss_per_token": 1.1397478580474854, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.44122785329818726, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": true, "logits_per_token": -0.44122785329818726, "logits_per_char": -0.11030696332454681, "num_chars": 4}, {"sum_logits": -1.1397478580474854, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": false, "logits_per_token": -1.1397478580474854, "logits_per_char": -0.3799159526824951, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 204, "native_id": 358, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6372919082641602, "incorrect_loss_raw": 0.8975323438644409, "correct_loss_per_char": 0.15932297706604004, "incorrect_loss_per_char": 0.29917744795481366, "correct_loss_per_token": 0.6372919082641602, "incorrect_loss_per_token": 0.8975323438644409, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6372919082641602, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": true, "logits_per_token": -0.6372919082641602, "logits_per_char": -0.15932297706604004, "num_chars": 4}, {"sum_logits": -0.8975323438644409, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": false, "logits_per_token": -0.8975323438644409, "logits_per_char": -0.29917744795481366, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 205, "native_id": 607, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1257987022399902, "incorrect_loss_raw": 0.50019770860672, "correct_loss_per_char": 0.28144967555999756, "incorrect_loss_per_char": 0.16673256953557333, "correct_loss_per_token": 1.1257987022399902, "incorrect_loss_per_token": 0.50019770860672, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1257987022399902, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": false, "logits_per_token": -1.1257987022399902, "logits_per_char": -0.28144967555999756, "num_chars": 4}, {"sum_logits": -0.50019770860672, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": true, "logits_per_token": -0.50019770860672, "logits_per_char": -0.16673256953557333, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 206, "native_id": 888, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6006501913070679, "incorrect_loss_raw": 0.8563504219055176, "correct_loss_per_char": 0.15016254782676697, "incorrect_loss_per_char": 0.28545014063517254, "correct_loss_per_token": 0.6006501913070679, "incorrect_loss_per_token": 0.8563504219055176, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6006501913070679, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": true, "logits_per_token": -0.6006501913070679, "logits_per_char": -0.15016254782676697, "num_chars": 4}, {"sum_logits": -0.8563504219055176, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": false, "logits_per_token": -0.8563504219055176, "logits_per_char": -0.28545014063517254, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 207, "native_id": 163, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6842235326766968, "incorrect_loss_raw": 0.24074973165988922, "correct_loss_per_char": 0.4210558831691742, "incorrect_loss_per_char": 0.0802499105532964, "correct_loss_per_token": 1.6842235326766968, "incorrect_loss_per_token": 0.24074973165988922, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6842235326766968, "num_tokens": 1, "num_tokens_all": 999, "is_greedy": false, "logits_per_token": -1.6842235326766968, "logits_per_char": -0.4210558831691742, "num_chars": 4}, {"sum_logits": -0.24074973165988922, "num_tokens": 1, "num_tokens_all": 999, "is_greedy": true, "logits_per_token": -0.24074973165988922, "logits_per_char": -0.0802499105532964, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 208, "native_id": 1772, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2467830181121826, "incorrect_loss_raw": 0.41401857137680054, "correct_loss_per_char": 0.31169575452804565, "incorrect_loss_per_char": 0.1380061904589335, "correct_loss_per_token": 1.2467830181121826, "incorrect_loss_per_token": 0.41401857137680054, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2467830181121826, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": false, "logits_per_token": -1.2467830181121826, "logits_per_char": -0.31169575452804565, "num_chars": 4}, {"sum_logits": -0.41401857137680054, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": true, "logits_per_token": -0.41401857137680054, "logits_per_char": -0.1380061904589335, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 209, "native_id": 1603, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4436414837837219, "incorrect_loss_raw": 1.086721420288086, "correct_loss_per_char": 0.14788049459457397, "incorrect_loss_per_char": 0.2716803550720215, "correct_loss_per_token": 0.4436414837837219, "incorrect_loss_per_token": 1.086721420288086, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.086721420288086, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": false, "logits_per_token": -1.086721420288086, "logits_per_char": -0.2716803550720215, "num_chars": 4}, {"sum_logits": -0.4436414837837219, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": true, "logits_per_token": -0.4436414837837219, "logits_per_char": -0.14788049459457397, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 210, "native_id": 3017, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7660324573516846, "incorrect_loss_raw": 0.6800025105476379, "correct_loss_per_char": 0.2553441524505615, "incorrect_loss_per_char": 0.17000062763690948, "correct_loss_per_token": 0.7660324573516846, "incorrect_loss_per_token": 0.6800025105476379, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6800025105476379, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": true, "logits_per_token": -0.6800025105476379, "logits_per_char": -0.17000062763690948, "num_chars": 4}, {"sum_logits": -0.7660324573516846, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": false, "logits_per_token": -0.7660324573516846, "logits_per_char": -0.2553441524505615, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 211, "native_id": 1328, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5885670781135559, "incorrect_loss_raw": 0.9028525352478027, "correct_loss_per_char": 0.19618902603785196, "incorrect_loss_per_char": 0.22571313381195068, "correct_loss_per_token": 0.5885670781135559, "incorrect_loss_per_token": 0.9028525352478027, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9028525352478027, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -0.9028525352478027, "logits_per_char": -0.22571313381195068, "num_chars": 4}, {"sum_logits": -0.5885670781135559, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": true, "logits_per_token": -0.5885670781135559, "logits_per_char": -0.19618902603785196, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 212, "native_id": 848, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5277159214019775, "incorrect_loss_raw": 1.008960485458374, "correct_loss_per_char": 0.17590530713399252, "incorrect_loss_per_char": 0.2522401213645935, "correct_loss_per_token": 0.5277159214019775, "incorrect_loss_per_token": 1.008960485458374, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.008960485458374, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": false, "logits_per_token": -1.008960485458374, "logits_per_char": -0.2522401213645935, "num_chars": 4}, {"sum_logits": -0.5277159214019775, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": true, "logits_per_token": -0.5277159214019775, "logits_per_char": -0.17590530713399252, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 213, "native_id": 3068, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4715256690979004, "incorrect_loss_raw": 1.0309888124465942, "correct_loss_per_char": 0.15717522303263345, "incorrect_loss_per_char": 0.25774720311164856, "correct_loss_per_token": 0.4715256690979004, "incorrect_loss_per_token": 1.0309888124465942, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0309888124465942, "num_tokens": 1, "num_tokens_all": 1000, "is_greedy": false, "logits_per_token": -1.0309888124465942, "logits_per_char": -0.25774720311164856, "num_chars": 4}, {"sum_logits": -0.4715256690979004, "num_tokens": 1, "num_tokens_all": 1000, "is_greedy": true, "logits_per_token": -0.4715256690979004, "logits_per_char": -0.15717522303263345, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 214, "native_id": 1561, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 2.8858489990234375, "incorrect_loss_raw": 2.1817779541015625, "correct_loss_per_char": 0.7214622497558594, "incorrect_loss_per_char": 0.7272593180338541, "correct_loss_per_token": 2.8858489990234375, "incorrect_loss_per_token": 2.1817779541015625, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -2.8858489990234375, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": false, "logits_per_token": -2.8858489990234375, "logits_per_char": -0.7214622497558594, "num_chars": 4}, {"sum_logits": -2.1817779541015625, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": false, "logits_per_token": -2.1817779541015625, "logits_per_char": -0.7272593180338541, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 215, "native_id": 1147, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9013696908950806, "incorrect_loss_raw": 0.5771274566650391, "correct_loss_per_char": 0.30045656363169354, "incorrect_loss_per_char": 0.14428186416625977, "correct_loss_per_token": 0.9013696908950806, "incorrect_loss_per_token": 0.5771274566650391, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5771274566650391, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": true, "logits_per_token": -0.5771274566650391, "logits_per_char": -0.14428186416625977, "num_chars": 4}, {"sum_logits": -0.9013696908950806, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": false, "logits_per_token": -0.9013696908950806, "logits_per_char": -0.30045656363169354, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 216, "native_id": 2201, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.46707290410995483, "incorrect_loss_raw": 1.0381152629852295, "correct_loss_per_char": 0.11676822602748871, "incorrect_loss_per_char": 0.3460384209950765, "correct_loss_per_token": 0.46707290410995483, "incorrect_loss_per_token": 1.0381152629852295, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.46707290410995483, "num_tokens": 1, "num_tokens_all": 850, "is_greedy": true, "logits_per_token": -0.46707290410995483, "logits_per_char": -0.11676822602748871, "num_chars": 4}, {"sum_logits": -1.0381152629852295, "num_tokens": 1, "num_tokens_all": 850, "is_greedy": false, "logits_per_token": -1.0381152629852295, "logits_per_char": -0.3460384209950765, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 217, "native_id": 2588, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8690687417984009, "incorrect_loss_raw": 0.645725667476654, "correct_loss_per_char": 0.21726718544960022, "incorrect_loss_per_char": 0.21524188915888467, "correct_loss_per_token": 0.8690687417984009, "incorrect_loss_per_token": 0.645725667476654, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8690687417984009, "num_tokens": 1, "num_tokens_all": 1057, "is_greedy": false, "logits_per_token": -0.8690687417984009, "logits_per_char": -0.21726718544960022, "num_chars": 4}, {"sum_logits": -0.645725667476654, "num_tokens": 1, "num_tokens_all": 1057, "is_greedy": true, "logits_per_token": -0.645725667476654, "logits_per_char": -0.21524188915888467, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 218, "native_id": 1247, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5428634285926819, "incorrect_loss_raw": 1.0016772747039795, "correct_loss_per_char": 0.13571585714817047, "incorrect_loss_per_char": 0.3338924249013265, "correct_loss_per_token": 0.5428634285926819, "incorrect_loss_per_token": 1.0016772747039795, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5428634285926819, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.5428634285926819, "logits_per_char": -0.13571585714817047, "num_chars": 4}, {"sum_logits": -1.0016772747039795, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.0016772747039795, "logits_per_char": -0.3338924249013265, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 219, "native_id": 1728, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7056671380996704, "incorrect_loss_raw": 0.8335785865783691, "correct_loss_per_char": 0.1764167845249176, "incorrect_loss_per_char": 0.27785952885945636, "correct_loss_per_token": 0.7056671380996704, "incorrect_loss_per_token": 0.8335785865783691, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7056671380996704, "num_tokens": 1, "num_tokens_all": 842, "is_greedy": true, "logits_per_token": -0.7056671380996704, "logits_per_char": -0.1764167845249176, "num_chars": 4}, {"sum_logits": -0.8335785865783691, "num_tokens": 1, "num_tokens_all": 842, "is_greedy": false, "logits_per_token": -0.8335785865783691, "logits_per_char": -0.27785952885945636, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 220, "native_id": 1306, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.39836785197257996, "incorrect_loss_raw": 1.2958589792251587, "correct_loss_per_char": 0.09959196299314499, "incorrect_loss_per_char": 0.4319529930750529, "correct_loss_per_token": 0.39836785197257996, "incorrect_loss_per_token": 1.2958589792251587, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.39836785197257996, "num_tokens": 1, "num_tokens_all": 835, "is_greedy": true, "logits_per_token": -0.39836785197257996, "logits_per_char": -0.09959196299314499, "num_chars": 4}, {"sum_logits": -1.2958589792251587, "num_tokens": 1, "num_tokens_all": 835, "is_greedy": false, "logits_per_token": -1.2958589792251587, "logits_per_char": -0.4319529930750529, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 221, "native_id": 2806, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6391911506652832, "incorrect_loss_raw": 0.89666348695755, "correct_loss_per_char": 0.1597977876663208, "incorrect_loss_per_char": 0.29888782898585003, "correct_loss_per_token": 0.6391911506652832, "incorrect_loss_per_token": 0.89666348695755, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6391911506652832, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": true, "logits_per_token": -0.6391911506652832, "logits_per_char": -0.1597977876663208, "num_chars": 4}, {"sum_logits": -0.89666348695755, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -0.89666348695755, "logits_per_char": -0.29888782898585003, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 222, "native_id": 2366, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8196989893913269, "incorrect_loss_raw": 0.6502754092216492, "correct_loss_per_char": 0.20492474734783173, "incorrect_loss_per_char": 0.21675846974054971, "correct_loss_per_token": 0.8196989893913269, "incorrect_loss_per_token": 0.6502754092216492, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8196989893913269, "num_tokens": 1, "num_tokens_all": 1207, "is_greedy": false, "logits_per_token": -0.8196989893913269, "logits_per_char": -0.20492474734783173, "num_chars": 4}, {"sum_logits": -0.6502754092216492, "num_tokens": 1, "num_tokens_all": 1207, "is_greedy": true, "logits_per_token": -0.6502754092216492, "logits_per_char": -0.21675846974054971, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 223, "native_id": 620, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8725107312202454, "incorrect_loss_raw": 0.6478210091590881, "correct_loss_per_char": 0.2908369104067485, "incorrect_loss_per_char": 0.16195525228977203, "correct_loss_per_token": 0.8725107312202454, "incorrect_loss_per_token": 0.6478210091590881, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6478210091590881, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": true, "logits_per_token": -0.6478210091590881, "logits_per_char": -0.16195525228977203, "num_chars": 4}, {"sum_logits": -0.8725107312202454, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -0.8725107312202454, "logits_per_char": -0.2908369104067485, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 224, "native_id": 2181, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4548918604850769, "incorrect_loss_raw": 1.09053635597229, "correct_loss_per_char": 0.11372296512126923, "incorrect_loss_per_char": 0.36351211865743, "correct_loss_per_token": 0.4548918604850769, "incorrect_loss_per_token": 1.09053635597229, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4548918604850769, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": true, "logits_per_token": -0.4548918604850769, "logits_per_char": -0.11372296512126923, "num_chars": 4}, {"sum_logits": -1.09053635597229, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": false, "logits_per_token": -1.09053635597229, "logits_per_char": -0.36351211865743, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 225, "native_id": 380, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6767776012420654, "incorrect_loss_raw": 0.34565654397010803, "correct_loss_per_char": 0.41919440031051636, "incorrect_loss_per_char": 0.11521884799003601, "correct_loss_per_token": 1.6767776012420654, "incorrect_loss_per_token": 0.34565654397010803, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6767776012420654, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": false, "logits_per_token": -1.6767776012420654, "logits_per_char": -0.41919440031051636, "num_chars": 4}, {"sum_logits": -0.34565654397010803, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": true, "logits_per_token": -0.34565654397010803, "logits_per_char": -0.11521884799003601, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 226, "native_id": 1066, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6087491512298584, "incorrect_loss_raw": 0.8450488448143005, "correct_loss_per_char": 0.1521872878074646, "incorrect_loss_per_char": 0.28168294827143353, "correct_loss_per_token": 0.6087491512298584, "incorrect_loss_per_token": 0.8450488448143005, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6087491512298584, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": true, "logits_per_token": -0.6087491512298584, "logits_per_char": -0.1521872878074646, "num_chars": 4}, {"sum_logits": -0.8450488448143005, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -0.8450488448143005, "logits_per_char": -0.28168294827143353, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 227, "native_id": 1138, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8858996629714966, "incorrect_loss_raw": 0.6151532530784607, "correct_loss_per_char": 0.22147491574287415, "incorrect_loss_per_char": 0.2050510843594869, "correct_loss_per_token": 0.8858996629714966, "incorrect_loss_per_token": 0.6151532530784607, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8858996629714966, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": false, "logits_per_token": -0.8858996629714966, "logits_per_char": -0.22147491574287415, "num_chars": 4}, {"sum_logits": -0.6151532530784607, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": true, "logits_per_token": -0.6151532530784607, "logits_per_char": -0.2050510843594869, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 228, "native_id": 1680, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7256884574890137, "incorrect_loss_raw": 0.7217400074005127, "correct_loss_per_char": 0.18142211437225342, "incorrect_loss_per_char": 0.24058000246683756, "correct_loss_per_token": 0.7256884574890137, "incorrect_loss_per_token": 0.7217400074005127, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7256884574890137, "num_tokens": 1, "num_tokens_all": 897, "is_greedy": false, "logits_per_token": -0.7256884574890137, "logits_per_char": -0.18142211437225342, "num_chars": 4}, {"sum_logits": -0.7217400074005127, "num_tokens": 1, "num_tokens_all": 897, "is_greedy": true, "logits_per_token": -0.7217400074005127, "logits_per_char": -0.24058000246683756, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 229, "native_id": 1638, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.243842214345932, "incorrect_loss_raw": 1.6210585832595825, "correct_loss_per_char": 0.08128073811531067, "incorrect_loss_per_char": 0.40526464581489563, "correct_loss_per_token": 0.243842214345932, "incorrect_loss_per_token": 1.6210585832595825, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6210585832595825, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": false, "logits_per_token": -1.6210585832595825, "logits_per_char": -0.40526464581489563, "num_chars": 4}, {"sum_logits": -0.243842214345932, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": true, "logits_per_token": -0.243842214345932, "logits_per_char": -0.08128073811531067, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 230, "native_id": 2314, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.706836998462677, "incorrect_loss_raw": 0.7660086750984192, "correct_loss_per_char": 0.17670924961566925, "incorrect_loss_per_char": 0.2553362250328064, "correct_loss_per_token": 0.706836998462677, "incorrect_loss_per_token": 0.7660086750984192, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.706836998462677, "num_tokens": 1, "num_tokens_all": 841, "is_greedy": true, "logits_per_token": -0.706836998462677, "logits_per_char": -0.17670924961566925, "num_chars": 4}, {"sum_logits": -0.7660086750984192, "num_tokens": 1, "num_tokens_all": 841, "is_greedy": false, "logits_per_token": -0.7660086750984192, "logits_per_char": -0.2553362250328064, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 231, "native_id": 3180, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6087499260902405, "incorrect_loss_raw": 0.8581721782684326, "correct_loss_per_char": 0.20291664203008017, "incorrect_loss_per_char": 0.21454304456710815, "correct_loss_per_token": 0.6087499260902405, "incorrect_loss_per_token": 0.8581721782684326, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8581721782684326, "num_tokens": 1, "num_tokens_all": 1020, "is_greedy": false, "logits_per_token": -0.8581721782684326, "logits_per_char": -0.21454304456710815, "num_chars": 4}, {"sum_logits": -0.6087499260902405, "num_tokens": 1, "num_tokens_all": 1020, "is_greedy": true, "logits_per_token": -0.6087499260902405, "logits_per_char": -0.20291664203008017, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 232, "native_id": 2153, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.278041124343872, "incorrect_loss_raw": 0.39868611097335815, "correct_loss_per_char": 0.319510281085968, "incorrect_loss_per_char": 0.13289537032445273, "correct_loss_per_token": 1.278041124343872, "incorrect_loss_per_token": 0.39868611097335815, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.278041124343872, "num_tokens": 1, "num_tokens_all": 1202, "is_greedy": false, "logits_per_token": -1.278041124343872, "logits_per_char": -0.319510281085968, "num_chars": 4}, {"sum_logits": -0.39868611097335815, "num_tokens": 1, "num_tokens_all": 1202, "is_greedy": true, "logits_per_token": -0.39868611097335815, "logits_per_char": -0.13289537032445273, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 233, "native_id": 465, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6890174150466919, "incorrect_loss_raw": 0.7461618185043335, "correct_loss_per_char": 0.17225435376167297, "incorrect_loss_per_char": 0.24872060616811117, "correct_loss_per_token": 0.6890174150466919, "incorrect_loss_per_token": 0.7461618185043335, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6890174150466919, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": true, "logits_per_token": -0.6890174150466919, "logits_per_char": -0.17225435376167297, "num_chars": 4}, {"sum_logits": -0.7461618185043335, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -0.7461618185043335, "logits_per_char": -0.24872060616811117, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 234, "native_id": 2873, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6785613298416138, "incorrect_loss_raw": 0.7992160320281982, "correct_loss_per_char": 0.16964033246040344, "incorrect_loss_per_char": 0.2664053440093994, "correct_loss_per_token": 0.6785613298416138, "incorrect_loss_per_token": 0.7992160320281982, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6785613298416138, "num_tokens": 1, "num_tokens_all": 889, "is_greedy": true, "logits_per_token": -0.6785613298416138, "logits_per_char": -0.16964033246040344, "num_chars": 4}, {"sum_logits": -0.7992160320281982, "num_tokens": 1, "num_tokens_all": 889, "is_greedy": false, "logits_per_token": -0.7992160320281982, "logits_per_char": -0.2664053440093994, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 235, "native_id": 1537, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3357250988483429, "incorrect_loss_raw": 1.3160624504089355, "correct_loss_per_char": 0.11190836628278096, "incorrect_loss_per_char": 0.3290156126022339, "correct_loss_per_token": 0.3357250988483429, "incorrect_loss_per_token": 1.3160624504089355, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3160624504089355, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": false, "logits_per_token": -1.3160624504089355, "logits_per_char": -0.3290156126022339, "num_chars": 4}, {"sum_logits": -0.3357250988483429, "num_tokens": 1, "num_tokens_all": 944, "is_greedy": true, "logits_per_token": -0.3357250988483429, "logits_per_char": -0.11190836628278096, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 236, "native_id": 1123, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.0309460163116455, "incorrect_loss_raw": 0.1827072650194168, "correct_loss_per_char": 0.5077365040779114, "incorrect_loss_per_char": 0.06090242167313894, "correct_loss_per_token": 2.0309460163116455, "incorrect_loss_per_token": 0.1827072650194168, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -2.0309460163116455, "num_tokens": 1, "num_tokens_all": 853, "is_greedy": false, "logits_per_token": -2.0309460163116455, "logits_per_char": -0.5077365040779114, "num_chars": 4}, {"sum_logits": -0.1827072650194168, "num_tokens": 1, "num_tokens_all": 853, "is_greedy": true, "logits_per_token": -0.1827072650194168, "logits_per_char": -0.06090242167313894, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 237, "native_id": 876, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5514367818832397, "incorrect_loss_raw": 0.9144743084907532, "correct_loss_per_char": 0.18381226062774658, "incorrect_loss_per_char": 0.2286185771226883, "correct_loss_per_token": 0.5514367818832397, "incorrect_loss_per_token": 0.9144743084907532, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9144743084907532, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": false, "logits_per_token": -0.9144743084907532, "logits_per_char": -0.2286185771226883, "num_chars": 4}, {"sum_logits": -0.5514367818832397, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": true, "logits_per_token": -0.5514367818832397, "logits_per_char": -0.18381226062774658, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 238, "native_id": 1218, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4168090224266052, "incorrect_loss_raw": 1.2704119682312012, "correct_loss_per_char": 0.1389363408088684, "incorrect_loss_per_char": 0.3176029920578003, "correct_loss_per_token": 0.4168090224266052, "incorrect_loss_per_token": 1.2704119682312012, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2704119682312012, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": false, "logits_per_token": -1.2704119682312012, "logits_per_char": -0.3176029920578003, "num_chars": 4}, {"sum_logits": -0.4168090224266052, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": true, "logits_per_token": -0.4168090224266052, "logits_per_char": -0.1389363408088684, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 239, "native_id": 2933, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7274548411369324, "incorrect_loss_raw": 0.8159781098365784, "correct_loss_per_char": 0.1818637102842331, "incorrect_loss_per_char": 0.27199270327885944, "correct_loss_per_token": 0.7274548411369324, "incorrect_loss_per_token": 0.8159781098365784, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7274548411369324, "num_tokens": 1, "num_tokens_all": 849, "is_greedy": true, "logits_per_token": -0.7274548411369324, "logits_per_char": -0.1818637102842331, "num_chars": 4}, {"sum_logits": -0.8159781098365784, "num_tokens": 1, "num_tokens_all": 849, "is_greedy": false, "logits_per_token": -0.8159781098365784, "logits_per_char": -0.27199270327885944, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 240, "native_id": 3198, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0787489414215088, "incorrect_loss_raw": 0.48041826486587524, "correct_loss_per_char": 0.2696872353553772, "incorrect_loss_per_char": 0.1601394216219584, "correct_loss_per_token": 1.0787489414215088, "incorrect_loss_per_token": 0.48041826486587524, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0787489414215088, "num_tokens": 1, "num_tokens_all": 889, "is_greedy": false, "logits_per_token": -1.0787489414215088, "logits_per_char": -0.2696872353553772, "num_chars": 4}, {"sum_logits": -0.48041826486587524, "num_tokens": 1, "num_tokens_all": 889, "is_greedy": true, "logits_per_token": -0.48041826486587524, "logits_per_char": -0.1601394216219584, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 241, "native_id": 1631, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.164431095123291, "incorrect_loss_raw": 0.3984268307685852, "correct_loss_per_char": 0.29110777378082275, "incorrect_loss_per_char": 0.1328089435895284, "correct_loss_per_token": 1.164431095123291, "incorrect_loss_per_token": 0.3984268307685852, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.164431095123291, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": false, "logits_per_token": -1.164431095123291, "logits_per_char": -0.29110777378082275, "num_chars": 4}, {"sum_logits": -0.3984268307685852, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": true, "logits_per_token": -0.3984268307685852, "logits_per_char": -0.1328089435895284, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 242, "native_id": 215, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7535731196403503, "incorrect_loss_raw": 0.6844152212142944, "correct_loss_per_char": 0.18839327991008759, "incorrect_loss_per_char": 0.2281384070714315, "correct_loss_per_token": 0.7535731196403503, "incorrect_loss_per_token": 0.6844152212142944, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7535731196403503, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": false, "logits_per_token": -0.7535731196403503, "logits_per_char": -0.18839327991008759, "num_chars": 4}, {"sum_logits": -0.6844152212142944, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": true, "logits_per_token": -0.6844152212142944, "logits_per_char": -0.2281384070714315, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 243, "native_id": 3167, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5600565075874329, "incorrect_loss_raw": 1.012245535850525, "correct_loss_per_char": 0.14001412689685822, "incorrect_loss_per_char": 0.3374151786168416, "correct_loss_per_token": 0.5600565075874329, "incorrect_loss_per_token": 1.012245535850525, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5600565075874329, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": true, "logits_per_token": -0.5600565075874329, "logits_per_char": -0.14001412689685822, "num_chars": 4}, {"sum_logits": -1.012245535850525, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": false, "logits_per_token": -1.012245535850525, "logits_per_char": -0.3374151786168416, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 244, "native_id": 1804, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1669588088989258, "incorrect_loss_raw": 0.4137788414955139, "correct_loss_per_char": 0.3889862696329753, "incorrect_loss_per_char": 0.10344471037387848, "correct_loss_per_token": 1.1669588088989258, "incorrect_loss_per_token": 0.4137788414955139, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4137788414955139, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": true, "logits_per_token": -0.4137788414955139, "logits_per_char": -0.10344471037387848, "num_chars": 4}, {"sum_logits": -1.1669588088989258, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -1.1669588088989258, "logits_per_char": -0.3889862696329753, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 245, "native_id": 952, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5385356545448303, "incorrect_loss_raw": 0.9805735349655151, "correct_loss_per_char": 0.17951188484827676, "incorrect_loss_per_char": 0.24514338374137878, "correct_loss_per_token": 0.5385356545448303, "incorrect_loss_per_token": 0.9805735349655151, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9805735349655151, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": false, "logits_per_token": -0.9805735349655151, "logits_per_char": -0.24514338374137878, "num_chars": 4}, {"sum_logits": -0.5385356545448303, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": true, "logits_per_token": -0.5385356545448303, "logits_per_char": -0.17951188484827676, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 246, "native_id": 2009, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7242030501365662, "incorrect_loss_raw": 0.7421116828918457, "correct_loss_per_char": 0.18105076253414154, "incorrect_loss_per_char": 0.24737056096394858, "correct_loss_per_token": 0.7242030501365662, "incorrect_loss_per_token": 0.7421116828918457, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7242030501365662, "num_tokens": 1, "num_tokens_all": 870, "is_greedy": true, "logits_per_token": -0.7242030501365662, "logits_per_char": -0.18105076253414154, "num_chars": 4}, {"sum_logits": -0.7421116828918457, "num_tokens": 1, "num_tokens_all": 870, "is_greedy": false, "logits_per_token": -0.7421116828918457, "logits_per_char": -0.24737056096394858, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 247, "native_id": 677, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6679973602294922, "incorrect_loss_raw": 0.8920076489448547, "correct_loss_per_char": 0.22266578674316406, "incorrect_loss_per_char": 0.22300191223621368, "correct_loss_per_token": 0.6679973602294922, "incorrect_loss_per_token": 0.8920076489448547, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8920076489448547, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": false, "logits_per_token": -0.8920076489448547, "logits_per_char": -0.22300191223621368, "num_chars": 4}, {"sum_logits": -0.6679973602294922, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": true, "logits_per_token": -0.6679973602294922, "logits_per_char": -0.22266578674316406, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 248, "native_id": 1558, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6164164543151855, "incorrect_loss_raw": 0.8352956771850586, "correct_loss_per_char": 0.20547215143839517, "incorrect_loss_per_char": 0.20882391929626465, "correct_loss_per_token": 0.6164164543151855, "incorrect_loss_per_token": 0.8352956771850586, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8352956771850586, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -0.8352956771850586, "logits_per_char": -0.20882391929626465, "num_chars": 4}, {"sum_logits": -0.6164164543151855, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": true, "logits_per_token": -0.6164164543151855, "logits_per_char": -0.20547215143839517, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 249, "native_id": 1261, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.521812379360199, "incorrect_loss_raw": 0.9589327573776245, "correct_loss_per_char": 0.13045309484004974, "incorrect_loss_per_char": 0.3196442524592082, "correct_loss_per_token": 0.521812379360199, "incorrect_loss_per_token": 0.9589327573776245, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.521812379360199, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": true, "logits_per_token": -0.521812379360199, "logits_per_char": -0.13045309484004974, "num_chars": 4}, {"sum_logits": -0.9589327573776245, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": false, "logits_per_token": -0.9589327573776245, "logits_per_char": -0.3196442524592082, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 250, "native_id": 10, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6173228025436401, "incorrect_loss_raw": 0.9282541275024414, "correct_loss_per_char": 0.15433070063591003, "incorrect_loss_per_char": 0.3094180425008138, "correct_loss_per_token": 0.6173228025436401, "incorrect_loss_per_token": 0.9282541275024414, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6173228025436401, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": true, "logits_per_token": -0.6173228025436401, "logits_per_char": -0.15433070063591003, "num_chars": 4}, {"sum_logits": -0.9282541275024414, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": false, "logits_per_token": -0.9282541275024414, "logits_per_char": -0.3094180425008138, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 251, "native_id": 300, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.31091538071632385, "incorrect_loss_raw": 1.3874375820159912, "correct_loss_per_char": 0.10363846023877461, "incorrect_loss_per_char": 0.3468593955039978, "correct_loss_per_token": 0.31091538071632385, "incorrect_loss_per_token": 1.3874375820159912, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3874375820159912, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": false, "logits_per_token": -1.3874375820159912, "logits_per_char": -0.3468593955039978, "num_chars": 4}, {"sum_logits": -0.31091538071632385, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": true, "logits_per_token": -0.31091538071632385, "logits_per_char": -0.10363846023877461, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 252, "native_id": 1966, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0735814571380615, "incorrect_loss_raw": 0.459614098072052, "correct_loss_per_char": 0.2683953642845154, "incorrect_loss_per_char": 0.15320469935735068, "correct_loss_per_token": 1.0735814571380615, "incorrect_loss_per_token": 0.459614098072052, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0735814571380615, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.0735814571380615, "logits_per_char": -0.2683953642845154, "num_chars": 4}, {"sum_logits": -0.459614098072052, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.459614098072052, "logits_per_char": -0.15320469935735068, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 253, "native_id": 1617, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.39646631479263306, "incorrect_loss_raw": 1.252455472946167, "correct_loss_per_char": 0.13215543826421103, "incorrect_loss_per_char": 0.31311386823654175, "correct_loss_per_token": 0.39646631479263306, "incorrect_loss_per_token": 1.252455472946167, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.252455472946167, "num_tokens": 1, "num_tokens_all": 838, "is_greedy": false, "logits_per_token": -1.252455472946167, "logits_per_char": -0.31311386823654175, "num_chars": 4}, {"sum_logits": -0.39646631479263306, "num_tokens": 1, "num_tokens_all": 838, "is_greedy": true, "logits_per_token": -0.39646631479263306, "logits_per_char": -0.13215543826421103, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 254, "native_id": 1222, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0795776844024658, "incorrect_loss_raw": 0.4639199376106262, "correct_loss_per_char": 0.26989442110061646, "incorrect_loss_per_char": 0.15463997920354208, "correct_loss_per_token": 1.0795776844024658, "incorrect_loss_per_token": 0.4639199376106262, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0795776844024658, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -1.0795776844024658, "logits_per_char": -0.26989442110061646, "num_chars": 4}, {"sum_logits": -0.4639199376106262, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": true, "logits_per_token": -0.4639199376106262, "logits_per_char": -0.15463997920354208, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 255, "native_id": 1756, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2488446235656738, "incorrect_loss_raw": 0.3792606294155121, "correct_loss_per_char": 0.31221115589141846, "incorrect_loss_per_char": 0.12642020980517069, "correct_loss_per_token": 1.2488446235656738, "incorrect_loss_per_token": 0.3792606294155121, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2488446235656738, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": false, "logits_per_token": -1.2488446235656738, "logits_per_char": -0.31221115589141846, "num_chars": 4}, {"sum_logits": -0.3792606294155121, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": true, "logits_per_token": -0.3792606294155121, "logits_per_char": -0.12642020980517069, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 256, "native_id": 2796, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8515312671661377, "incorrect_loss_raw": 0.6117013096809387, "correct_loss_per_char": 0.21288281679153442, "incorrect_loss_per_char": 0.2039004365603129, "correct_loss_per_token": 0.8515312671661377, "incorrect_loss_per_token": 0.6117013096809387, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8515312671661377, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": false, "logits_per_token": -0.8515312671661377, "logits_per_char": -0.21288281679153442, "num_chars": 4}, {"sum_logits": -0.6117013096809387, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": true, "logits_per_token": -0.6117013096809387, "logits_per_char": -0.2039004365603129, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 257, "native_id": 1964, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5988556742668152, "incorrect_loss_raw": 0.8962539434432983, "correct_loss_per_char": 0.1497139185667038, "incorrect_loss_per_char": 0.29875131448109943, "correct_loss_per_token": 0.5988556742668152, "incorrect_loss_per_token": 0.8962539434432983, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5988556742668152, "num_tokens": 1, "num_tokens_all": 901, "is_greedy": true, "logits_per_token": -0.5988556742668152, "logits_per_char": -0.1497139185667038, "num_chars": 4}, {"sum_logits": -0.8962539434432983, "num_tokens": 1, "num_tokens_all": 901, "is_greedy": false, "logits_per_token": -0.8962539434432983, "logits_per_char": -0.29875131448109943, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 258, "native_id": 3150, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7352309823036194, "incorrect_loss_raw": 0.7014316916465759, "correct_loss_per_char": 0.18380774557590485, "incorrect_loss_per_char": 0.23381056388219199, "correct_loss_per_token": 0.7352309823036194, "incorrect_loss_per_token": 0.7014316916465759, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7352309823036194, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": false, "logits_per_token": -0.7352309823036194, "logits_per_char": -0.18380774557590485, "num_chars": 4}, {"sum_logits": -0.7014316916465759, "num_tokens": 1, "num_tokens_all": 993, "is_greedy": true, "logits_per_token": -0.7014316916465759, "logits_per_char": -0.23381056388219199, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 259, "native_id": 1640, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.283718466758728, "incorrect_loss_raw": 0.36566171050071716, "correct_loss_per_char": 0.320929616689682, "incorrect_loss_per_char": 0.12188723683357239, "correct_loss_per_token": 1.283718466758728, "incorrect_loss_per_token": 0.36566171050071716, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.283718466758728, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -1.283718466758728, "logits_per_char": -0.320929616689682, "num_chars": 4}, {"sum_logits": -0.36566171050071716, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": true, "logits_per_token": -0.36566171050071716, "logits_per_char": -0.12188723683357239, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 260, "native_id": 2573, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2728915512561798, "incorrect_loss_raw": 1.5466358661651611, "correct_loss_per_char": 0.06822288781404495, "incorrect_loss_per_char": 0.5155452887217203, "correct_loss_per_token": 0.2728915512561798, "incorrect_loss_per_token": 1.5466358661651611, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2728915512561798, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": true, "logits_per_token": -0.2728915512561798, "logits_per_char": -0.06822288781404495, "num_chars": 4}, {"sum_logits": -1.5466358661651611, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": false, "logits_per_token": -1.5466358661651611, "logits_per_char": -0.5155452887217203, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 261, "native_id": 1957, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4787902235984802, "incorrect_loss_raw": 1.0478671789169312, "correct_loss_per_char": 0.1595967411994934, "incorrect_loss_per_char": 0.2619667947292328, "correct_loss_per_token": 0.4787902235984802, "incorrect_loss_per_token": 1.0478671789169312, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0478671789169312, "num_tokens": 1, "num_tokens_all": 1004, "is_greedy": false, "logits_per_token": -1.0478671789169312, "logits_per_char": -0.2619667947292328, "num_chars": 4}, {"sum_logits": -0.4787902235984802, "num_tokens": 1, "num_tokens_all": 1004, "is_greedy": true, "logits_per_token": -0.4787902235984802, "logits_per_char": -0.1595967411994934, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 262, "native_id": 3134, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7977225184440613, "incorrect_loss_raw": 0.7010698318481445, "correct_loss_per_char": 0.19943062961101532, "incorrect_loss_per_char": 0.2336899439493815, "correct_loss_per_token": 0.7977225184440613, "incorrect_loss_per_token": 0.7010698318481445, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7977225184440613, "num_tokens": 1, "num_tokens_all": 872, "is_greedy": false, "logits_per_token": -0.7977225184440613, "logits_per_char": -0.19943062961101532, "num_chars": 4}, {"sum_logits": -0.7010698318481445, "num_tokens": 1, "num_tokens_all": 872, "is_greedy": true, "logits_per_token": -0.7010698318481445, "logits_per_char": -0.2336899439493815, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 263, "native_id": 1152, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7424911260604858, "incorrect_loss_raw": 0.21234382688999176, "correct_loss_per_char": 0.43562278151512146, "incorrect_loss_per_char": 0.07078127562999725, "correct_loss_per_token": 1.7424911260604858, "incorrect_loss_per_token": 0.21234382688999176, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7424911260604858, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": false, "logits_per_token": -1.7424911260604858, "logits_per_char": -0.43562278151512146, "num_chars": 4}, {"sum_logits": -0.21234382688999176, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": true, "logits_per_token": -0.21234382688999176, "logits_per_char": -0.07078127562999725, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 264, "native_id": 2422, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4751889109611511, "incorrect_loss_raw": 1.084197998046875, "correct_loss_per_char": 0.11879722774028778, "incorrect_loss_per_char": 0.3613993326822917, "correct_loss_per_token": 0.4751889109611511, "incorrect_loss_per_token": 1.084197998046875, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4751889109611511, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": true, "logits_per_token": -0.4751889109611511, "logits_per_char": -0.11879722774028778, "num_chars": 4}, {"sum_logits": -1.084197998046875, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": false, "logits_per_token": -1.084197998046875, "logits_per_char": -0.3613993326822917, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 265, "native_id": 1513, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8728872537612915, "incorrect_loss_raw": 0.5698087811470032, "correct_loss_per_char": 0.21822181344032288, "incorrect_loss_per_char": 0.18993626038233438, "correct_loss_per_token": 0.8728872537612915, "incorrect_loss_per_token": 0.5698087811470032, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8728872537612915, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -0.8728872537612915, "logits_per_char": -0.21822181344032288, "num_chars": 4}, {"sum_logits": -0.5698087811470032, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.5698087811470032, "logits_per_char": -0.18993626038233438, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 266, "native_id": 2683, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.49337685108184814, "incorrect_loss_raw": 1.1150407791137695, "correct_loss_per_char": 0.12334421277046204, "incorrect_loss_per_char": 0.37168025970458984, "correct_loss_per_token": 0.49337685108184814, "incorrect_loss_per_token": 1.1150407791137695, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.49337685108184814, "num_tokens": 1, "num_tokens_all": 902, "is_greedy": true, "logits_per_token": -0.49337685108184814, "logits_per_char": -0.12334421277046204, "num_chars": 4}, {"sum_logits": -1.1150407791137695, "num_tokens": 1, "num_tokens_all": 902, "is_greedy": false, "logits_per_token": -1.1150407791137695, "logits_per_char": -0.37168025970458984, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 267, "native_id": 2459, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8965144157409668, "incorrect_loss_raw": 0.5690922737121582, "correct_loss_per_char": 0.2241286039352417, "incorrect_loss_per_char": 0.1896974245707194, "correct_loss_per_token": 0.8965144157409668, "incorrect_loss_per_token": 0.5690922737121582, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8965144157409668, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": false, "logits_per_token": -0.8965144157409668, "logits_per_char": -0.2241286039352417, "num_chars": 4}, {"sum_logits": -0.5690922737121582, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": true, "logits_per_token": -0.5690922737121582, "logits_per_char": -0.1896974245707194, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 268, "native_id": 1419, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3005430698394775, "incorrect_loss_raw": 0.3428446352481842, "correct_loss_per_char": 0.3251357674598694, "incorrect_loss_per_char": 0.11428154508272807, "correct_loss_per_token": 1.3005430698394775, "incorrect_loss_per_token": 0.3428446352481842, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3005430698394775, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": false, "logits_per_token": -1.3005430698394775, "logits_per_char": -0.3251357674598694, "num_chars": 4}, {"sum_logits": -0.3428446352481842, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": true, "logits_per_token": -0.3428446352481842, "logits_per_char": -0.11428154508272807, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 269, "native_id": 844, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7187685966491699, "incorrect_loss_raw": 0.7663872241973877, "correct_loss_per_char": 0.17969214916229248, "incorrect_loss_per_char": 0.2554624080657959, "correct_loss_per_token": 0.7187685966491699, "incorrect_loss_per_token": 0.7663872241973877, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7187685966491699, "num_tokens": 1, "num_tokens_all": 1034, "is_greedy": true, "logits_per_token": -0.7187685966491699, "logits_per_char": -0.17969214916229248, "num_chars": 4}, {"sum_logits": -0.7663872241973877, "num_tokens": 1, "num_tokens_all": 1034, "is_greedy": false, "logits_per_token": -0.7663872241973877, "logits_per_char": -0.2554624080657959, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 270, "native_id": 692, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8214107751846313, "incorrect_loss_raw": 0.6235162019729614, "correct_loss_per_char": 0.20535269379615784, "incorrect_loss_per_char": 0.20783873399098715, "correct_loss_per_token": 0.8214107751846313, "incorrect_loss_per_token": 0.6235162019729614, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8214107751846313, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -0.8214107751846313, "logits_per_char": -0.20535269379615784, "num_chars": 4}, {"sum_logits": -0.6235162019729614, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": true, "logits_per_token": -0.6235162019729614, "logits_per_char": -0.20783873399098715, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 271, "native_id": 2125, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1542328596115112, "incorrect_loss_raw": 0.4039017856121063, "correct_loss_per_char": 0.2885582149028778, "incorrect_loss_per_char": 0.13463392853736877, "correct_loss_per_token": 1.1542328596115112, "incorrect_loss_per_token": 0.4039017856121063, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1542328596115112, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": false, "logits_per_token": -1.1542328596115112, "logits_per_char": -0.2885582149028778, "num_chars": 4}, {"sum_logits": -0.4039017856121063, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": true, "logits_per_token": -0.4039017856121063, "logits_per_char": -0.13463392853736877, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 272, "native_id": 2326, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.959970235824585, "incorrect_loss_raw": 0.5342625379562378, "correct_loss_per_char": 0.23999255895614624, "incorrect_loss_per_char": 0.17808751265207926, "correct_loss_per_token": 0.959970235824585, "incorrect_loss_per_token": 0.5342625379562378, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.959970235824585, "num_tokens": 1, "num_tokens_all": 1002, "is_greedy": false, "logits_per_token": -0.959970235824585, "logits_per_char": -0.23999255895614624, "num_chars": 4}, {"sum_logits": -0.5342625379562378, "num_tokens": 1, "num_tokens_all": 1002, "is_greedy": true, "logits_per_token": -0.5342625379562378, "logits_per_char": -0.17808751265207926, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 273, "native_id": 1873, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5721011757850647, "incorrect_loss_raw": 0.87823086977005, "correct_loss_per_char": 0.14302529394626617, "incorrect_loss_per_char": 0.29274362325668335, "correct_loss_per_token": 0.5721011757850647, "incorrect_loss_per_token": 0.87823086977005, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5721011757850647, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": true, "logits_per_token": -0.5721011757850647, "logits_per_char": -0.14302529394626617, "num_chars": 4}, {"sum_logits": -0.87823086977005, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": false, "logits_per_token": -0.87823086977005, "logits_per_char": -0.29274362325668335, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 274, "native_id": 3069, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.38988715410232544, "incorrect_loss_raw": 1.3658068180084229, "correct_loss_per_char": 0.09747178852558136, "incorrect_loss_per_char": 0.45526893933614093, "correct_loss_per_token": 0.38988715410232544, "incorrect_loss_per_token": 1.3658068180084229, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.38988715410232544, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": true, "logits_per_token": -0.38988715410232544, "logits_per_char": -0.09747178852558136, "num_chars": 4}, {"sum_logits": -1.3658068180084229, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": false, "logits_per_token": -1.3658068180084229, "logits_per_char": -0.45526893933614093, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 275, "native_id": 1943, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.48788559436798096, "incorrect_loss_raw": 0.9946516752243042, "correct_loss_per_char": 0.12197139859199524, "incorrect_loss_per_char": 0.3315505584081014, "correct_loss_per_token": 0.48788559436798096, "incorrect_loss_per_token": 0.9946516752243042, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.48788559436798096, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": true, "logits_per_token": -0.48788559436798096, "logits_per_char": -0.12197139859199524, "num_chars": 4}, {"sum_logits": -0.9946516752243042, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": false, "logits_per_token": -0.9946516752243042, "logits_per_char": -0.3315505584081014, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 276, "native_id": 2702, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.42027246952056885, "incorrect_loss_raw": 1.1338703632354736, "correct_loss_per_char": 0.14009082317352295, "incorrect_loss_per_char": 0.2834675908088684, "correct_loss_per_token": 0.42027246952056885, "incorrect_loss_per_token": 1.1338703632354736, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1338703632354736, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": false, "logits_per_token": -1.1338703632354736, "logits_per_char": -0.2834675908088684, "num_chars": 4}, {"sum_logits": -0.42027246952056885, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": true, "logits_per_token": -0.42027246952056885, "logits_per_char": -0.14009082317352295, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 277, "native_id": 115, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6353291273117065, "incorrect_loss_raw": 0.8219523429870605, "correct_loss_per_char": 0.15883228182792664, "incorrect_loss_per_char": 0.2739841143290202, "correct_loss_per_token": 0.6353291273117065, "incorrect_loss_per_token": 0.8219523429870605, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6353291273117065, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": true, "logits_per_token": -0.6353291273117065, "logits_per_char": -0.15883228182792664, "num_chars": 4}, {"sum_logits": -0.8219523429870605, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -0.8219523429870605, "logits_per_char": -0.2739841143290202, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 278, "native_id": 2971, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2447139471769333, "incorrect_loss_raw": 1.6503477096557617, "correct_loss_per_char": 0.08157131572564442, "incorrect_loss_per_char": 0.41258692741394043, "correct_loss_per_token": 0.2447139471769333, "incorrect_loss_per_token": 1.6503477096557617, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6503477096557617, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": false, "logits_per_token": -1.6503477096557617, "logits_per_char": -0.41258692741394043, "num_chars": 4}, {"sum_logits": -0.2447139471769333, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": true, "logits_per_token": -0.2447139471769333, "logits_per_char": -0.08157131572564442, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 279, "native_id": 1916, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.42925235629081726, "incorrect_loss_raw": 1.1040078401565552, "correct_loss_per_char": 0.14308411876360574, "incorrect_loss_per_char": 0.2760019600391388, "correct_loss_per_token": 0.42925235629081726, "incorrect_loss_per_token": 1.1040078401565552, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1040078401565552, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.1040078401565552, "logits_per_char": -0.2760019600391388, "num_chars": 4}, {"sum_logits": -0.42925235629081726, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": true, "logits_per_token": -0.42925235629081726, "logits_per_char": -0.14308411876360574, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 280, "native_id": 2706, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4670867323875427, "incorrect_loss_raw": 1.0528435707092285, "correct_loss_per_char": 0.11677168309688568, "incorrect_loss_per_char": 0.35094785690307617, "correct_loss_per_token": 0.4670867323875427, "incorrect_loss_per_token": 1.0528435707092285, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4670867323875427, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": true, "logits_per_token": -0.4670867323875427, "logits_per_char": -0.11677168309688568, "num_chars": 4}, {"sum_logits": -1.0528435707092285, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": false, "logits_per_token": -1.0528435707092285, "logits_per_char": -0.35094785690307617, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 281, "native_id": 424, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5152474641799927, "incorrect_loss_raw": 0.9549064636230469, "correct_loss_per_char": 0.12881186604499817, "incorrect_loss_per_char": 0.3183021545410156, "correct_loss_per_token": 0.5152474641799927, "incorrect_loss_per_token": 0.9549064636230469, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5152474641799927, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": true, "logits_per_token": -0.5152474641799927, "logits_per_char": -0.12881186604499817, "num_chars": 4}, {"sum_logits": -0.9549064636230469, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": false, "logits_per_token": -0.9549064636230469, "logits_per_char": -0.3183021545410156, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 282, "native_id": 110, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.669147253036499, "incorrect_loss_raw": 0.7679613828659058, "correct_loss_per_char": 0.16728681325912476, "incorrect_loss_per_char": 0.25598712762196857, "correct_loss_per_token": 0.669147253036499, "incorrect_loss_per_token": 0.7679613828659058, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.669147253036499, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": true, "logits_per_token": -0.669147253036499, "logits_per_char": -0.16728681325912476, "num_chars": 4}, {"sum_logits": -0.7679613828659058, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": false, "logits_per_token": -0.7679613828659058, "logits_per_char": -0.25598712762196857, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 283, "native_id": 1501, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4254109859466553, "incorrect_loss_raw": 1.102776050567627, "correct_loss_per_char": 0.10635274648666382, "incorrect_loss_per_char": 0.36759201685587567, "correct_loss_per_token": 0.4254109859466553, "incorrect_loss_per_token": 1.102776050567627, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4254109859466553, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": true, "logits_per_token": -0.4254109859466553, "logits_per_char": -0.10635274648666382, "num_chars": 4}, {"sum_logits": -1.102776050567627, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": false, "logits_per_token": -1.102776050567627, "logits_per_char": -0.36759201685587567, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 284, "native_id": 1948, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5239448547363281, "incorrect_loss_raw": 0.9737346172332764, "correct_loss_per_char": 0.17464828491210938, "incorrect_loss_per_char": 0.2434336543083191, "correct_loss_per_token": 0.5239448547363281, "incorrect_loss_per_token": 0.9737346172332764, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9737346172332764, "num_tokens": 1, "num_tokens_all": 1034, "is_greedy": false, "logits_per_token": -0.9737346172332764, "logits_per_char": -0.2434336543083191, "num_chars": 4}, {"sum_logits": -0.5239448547363281, "num_tokens": 1, "num_tokens_all": 1034, "is_greedy": true, "logits_per_token": -0.5239448547363281, "logits_per_char": -0.17464828491210938, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 285, "native_id": 267, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0157172679901123, "incorrect_loss_raw": 0.48832768201828003, "correct_loss_per_char": 0.2539293169975281, "incorrect_loss_per_char": 0.16277589400609335, "correct_loss_per_token": 1.0157172679901123, "incorrect_loss_per_token": 0.48832768201828003, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0157172679901123, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.0157172679901123, "logits_per_char": -0.2539293169975281, "num_chars": 4}, {"sum_logits": -0.48832768201828003, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": true, "logits_per_token": -0.48832768201828003, "logits_per_char": -0.16277589400609335, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 286, "native_id": 573, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6579414010047913, "incorrect_loss_raw": 0.861657977104187, "correct_loss_per_char": 0.16448535025119781, "incorrect_loss_per_char": 0.2872193257013957, "correct_loss_per_token": 0.6579414010047913, "incorrect_loss_per_token": 0.861657977104187, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6579414010047913, "num_tokens": 1, "num_tokens_all": 874, "is_greedy": true, "logits_per_token": -0.6579414010047913, "logits_per_char": -0.16448535025119781, "num_chars": 4}, {"sum_logits": -0.861657977104187, "num_tokens": 1, "num_tokens_all": 874, "is_greedy": false, "logits_per_token": -0.861657977104187, "logits_per_char": -0.2872193257013957, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 287, "native_id": 2408, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.25000280141830444, "incorrect_loss_raw": 1.6600079536437988, "correct_loss_per_char": 0.06250070035457611, "incorrect_loss_per_char": 0.553335984547933, "correct_loss_per_token": 0.25000280141830444, "incorrect_loss_per_token": 1.6600079536437988, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.25000280141830444, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": true, "logits_per_token": -0.25000280141830444, "logits_per_char": -0.06250070035457611, "num_chars": 4}, {"sum_logits": -1.6600079536437988, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -1.6600079536437988, "logits_per_char": -0.553335984547933, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 288, "native_id": 1358, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.233912467956543, "incorrect_loss_raw": 0.45568662881851196, "correct_loss_per_char": 0.41130415598551434, "incorrect_loss_per_char": 0.11392165720462799, "correct_loss_per_token": 1.233912467956543, "incorrect_loss_per_token": 0.45568662881851196, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.45568662881851196, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": true, "logits_per_token": -0.45568662881851196, "logits_per_char": -0.11392165720462799, "num_chars": 4}, {"sum_logits": -1.233912467956543, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": false, "logits_per_token": -1.233912467956543, "logits_per_char": -0.41130415598551434, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 289, "native_id": 1429, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6509156823158264, "incorrect_loss_raw": 0.8152725696563721, "correct_loss_per_char": 0.1627289205789566, "incorrect_loss_per_char": 0.2717575232187907, "correct_loss_per_token": 0.6509156823158264, "incorrect_loss_per_token": 0.8152725696563721, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6509156823158264, "num_tokens": 1, "num_tokens_all": 869, "is_greedy": true, "logits_per_token": -0.6509156823158264, "logits_per_char": -0.1627289205789566, "num_chars": 4}, {"sum_logits": -0.8152725696563721, "num_tokens": 1, "num_tokens_all": 869, "is_greedy": false, "logits_per_token": -0.8152725696563721, "logits_per_char": -0.2717575232187907, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 290, "native_id": 1186, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6236785650253296, "incorrect_loss_raw": 0.27485349774360657, "correct_loss_per_char": 0.4059196412563324, "incorrect_loss_per_char": 0.0916178325812022, "correct_loss_per_token": 1.6236785650253296, "incorrect_loss_per_token": 0.27485349774360657, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6236785650253296, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": false, "logits_per_token": -1.6236785650253296, "logits_per_char": -0.4059196412563324, "num_chars": 4}, {"sum_logits": -0.27485349774360657, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": true, "logits_per_token": -0.27485349774360657, "logits_per_char": -0.0916178325812022, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 291, "native_id": 1223, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6498448252677917, "incorrect_loss_raw": 0.8453885912895203, "correct_loss_per_char": 0.16246120631694794, "incorrect_loss_per_char": 0.2817961970965068, "correct_loss_per_token": 0.6498448252677917, "incorrect_loss_per_token": 0.8453885912895203, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6498448252677917, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": true, "logits_per_token": -0.6498448252677917, "logits_per_char": -0.16246120631694794, "num_chars": 4}, {"sum_logits": -0.8453885912895203, "num_tokens": 1, "num_tokens_all": 997, "is_greedy": false, "logits_per_token": -0.8453885912895203, "logits_per_char": -0.2817961970965068, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 292, "native_id": 2791, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21455594897270203, "incorrect_loss_raw": 1.7731342315673828, "correct_loss_per_char": 0.07151864965756734, "incorrect_loss_per_char": 0.4432835578918457, "correct_loss_per_token": 0.21455594897270203, "incorrect_loss_per_token": 1.7731342315673828, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7731342315673828, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": false, "logits_per_token": -1.7731342315673828, "logits_per_char": -0.4432835578918457, "num_chars": 4}, {"sum_logits": -0.21455594897270203, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": true, "logits_per_token": -0.21455594897270203, "logits_per_char": -0.07151864965756734, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 293, "native_id": 2810, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.820860743522644, "incorrect_loss_raw": 0.6572290062904358, "correct_loss_per_char": 0.205215185880661, "incorrect_loss_per_char": 0.21907633543014526, "correct_loss_per_token": 0.820860743522644, "incorrect_loss_per_token": 0.6572290062904358, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.820860743522644, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -0.820860743522644, "logits_per_char": -0.205215185880661, "num_chars": 4}, {"sum_logits": -0.6572290062904358, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": true, "logits_per_token": -0.6572290062904358, "logits_per_char": -0.21907633543014526, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 294, "native_id": 2388, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9525973796844482, "incorrect_loss_raw": 0.5946245193481445, "correct_loss_per_char": 0.23814934492111206, "incorrect_loss_per_char": 0.1982081731160482, "correct_loss_per_token": 0.9525973796844482, "incorrect_loss_per_token": 0.5946245193481445, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9525973796844482, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -0.9525973796844482, "logits_per_char": -0.23814934492111206, "num_chars": 4}, {"sum_logits": -0.5946245193481445, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": true, "logits_per_token": -0.5946245193481445, "logits_per_char": -0.1982081731160482, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 295, "native_id": 1354, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1385222673416138, "incorrect_loss_raw": 0.4296759068965912, "correct_loss_per_char": 0.28463056683540344, "incorrect_loss_per_char": 0.14322530229886374, "correct_loss_per_token": 1.1385222673416138, "incorrect_loss_per_token": 0.4296759068965912, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1385222673416138, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.1385222673416138, "logits_per_char": -0.28463056683540344, "num_chars": 4}, {"sum_logits": -0.4296759068965912, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -0.4296759068965912, "logits_per_char": -0.14322530229886374, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 296, "native_id": 2305, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5036894679069519, "incorrect_loss_raw": 1.1057157516479492, "correct_loss_per_char": 0.12592236697673798, "incorrect_loss_per_char": 0.3685719172159831, "correct_loss_per_token": 0.5036894679069519, "incorrect_loss_per_token": 1.1057157516479492, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5036894679069519, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": true, "logits_per_token": -0.5036894679069519, "logits_per_char": -0.12592236697673798, "num_chars": 4}, {"sum_logits": -1.1057157516479492, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -1.1057157516479492, "logits_per_char": -0.3685719172159831, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 297, "native_id": 1203, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.132709264755249, "incorrect_loss_raw": 0.42550307512283325, "correct_loss_per_char": 0.28317731618881226, "incorrect_loss_per_char": 0.14183435837427774, "correct_loss_per_token": 1.132709264755249, "incorrect_loss_per_token": 0.42550307512283325, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.132709264755249, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": false, "logits_per_token": -1.132709264755249, "logits_per_char": -0.28317731618881226, "num_chars": 4}, {"sum_logits": -0.42550307512283325, "num_tokens": 1, "num_tokens_all": 1052, "is_greedy": true, "logits_per_token": -0.42550307512283325, "logits_per_char": -0.14183435837427774, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 298, "native_id": 2304, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.772199273109436, "incorrect_loss_raw": 0.6687880754470825, "correct_loss_per_char": 0.193049818277359, "incorrect_loss_per_char": 0.22292935848236084, "correct_loss_per_token": 0.772199273109436, "incorrect_loss_per_token": 0.6687880754470825, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.772199273109436, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -0.772199273109436, "logits_per_char": -0.193049818277359, "num_chars": 4}, {"sum_logits": -0.6687880754470825, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": true, "logits_per_token": -0.6687880754470825, "logits_per_char": -0.22292935848236084, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 299, "native_id": 796, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6740450859069824, "incorrect_loss_raw": 0.8113222122192383, "correct_loss_per_char": 0.1685112714767456, "incorrect_loss_per_char": 0.2704407374064128, "correct_loss_per_token": 0.6740450859069824, "incorrect_loss_per_token": 0.8113222122192383, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6740450859069824, "num_tokens": 1, "num_tokens_all": 894, "is_greedy": true, "logits_per_token": -0.6740450859069824, "logits_per_char": -0.1685112714767456, "num_chars": 4}, {"sum_logits": -0.8113222122192383, "num_tokens": 1, "num_tokens_all": 894, "is_greedy": false, "logits_per_token": -0.8113222122192383, "logits_per_char": -0.2704407374064128, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 300, "native_id": 2085, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.14323875308036804, "incorrect_loss_raw": 2.5386714935302734, "correct_loss_per_char": 0.047746251026789345, "incorrect_loss_per_char": 0.6346678733825684, "correct_loss_per_token": 0.14323875308036804, "incorrect_loss_per_token": 2.5386714935302734, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -2.5386714935302734, "num_tokens": 1, "num_tokens_all": 843, "is_greedy": false, "logits_per_token": -2.5386714935302734, "logits_per_char": -0.6346678733825684, "num_chars": 4}, {"sum_logits": -0.14323875308036804, "num_tokens": 1, "num_tokens_all": 843, "is_greedy": true, "logits_per_token": -0.14323875308036804, "logits_per_char": -0.047746251026789345, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 301, "native_id": 1142, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.929816722869873, "incorrect_loss_raw": 0.5506457090377808, "correct_loss_per_char": 0.23245418071746826, "incorrect_loss_per_char": 0.18354856967926025, "correct_loss_per_token": 0.929816722869873, "incorrect_loss_per_token": 0.5506457090377808, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.929816722869873, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": false, "logits_per_token": -0.929816722869873, "logits_per_char": -0.23245418071746826, "num_chars": 4}, {"sum_logits": -0.5506457090377808, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": true, "logits_per_token": -0.5506457090377808, "logits_per_char": -0.18354856967926025, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 302, "native_id": 296, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6876532435417175, "incorrect_loss_raw": 0.7703683376312256, "correct_loss_per_char": 0.17191331088542938, "incorrect_loss_per_char": 0.2567894458770752, "correct_loss_per_token": 0.6876532435417175, "incorrect_loss_per_token": 0.7703683376312256, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6876532435417175, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": true, "logits_per_token": -0.6876532435417175, "logits_per_char": -0.17191331088542938, "num_chars": 4}, {"sum_logits": -0.7703683376312256, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": false, "logits_per_token": -0.7703683376312256, "logits_per_char": -0.2567894458770752, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 303, "native_id": 2187, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1870732307434082, "incorrect_loss_raw": 0.48455074429512024, "correct_loss_per_char": 0.29676830768585205, "incorrect_loss_per_char": 0.16151691476504007, "correct_loss_per_token": 1.1870732307434082, "incorrect_loss_per_token": 0.48455074429512024, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1870732307434082, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.1870732307434082, "logits_per_char": -0.29676830768585205, "num_chars": 4}, {"sum_logits": -0.48455074429512024, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -0.48455074429512024, "logits_per_char": -0.16151691476504007, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 304, "native_id": 2840, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0832571983337402, "incorrect_loss_raw": 0.45730137825012207, "correct_loss_per_char": 0.27081429958343506, "incorrect_loss_per_char": 0.15243379275004068, "correct_loss_per_token": 1.0832571983337402, "incorrect_loss_per_token": 0.45730137825012207, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0832571983337402, "num_tokens": 1, "num_tokens_all": 1025, "is_greedy": false, "logits_per_token": -1.0832571983337402, "logits_per_char": -0.27081429958343506, "num_chars": 4}, {"sum_logits": -0.45730137825012207, "num_tokens": 1, "num_tokens_all": 1025, "is_greedy": true, "logits_per_token": -0.45730137825012207, "logits_per_char": -0.15243379275004068, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 305, "native_id": 2466, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6590691208839417, "incorrect_loss_raw": 1.0025837421417236, "correct_loss_per_char": 0.1647672802209854, "incorrect_loss_per_char": 0.3341945807139079, "correct_loss_per_token": 0.6590691208839417, "incorrect_loss_per_token": 1.0025837421417236, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6590691208839417, "num_tokens": 1, "num_tokens_all": 838, "is_greedy": true, "logits_per_token": -0.6590691208839417, "logits_per_char": -0.1647672802209854, "num_chars": 4}, {"sum_logits": -1.0025837421417236, "num_tokens": 1, "num_tokens_all": 838, "is_greedy": false, "logits_per_token": -1.0025837421417236, "logits_per_char": -0.3341945807139079, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 306, "native_id": 835, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4120588004589081, "incorrect_loss_raw": 1.2256791591644287, "correct_loss_per_char": 0.10301470011472702, "incorrect_loss_per_char": 0.40855971972147626, "correct_loss_per_token": 0.4120588004589081, "incorrect_loss_per_token": 1.2256791591644287, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4120588004589081, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": true, "logits_per_token": -0.4120588004589081, "logits_per_char": -0.10301470011472702, "num_chars": 4}, {"sum_logits": -1.2256791591644287, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": false, "logits_per_token": -1.2256791591644287, "logits_per_char": -0.40855971972147626, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 307, "native_id": 1391, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6083632111549377, "incorrect_loss_raw": 0.8356404304504395, "correct_loss_per_char": 0.2027877370516459, "incorrect_loss_per_char": 0.20891010761260986, "correct_loss_per_token": 0.6083632111549377, "incorrect_loss_per_token": 0.8356404304504395, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8356404304504395, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -0.8356404304504395, "logits_per_char": -0.20891010761260986, "num_chars": 4}, {"sum_logits": -0.6083632111549377, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": true, "logits_per_token": -0.6083632111549377, "logits_per_char": -0.2027877370516459, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 308, "native_id": 2090, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8110262751579285, "incorrect_loss_raw": 0.6941836476325989, "correct_loss_per_char": 0.2703420917193095, "incorrect_loss_per_char": 0.17354591190814972, "correct_loss_per_token": 0.8110262751579285, "incorrect_loss_per_token": 0.6941836476325989, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6941836476325989, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": true, "logits_per_token": -0.6941836476325989, "logits_per_char": -0.17354591190814972, "num_chars": 4}, {"sum_logits": -0.8110262751579285, "num_tokens": 1, "num_tokens_all": 970, "is_greedy": false, "logits_per_token": -0.8110262751579285, "logits_per_char": -0.2703420917193095, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 309, "native_id": 1369, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1676502227783203, "incorrect_loss_raw": 0.4550324082374573, "correct_loss_per_char": 0.2919125556945801, "incorrect_loss_per_char": 0.15167746941248575, "correct_loss_per_token": 1.1676502227783203, "incorrect_loss_per_token": 0.4550324082374573, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1676502227783203, "num_tokens": 1, "num_tokens_all": 1127, "is_greedy": false, "logits_per_token": -1.1676502227783203, "logits_per_char": -0.2919125556945801, "num_chars": 4}, {"sum_logits": -0.4550324082374573, "num_tokens": 1, "num_tokens_all": 1127, "is_greedy": true, "logits_per_token": -0.4550324082374573, "logits_per_char": -0.15167746941248575, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 310, "native_id": 1315, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5351934432983398, "incorrect_loss_raw": 0.9189417362213135, "correct_loss_per_char": 0.13379836082458496, "incorrect_loss_per_char": 0.3063139120737712, "correct_loss_per_token": 0.5351934432983398, "incorrect_loss_per_token": 0.9189417362213135, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5351934432983398, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": true, "logits_per_token": -0.5351934432983398, "logits_per_char": -0.13379836082458496, "num_chars": 4}, {"sum_logits": -0.9189417362213135, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": false, "logits_per_token": -0.9189417362213135, "logits_per_char": -0.3063139120737712, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 311, "native_id": 1876, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.44250616431236267, "incorrect_loss_raw": 1.1228407621383667, "correct_loss_per_char": 0.14750205477078757, "incorrect_loss_per_char": 0.2807101905345917, "correct_loss_per_token": 0.44250616431236267, "incorrect_loss_per_token": 1.1228407621383667, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1228407621383667, "num_tokens": 1, "num_tokens_all": 847, "is_greedy": false, "logits_per_token": -1.1228407621383667, "logits_per_char": -0.2807101905345917, "num_chars": 4}, {"sum_logits": -0.44250616431236267, "num_tokens": 1, "num_tokens_all": 847, "is_greedy": true, "logits_per_token": -0.44250616431236267, "logits_per_char": -0.14750205477078757, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 312, "native_id": 1095, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7148954272270203, "incorrect_loss_raw": 0.7557398080825806, "correct_loss_per_char": 0.17872385680675507, "incorrect_loss_per_char": 0.25191326936086017, "correct_loss_per_token": 0.7148954272270203, "incorrect_loss_per_token": 0.7557398080825806, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7148954272270203, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": true, "logits_per_token": -0.7148954272270203, "logits_per_char": -0.17872385680675507, "num_chars": 4}, {"sum_logits": -0.7557398080825806, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": false, "logits_per_token": -0.7557398080825806, "logits_per_char": -0.25191326936086017, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 313, "native_id": 347, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7916710376739502, "incorrect_loss_raw": 0.7809218168258667, "correct_loss_per_char": 0.19791775941848755, "incorrect_loss_per_char": 0.2603072722752889, "correct_loss_per_token": 0.7916710376739502, "incorrect_loss_per_token": 0.7809218168258667, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7916710376739502, "num_tokens": 1, "num_tokens_all": 830, "is_greedy": false, "logits_per_token": -0.7916710376739502, "logits_per_char": -0.19791775941848755, "num_chars": 4}, {"sum_logits": -0.7809218168258667, "num_tokens": 1, "num_tokens_all": 830, "is_greedy": true, "logits_per_token": -0.7809218168258667, "logits_per_char": -0.2603072722752889, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 314, "native_id": 2159, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6157922148704529, "incorrect_loss_raw": 1.0376832485198975, "correct_loss_per_char": 0.15394805371761322, "incorrect_loss_per_char": 0.34589441617329914, "correct_loss_per_token": 0.6157922148704529, "incorrect_loss_per_token": 1.0376832485198975, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6157922148704529, "num_tokens": 1, "num_tokens_all": 1066, "is_greedy": true, "logits_per_token": -0.6157922148704529, "logits_per_char": -0.15394805371761322, "num_chars": 4}, {"sum_logits": -1.0376832485198975, "num_tokens": 1, "num_tokens_all": 1066, "is_greedy": false, "logits_per_token": -1.0376832485198975, "logits_per_char": -0.34589441617329914, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 315, "native_id": 2413, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5935730338096619, "incorrect_loss_raw": 0.921276330947876, "correct_loss_per_char": 0.14839325845241547, "incorrect_loss_per_char": 0.3070921103159587, "correct_loss_per_token": 0.5935730338096619, "incorrect_loss_per_token": 0.921276330947876, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5935730338096619, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": true, "logits_per_token": -0.5935730338096619, "logits_per_char": -0.14839325845241547, "num_chars": 4}, {"sum_logits": -0.921276330947876, "num_tokens": 1, "num_tokens_all": 1087, "is_greedy": false, "logits_per_token": -0.921276330947876, "logits_per_char": -0.3070921103159587, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 316, "native_id": 2386, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7189618945121765, "incorrect_loss_raw": 0.7367262244224548, "correct_loss_per_char": 0.17974047362804413, "incorrect_loss_per_char": 0.24557540814081827, "correct_loss_per_token": 0.7189618945121765, "incorrect_loss_per_token": 0.7367262244224548, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7189618945121765, "num_tokens": 1, "num_tokens_all": 838, "is_greedy": true, "logits_per_token": -0.7189618945121765, "logits_per_char": -0.17974047362804413, "num_chars": 4}, {"sum_logits": -0.7367262244224548, "num_tokens": 1, "num_tokens_all": 838, "is_greedy": false, "logits_per_token": -0.7367262244224548, "logits_per_char": -0.24557540814081827, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 317, "native_id": 2245, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5013113021850586, "incorrect_loss_raw": 1.0082905292510986, "correct_loss_per_char": 0.16710376739501953, "incorrect_loss_per_char": 0.25207263231277466, "correct_loss_per_token": 0.5013113021850586, "incorrect_loss_per_token": 1.0082905292510986, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0082905292510986, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": false, "logits_per_token": -1.0082905292510986, "logits_per_char": -0.25207263231277466, "num_chars": 4}, {"sum_logits": -0.5013113021850586, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": true, "logits_per_token": -0.5013113021850586, "logits_per_char": -0.16710376739501953, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 318, "native_id": 3147, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8088254928588867, "incorrect_loss_raw": 0.7016075849533081, "correct_loss_per_char": 0.20220637321472168, "incorrect_loss_per_char": 0.23386919498443604, "correct_loss_per_token": 0.8088254928588867, "incorrect_loss_per_token": 0.7016075849533081, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8088254928588867, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -0.8088254928588867, "logits_per_char": -0.20220637321472168, "num_chars": 4}, {"sum_logits": -0.7016075849533081, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": true, "logits_per_token": -0.7016075849533081, "logits_per_char": -0.23386919498443604, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 319, "native_id": 1004, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3916503190994263, "incorrect_loss_raw": 0.31094875931739807, "correct_loss_per_char": 0.34791257977485657, "incorrect_loss_per_char": 0.10364958643913269, "correct_loss_per_token": 1.3916503190994263, "incorrect_loss_per_token": 0.31094875931739807, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3916503190994263, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": false, "logits_per_token": -1.3916503190994263, "logits_per_char": -0.34791257977485657, "num_chars": 4}, {"sum_logits": -0.31094875931739807, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": true, "logits_per_token": -0.31094875931739807, "logits_per_char": -0.10364958643913269, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 320, "native_id": 1053, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1439077854156494, "incorrect_loss_raw": 0.4291420578956604, "correct_loss_per_char": 0.3813025951385498, "incorrect_loss_per_char": 0.1072855144739151, "correct_loss_per_token": 1.1439077854156494, "incorrect_loss_per_token": 0.4291420578956604, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4291420578956604, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": true, "logits_per_token": -0.4291420578956604, "logits_per_char": -0.1072855144739151, "num_chars": 4}, {"sum_logits": -1.1439077854156494, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.1439077854156494, "logits_per_char": -0.3813025951385498, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 321, "native_id": 1523, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.29858842492103577, "incorrect_loss_raw": 1.4386019706726074, "correct_loss_per_char": 0.09952947497367859, "incorrect_loss_per_char": 0.35965049266815186, "correct_loss_per_token": 0.29858842492103577, "incorrect_loss_per_token": 1.4386019706726074, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4386019706726074, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": false, "logits_per_token": -1.4386019706726074, "logits_per_char": -0.35965049266815186, "num_chars": 4}, {"sum_logits": -0.29858842492103577, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": true, "logits_per_token": -0.29858842492103577, "logits_per_char": -0.09952947497367859, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 322, "native_id": 561, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9446811676025391, "incorrect_loss_raw": 0.6202782392501831, "correct_loss_per_char": 0.23617029190063477, "incorrect_loss_per_char": 0.20675941308339438, "correct_loss_per_token": 0.9446811676025391, "incorrect_loss_per_token": 0.6202782392501831, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9446811676025391, "num_tokens": 1, "num_tokens_all": 1430, "is_greedy": false, "logits_per_token": -0.9446811676025391, "logits_per_char": -0.23617029190063477, "num_chars": 4}, {"sum_logits": -0.6202782392501831, "num_tokens": 1, "num_tokens_all": 1430, "is_greedy": true, "logits_per_token": -0.6202782392501831, "logits_per_char": -0.20675941308339438, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 323, "native_id": 116, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9257920980453491, "incorrect_loss_raw": 0.5580078363418579, "correct_loss_per_char": 0.23144802451133728, "incorrect_loss_per_char": 0.18600261211395264, "correct_loss_per_token": 0.9257920980453491, "incorrect_loss_per_token": 0.5580078363418579, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9257920980453491, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": false, "logits_per_token": -0.9257920980453491, "logits_per_char": -0.23144802451133728, "num_chars": 4}, {"sum_logits": -0.5580078363418579, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": true, "logits_per_token": -0.5580078363418579, "logits_per_char": -0.18600261211395264, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 324, "native_id": 1616, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7303224802017212, "incorrect_loss_raw": 0.7846428155899048, "correct_loss_per_char": 0.1825806200504303, "incorrect_loss_per_char": 0.26154760519663495, "correct_loss_per_token": 0.7303224802017212, "incorrect_loss_per_token": 0.7846428155899048, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7303224802017212, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": true, "logits_per_token": -0.7303224802017212, "logits_per_char": -0.1825806200504303, "num_chars": 4}, {"sum_logits": -0.7846428155899048, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": false, "logits_per_token": -0.7846428155899048, "logits_per_char": -0.26154760519663495, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 325, "native_id": 153, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3895241916179657, "incorrect_loss_raw": 1.2050437927246094, "correct_loss_per_char": 0.12984139720598856, "incorrect_loss_per_char": 0.30126094818115234, "correct_loss_per_token": 0.3895241916179657, "incorrect_loss_per_token": 1.2050437927246094, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2050437927246094, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": false, "logits_per_token": -1.2050437927246094, "logits_per_char": -0.30126094818115234, "num_chars": 4}, {"sum_logits": -0.3895241916179657, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": true, "logits_per_token": -0.3895241916179657, "logits_per_char": -0.12984139720598856, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 326, "native_id": 2722, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7020402550697327, "incorrect_loss_raw": 0.7501352429389954, "correct_loss_per_char": 0.17551006376743317, "incorrect_loss_per_char": 0.2500450809796651, "correct_loss_per_token": 0.7020402550697327, "incorrect_loss_per_token": 0.7501352429389954, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7020402550697327, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": true, "logits_per_token": -0.7020402550697327, "logits_per_char": -0.17551006376743317, "num_chars": 4}, {"sum_logits": -0.7501352429389954, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": false, "logits_per_token": -0.7501352429389954, "logits_per_char": -0.2500450809796651, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 327, "native_id": 180, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8997936248779297, "incorrect_loss_raw": 0.6100375652313232, "correct_loss_per_char": 0.22494840621948242, "incorrect_loss_per_char": 0.20334585507710776, "correct_loss_per_token": 0.8997936248779297, "incorrect_loss_per_token": 0.6100375652313232, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8997936248779297, "num_tokens": 1, "num_tokens_all": 874, "is_greedy": false, "logits_per_token": -0.8997936248779297, "logits_per_char": -0.22494840621948242, "num_chars": 4}, {"sum_logits": -0.6100375652313232, "num_tokens": 1, "num_tokens_all": 874, "is_greedy": true, "logits_per_token": -0.6100375652313232, "logits_per_char": -0.20334585507710776, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 328, "native_id": 854, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7514277100563049, "incorrect_loss_raw": 0.7101580500602722, "correct_loss_per_char": 0.18785692751407623, "incorrect_loss_per_char": 0.23671935002009073, "correct_loss_per_token": 0.7514277100563049, "incorrect_loss_per_token": 0.7101580500602722, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7514277100563049, "num_tokens": 1, "num_tokens_all": 857, "is_greedy": false, "logits_per_token": -0.7514277100563049, "logits_per_char": -0.18785692751407623, "num_chars": 4}, {"sum_logits": -0.7101580500602722, "num_tokens": 1, "num_tokens_all": 857, "is_greedy": true, "logits_per_token": -0.7101580500602722, "logits_per_char": -0.23671935002009073, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 329, "native_id": 2730, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9386836290359497, "incorrect_loss_raw": 0.5248842835426331, "correct_loss_per_char": 0.23467090725898743, "incorrect_loss_per_char": 0.17496142784754434, "correct_loss_per_token": 0.9386836290359497, "incorrect_loss_per_token": 0.5248842835426331, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9386836290359497, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -0.9386836290359497, "logits_per_char": -0.23467090725898743, "num_chars": 4}, {"sum_logits": -0.5248842835426331, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.5248842835426331, "logits_per_char": -0.17496142784754434, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 330, "native_id": 3131, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6748319268226624, "incorrect_loss_raw": 0.7853004336357117, "correct_loss_per_char": 0.1687079817056656, "incorrect_loss_per_char": 0.26176681121190387, "correct_loss_per_token": 0.6748319268226624, "incorrect_loss_per_token": 0.7853004336357117, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6748319268226624, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": true, "logits_per_token": -0.6748319268226624, "logits_per_char": -0.1687079817056656, "num_chars": 4}, {"sum_logits": -0.7853004336357117, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": false, "logits_per_token": -0.7853004336357117, "logits_per_char": -0.26176681121190387, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 331, "native_id": 1282, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2684986591339111, "incorrect_loss_raw": 0.35500389337539673, "correct_loss_per_char": 0.3171246647834778, "incorrect_loss_per_char": 0.11833463112513225, "correct_loss_per_token": 1.2684986591339111, "incorrect_loss_per_token": 0.35500389337539673, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2684986591339111, "num_tokens": 1, "num_tokens_all": 892, "is_greedy": false, "logits_per_token": -1.2684986591339111, "logits_per_char": -0.3171246647834778, "num_chars": 4}, {"sum_logits": -0.35500389337539673, "num_tokens": 1, "num_tokens_all": 892, "is_greedy": true, "logits_per_token": -0.35500389337539673, "logits_per_char": -0.11833463112513225, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 332, "native_id": 2112, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.38569748401641846, "incorrect_loss_raw": 1.2511800527572632, "correct_loss_per_char": 0.1285658280054728, "incorrect_loss_per_char": 0.3127950131893158, "correct_loss_per_token": 0.38569748401641846, "incorrect_loss_per_token": 1.2511800527572632, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2511800527572632, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.2511800527572632, "logits_per_char": -0.3127950131893158, "num_chars": 4}, {"sum_logits": -0.38569748401641846, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": true, "logits_per_token": -0.38569748401641846, "logits_per_char": -0.1285658280054728, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 333, "native_id": 3219, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8433829545974731, "incorrect_loss_raw": 0.6982854604721069, "correct_loss_per_char": 0.2108457386493683, "incorrect_loss_per_char": 0.232761820157369, "correct_loss_per_token": 0.8433829545974731, "incorrect_loss_per_token": 0.6982854604721069, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8433829545974731, "num_tokens": 1, "num_tokens_all": 1192, "is_greedy": false, "logits_per_token": -0.8433829545974731, "logits_per_char": -0.2108457386493683, "num_chars": 4}, {"sum_logits": -0.6982854604721069, "num_tokens": 1, "num_tokens_all": 1192, "is_greedy": true, "logits_per_token": -0.6982854604721069, "logits_per_char": -0.232761820157369, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 334, "native_id": 1779, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.776828408241272, "incorrect_loss_raw": 0.7051176428794861, "correct_loss_per_char": 0.194207102060318, "incorrect_loss_per_char": 0.23503921429316202, "correct_loss_per_token": 0.776828408241272, "incorrect_loss_per_token": 0.7051176428794861, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.776828408241272, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": false, "logits_per_token": -0.776828408241272, "logits_per_char": -0.194207102060318, "num_chars": 4}, {"sum_logits": -0.7051176428794861, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": true, "logits_per_token": -0.7051176428794861, "logits_per_char": -0.23503921429316202, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 335, "native_id": 2110, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8665580749511719, "incorrect_loss_raw": 0.6247076988220215, "correct_loss_per_char": 0.21663951873779297, "incorrect_loss_per_char": 0.20823589960734049, "correct_loss_per_token": 0.8665580749511719, "incorrect_loss_per_token": 0.6247076988220215, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8665580749511719, "num_tokens": 1, "num_tokens_all": 830, "is_greedy": false, "logits_per_token": -0.8665580749511719, "logits_per_char": -0.21663951873779297, "num_chars": 4}, {"sum_logits": -0.6247076988220215, "num_tokens": 1, "num_tokens_all": 830, "is_greedy": true, "logits_per_token": -0.6247076988220215, "logits_per_char": -0.20823589960734049, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 336, "native_id": 282, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7024474143981934, "incorrect_loss_raw": 0.2468765377998352, "correct_loss_per_char": 0.5674824714660645, "incorrect_loss_per_char": 0.0617191344499588, "correct_loss_per_token": 1.7024474143981934, "incorrect_loss_per_token": 0.2468765377998352, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.2468765377998352, "num_tokens": 1, "num_tokens_all": 902, "is_greedy": true, "logits_per_token": -0.2468765377998352, "logits_per_char": -0.0617191344499588, "num_chars": 4}, {"sum_logits": -1.7024474143981934, "num_tokens": 1, "num_tokens_all": 902, "is_greedy": false, "logits_per_token": -1.7024474143981934, "logits_per_char": -0.5674824714660645, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 337, "native_id": 1249, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6476815938949585, "incorrect_loss_raw": 0.8472892045974731, "correct_loss_per_char": 0.16192039847373962, "incorrect_loss_per_char": 0.2824297348658244, "correct_loss_per_token": 0.6476815938949585, "incorrect_loss_per_token": 0.8472892045974731, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6476815938949585, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": true, "logits_per_token": -0.6476815938949585, "logits_per_char": -0.16192039847373962, "num_chars": 4}, {"sum_logits": -0.8472892045974731, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": false, "logits_per_token": -0.8472892045974731, "logits_per_char": -0.2824297348658244, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 338, "native_id": 1070, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7741693258285522, "incorrect_loss_raw": 0.6678707599639893, "correct_loss_per_char": 0.19354233145713806, "incorrect_loss_per_char": 0.22262358665466309, "correct_loss_per_token": 0.7741693258285522, "incorrect_loss_per_token": 0.6678707599639893, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7741693258285522, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": false, "logits_per_token": -0.7741693258285522, "logits_per_char": -0.19354233145713806, "num_chars": 4}, {"sum_logits": -0.6678707599639893, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": true, "logits_per_token": -0.6678707599639893, "logits_per_char": -0.22262358665466309, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 339, "native_id": 2859, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.43836134672164917, "incorrect_loss_raw": 1.0759631395339966, "correct_loss_per_char": 0.10959033668041229, "incorrect_loss_per_char": 0.3586543798446655, "correct_loss_per_token": 0.43836134672164917, "incorrect_loss_per_token": 1.0759631395339966, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.43836134672164917, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": true, "logits_per_token": -0.43836134672164917, "logits_per_char": -0.10959033668041229, "num_chars": 4}, {"sum_logits": -1.0759631395339966, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": false, "logits_per_token": -1.0759631395339966, "logits_per_char": -0.3586543798446655, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 340, "native_id": 1988, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2726783752441406, "incorrect_loss_raw": 0.3612546920776367, "correct_loss_per_char": 0.31816959381103516, "incorrect_loss_per_char": 0.12041823069254558, "correct_loss_per_token": 1.2726783752441406, "incorrect_loss_per_token": 0.3612546920776367, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2726783752441406, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": false, "logits_per_token": -1.2726783752441406, "logits_per_char": -0.31816959381103516, "num_chars": 4}, {"sum_logits": -0.3612546920776367, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": true, "logits_per_token": -0.3612546920776367, "logits_per_char": -0.12041823069254558, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 341, "native_id": 2374, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.436436265707016, "incorrect_loss_raw": 1.1789497137069702, "correct_loss_per_char": 0.109109066426754, "incorrect_loss_per_char": 0.3929832379023234, "correct_loss_per_token": 0.436436265707016, "incorrect_loss_per_token": 1.1789497137069702, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.436436265707016, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": true, "logits_per_token": -0.436436265707016, "logits_per_char": -0.109109066426754, "num_chars": 4}, {"sum_logits": -1.1789497137069702, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": false, "logits_per_token": -1.1789497137069702, "logits_per_char": -0.3929832379023234, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 342, "native_id": 899, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4982936382293701, "incorrect_loss_raw": 0.9890269041061401, "correct_loss_per_char": 0.16609787940979004, "incorrect_loss_per_char": 0.24725672602653503, "correct_loss_per_token": 0.4982936382293701, "incorrect_loss_per_token": 0.9890269041061401, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9890269041061401, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -0.9890269041061401, "logits_per_char": -0.24725672602653503, "num_chars": 4}, {"sum_logits": -0.4982936382293701, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": true, "logits_per_token": -0.4982936382293701, "logits_per_char": -0.16609787940979004, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 343, "native_id": 1424, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7411239147186279, "incorrect_loss_raw": 0.7034169435501099, "correct_loss_per_char": 0.18528097867965698, "incorrect_loss_per_char": 0.23447231451670328, "correct_loss_per_token": 0.7411239147186279, "incorrect_loss_per_token": 0.7034169435501099, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7411239147186279, "num_tokens": 1, "num_tokens_all": 1140, "is_greedy": false, "logits_per_token": -0.7411239147186279, "logits_per_char": -0.18528097867965698, "num_chars": 4}, {"sum_logits": -0.7034169435501099, "num_tokens": 1, "num_tokens_all": 1140, "is_greedy": true, "logits_per_token": -0.7034169435501099, "logits_per_char": -0.23447231451670328, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 344, "native_id": 2065, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8870530128479004, "incorrect_loss_raw": 0.5963655710220337, "correct_loss_per_char": 0.2217632532119751, "incorrect_loss_per_char": 0.19878852367401123, "correct_loss_per_token": 0.8870530128479004, "incorrect_loss_per_token": 0.5963655710220337, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8870530128479004, "num_tokens": 1, "num_tokens_all": 1042, "is_greedy": false, "logits_per_token": -0.8870530128479004, "logits_per_char": -0.2217632532119751, "num_chars": 4}, {"sum_logits": -0.5963655710220337, "num_tokens": 1, "num_tokens_all": 1042, "is_greedy": true, "logits_per_token": -0.5963655710220337, "logits_per_char": -0.19878852367401123, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 345, "native_id": 339, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4960079789161682, "incorrect_loss_raw": 1.0354516506195068, "correct_loss_per_char": 0.16533599297205606, "incorrect_loss_per_char": 0.2588629126548767, "correct_loss_per_token": 0.4960079789161682, "incorrect_loss_per_token": 1.0354516506195068, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0354516506195068, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.0354516506195068, "logits_per_char": -0.2588629126548767, "num_chars": 4}, {"sum_logits": -0.4960079789161682, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": true, "logits_per_token": -0.4960079789161682, "logits_per_char": -0.16533599297205606, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 346, "native_id": 2675, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.28688758611679077, "incorrect_loss_raw": 1.5818564891815186, "correct_loss_per_char": 0.07172189652919769, "incorrect_loss_per_char": 0.5272854963938395, "correct_loss_per_token": 0.28688758611679077, "incorrect_loss_per_token": 1.5818564891815186, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.28688758611679077, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": true, "logits_per_token": -0.28688758611679077, "logits_per_char": -0.07172189652919769, "num_chars": 4}, {"sum_logits": -1.5818564891815186, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.5818564891815186, "logits_per_char": -0.5272854963938395, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 347, "native_id": 74, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.41634199023246765, "incorrect_loss_raw": 1.1558014154434204, "correct_loss_per_char": 0.10408549755811691, "incorrect_loss_per_char": 0.38526713848114014, "correct_loss_per_token": 0.41634199023246765, "incorrect_loss_per_token": 1.1558014154434204, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.41634199023246765, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": true, "logits_per_token": -0.41634199023246765, "logits_per_char": -0.10408549755811691, "num_chars": 4}, {"sum_logits": -1.1558014154434204, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": false, "logits_per_token": -1.1558014154434204, "logits_per_char": -0.38526713848114014, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 348, "native_id": 3013, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3059841394424438, "incorrect_loss_raw": 0.37125861644744873, "correct_loss_per_char": 0.43532804648081463, "incorrect_loss_per_char": 0.09281465411186218, "correct_loss_per_token": 1.3059841394424438, "incorrect_loss_per_token": 0.37125861644744873, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.37125861644744873, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": true, "logits_per_token": -0.37125861644744873, "logits_per_char": -0.09281465411186218, "num_chars": 4}, {"sum_logits": -1.3059841394424438, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": false, "logits_per_token": -1.3059841394424438, "logits_per_char": -0.43532804648081463, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 349, "native_id": 3111, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3043416738510132, "incorrect_loss_raw": 1.406123399734497, "correct_loss_per_char": 0.1014472246170044, "incorrect_loss_per_char": 0.35153084993362427, "correct_loss_per_token": 0.3043416738510132, "incorrect_loss_per_token": 1.406123399734497, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.406123399734497, "num_tokens": 1, "num_tokens_all": 1140, "is_greedy": false, "logits_per_token": -1.406123399734497, "logits_per_char": -0.35153084993362427, "num_chars": 4}, {"sum_logits": -0.3043416738510132, "num_tokens": 1, "num_tokens_all": 1140, "is_greedy": true, "logits_per_token": -0.3043416738510132, "logits_per_char": -0.1014472246170044, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 350, "native_id": 1356, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4721096456050873, "incorrect_loss_raw": 1.0374218225479126, "correct_loss_per_char": 0.15736988186836243, "incorrect_loss_per_char": 0.25935545563697815, "correct_loss_per_token": 0.4721096456050873, "incorrect_loss_per_token": 1.0374218225479126, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0374218225479126, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": false, "logits_per_token": -1.0374218225479126, "logits_per_char": -0.25935545563697815, "num_chars": 4}, {"sum_logits": -0.4721096456050873, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": true, "logits_per_token": -0.4721096456050873, "logits_per_char": -0.15736988186836243, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 351, "native_id": 2310, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5716234445571899, "incorrect_loss_raw": 0.8966721296310425, "correct_loss_per_char": 0.19054114818572998, "incorrect_loss_per_char": 0.22416803240776062, "correct_loss_per_token": 0.5716234445571899, "incorrect_loss_per_token": 0.8966721296310425, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8966721296310425, "num_tokens": 1, "num_tokens_all": 889, "is_greedy": false, "logits_per_token": -0.8966721296310425, "logits_per_char": -0.22416803240776062, "num_chars": 4}, {"sum_logits": -0.5716234445571899, "num_tokens": 1, "num_tokens_all": 889, "is_greedy": true, "logits_per_token": -0.5716234445571899, "logits_per_char": -0.19054114818572998, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 352, "native_id": 940, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.44013211131095886, "incorrect_loss_raw": 1.2590419054031372, "correct_loss_per_char": 0.11003302782773972, "incorrect_loss_per_char": 0.4196806351343791, "correct_loss_per_token": 0.44013211131095886, "incorrect_loss_per_token": 1.2590419054031372, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.44013211131095886, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": true, "logits_per_token": -0.44013211131095886, "logits_per_char": -0.11003302782773972, "num_chars": 4}, {"sum_logits": -1.2590419054031372, "num_tokens": 1, "num_tokens_all": 978, "is_greedy": false, "logits_per_token": -1.2590419054031372, "logits_per_char": -0.4196806351343791, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 353, "native_id": 665, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7711426019668579, "incorrect_loss_raw": 0.7281618118286133, "correct_loss_per_char": 0.25704753398895264, "incorrect_loss_per_char": 0.18204045295715332, "correct_loss_per_token": 0.7711426019668579, "incorrect_loss_per_token": 0.7281618118286133, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7281618118286133, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": true, "logits_per_token": -0.7281618118286133, "logits_per_char": -0.18204045295715332, "num_chars": 4}, {"sum_logits": -0.7711426019668579, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -0.7711426019668579, "logits_per_char": -0.25704753398895264, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 354, "native_id": 3008, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9517775177955627, "incorrect_loss_raw": 0.5726171135902405, "correct_loss_per_char": 0.23794437944889069, "incorrect_loss_per_char": 0.19087237119674683, "correct_loss_per_token": 0.9517775177955627, "incorrect_loss_per_token": 0.5726171135902405, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9517775177955627, "num_tokens": 1, "num_tokens_all": 858, "is_greedy": false, "logits_per_token": -0.9517775177955627, "logits_per_char": -0.23794437944889069, "num_chars": 4}, {"sum_logits": -0.5726171135902405, "num_tokens": 1, "num_tokens_all": 858, "is_greedy": true, "logits_per_token": -0.5726171135902405, "logits_per_char": -0.19087237119674683, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 355, "native_id": 2045, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6927033066749573, "incorrect_loss_raw": 0.7820674180984497, "correct_loss_per_char": 0.23090110222498575, "incorrect_loss_per_char": 0.19551685452461243, "correct_loss_per_token": 0.6927033066749573, "incorrect_loss_per_token": 0.7820674180984497, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7820674180984497, "num_tokens": 1, "num_tokens_all": 863, "is_greedy": false, "logits_per_token": -0.7820674180984497, "logits_per_char": -0.19551685452461243, "num_chars": 4}, {"sum_logits": -0.6927033066749573, "num_tokens": 1, "num_tokens_all": 863, "is_greedy": true, "logits_per_token": -0.6927033066749573, "logits_per_char": -0.23090110222498575, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 356, "native_id": 2805, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6841364502906799, "incorrect_loss_raw": 0.8398680090904236, "correct_loss_per_char": 0.17103411257266998, "incorrect_loss_per_char": 0.2799560030301412, "correct_loss_per_token": 0.6841364502906799, "incorrect_loss_per_token": 0.8398680090904236, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6841364502906799, "num_tokens": 1, "num_tokens_all": 830, "is_greedy": true, "logits_per_token": -0.6841364502906799, "logits_per_char": -0.17103411257266998, "num_chars": 4}, {"sum_logits": -0.8398680090904236, "num_tokens": 1, "num_tokens_all": 830, "is_greedy": false, "logits_per_token": -0.8398680090904236, "logits_per_char": -0.2799560030301412, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 357, "native_id": 2767, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5448220372200012, "incorrect_loss_raw": 0.9356318712234497, "correct_loss_per_char": 0.1362055093050003, "incorrect_loss_per_char": 0.3118772904078166, "correct_loss_per_token": 0.5448220372200012, "incorrect_loss_per_token": 0.9356318712234497, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5448220372200012, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": true, "logits_per_token": -0.5448220372200012, "logits_per_char": -0.1362055093050003, "num_chars": 4}, {"sum_logits": -0.9356318712234497, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": false, "logits_per_token": -0.9356318712234497, "logits_per_char": -0.3118772904078166, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 358, "native_id": 2983, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7108240723609924, "incorrect_loss_raw": 0.7432411909103394, "correct_loss_per_char": 0.23694135745366415, "incorrect_loss_per_char": 0.18581029772758484, "correct_loss_per_token": 0.7108240723609924, "incorrect_loss_per_token": 0.7432411909103394, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7432411909103394, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": false, "logits_per_token": -0.7432411909103394, "logits_per_char": -0.18581029772758484, "num_chars": 4}, {"sum_logits": -0.7108240723609924, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": true, "logits_per_token": -0.7108240723609924, "logits_per_char": -0.23694135745366415, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 359, "native_id": 2180, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9462859630584717, "incorrect_loss_raw": 0.5762845277786255, "correct_loss_per_char": 0.23657149076461792, "incorrect_loss_per_char": 0.19209484259287515, "correct_loss_per_token": 0.9462859630584717, "incorrect_loss_per_token": 0.5762845277786255, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9462859630584717, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": false, "logits_per_token": -0.9462859630584717, "logits_per_char": -0.23657149076461792, "num_chars": 4}, {"sum_logits": -0.5762845277786255, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": true, "logits_per_token": -0.5762845277786255, "logits_per_char": -0.19209484259287515, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 360, "native_id": 2550, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.102579116821289, "incorrect_loss_raw": 0.4290395975112915, "correct_loss_per_char": 0.27564477920532227, "incorrect_loss_per_char": 0.1430131991704305, "correct_loss_per_token": 1.102579116821289, "incorrect_loss_per_token": 0.4290395975112915, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.102579116821289, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": false, "logits_per_token": -1.102579116821289, "logits_per_char": -0.27564477920532227, "num_chars": 4}, {"sum_logits": -0.4290395975112915, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": true, "logits_per_token": -0.4290395975112915, "logits_per_char": -0.1430131991704305, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 361, "native_id": 2538, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.35905036330223083, "incorrect_loss_raw": 1.2675775289535522, "correct_loss_per_char": 0.11968345443407695, "incorrect_loss_per_char": 0.31689438223838806, "correct_loss_per_token": 0.35905036330223083, "incorrect_loss_per_token": 1.2675775289535522, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2675775289535522, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.2675775289535522, "logits_per_char": -0.31689438223838806, "num_chars": 4}, {"sum_logits": -0.35905036330223083, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": true, "logits_per_token": -0.35905036330223083, "logits_per_char": -0.11968345443407695, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 362, "native_id": 279, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6170980930328369, "incorrect_loss_raw": 0.9080700874328613, "correct_loss_per_char": 0.15427452325820923, "incorrect_loss_per_char": 0.3026900291442871, "correct_loss_per_token": 0.6170980930328369, "incorrect_loss_per_token": 0.9080700874328613, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6170980930328369, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": true, "logits_per_token": -0.6170980930328369, "logits_per_char": -0.15427452325820923, "num_chars": 4}, {"sum_logits": -0.9080700874328613, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": false, "logits_per_token": -0.9080700874328613, "logits_per_char": -0.3026900291442871, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 363, "native_id": 596, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7898478507995605, "incorrect_loss_raw": 0.665877103805542, "correct_loss_per_char": 0.19746196269989014, "incorrect_loss_per_char": 0.22195903460184732, "correct_loss_per_token": 0.7898478507995605, "incorrect_loss_per_token": 0.665877103805542, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7898478507995605, "num_tokens": 1, "num_tokens_all": 1196, "is_greedy": false, "logits_per_token": -0.7898478507995605, "logits_per_char": -0.19746196269989014, "num_chars": 4}, {"sum_logits": -0.665877103805542, "num_tokens": 1, "num_tokens_all": 1196, "is_greedy": true, "logits_per_token": -0.665877103805542, "logits_per_char": -0.22195903460184732, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 364, "native_id": 2176, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.46279025077819824, "incorrect_loss_raw": 1.1080387830734253, "correct_loss_per_char": 0.15426341692606607, "incorrect_loss_per_char": 0.2770096957683563, "correct_loss_per_token": 0.46279025077819824, "incorrect_loss_per_token": 1.1080387830734253, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1080387830734253, "num_tokens": 1, "num_tokens_all": 918, "is_greedy": false, "logits_per_token": -1.1080387830734253, "logits_per_char": -0.2770096957683563, "num_chars": 4}, {"sum_logits": -0.46279025077819824, "num_tokens": 1, "num_tokens_all": 918, "is_greedy": true, "logits_per_token": -0.46279025077819824, "logits_per_char": -0.15426341692606607, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 365, "native_id": 996, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.34935006499290466, "incorrect_loss_raw": 1.2848691940307617, "correct_loss_per_char": 0.11645002166430156, "incorrect_loss_per_char": 0.32121729850769043, "correct_loss_per_token": 0.34935006499290466, "incorrect_loss_per_token": 1.2848691940307617, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2848691940307617, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": false, "logits_per_token": -1.2848691940307617, "logits_per_char": -0.32121729850769043, "num_chars": 4}, {"sum_logits": -0.34935006499290466, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": true, "logits_per_token": -0.34935006499290466, "logits_per_char": -0.11645002166430156, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 366, "native_id": 2820, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6469547152519226, "incorrect_loss_raw": 0.8713507652282715, "correct_loss_per_char": 0.16173867881298065, "incorrect_loss_per_char": 0.2904502550760905, "correct_loss_per_token": 0.6469547152519226, "incorrect_loss_per_token": 0.8713507652282715, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6469547152519226, "num_tokens": 1, "num_tokens_all": 1007, "is_greedy": true, "logits_per_token": -0.6469547152519226, "logits_per_char": -0.16173867881298065, "num_chars": 4}, {"sum_logits": -0.8713507652282715, "num_tokens": 1, "num_tokens_all": 1007, "is_greedy": false, "logits_per_token": -0.8713507652282715, "logits_per_char": -0.2904502550760905, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 367, "native_id": 672, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7882089018821716, "incorrect_loss_raw": 0.6528501510620117, "correct_loss_per_char": 0.1970522254705429, "incorrect_loss_per_char": 0.21761671702067056, "correct_loss_per_token": 0.7882089018821716, "incorrect_loss_per_token": 0.6528501510620117, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7882089018821716, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": false, "logits_per_token": -0.7882089018821716, "logits_per_char": -0.1970522254705429, "num_chars": 4}, {"sum_logits": -0.6528501510620117, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": true, "logits_per_token": -0.6528501510620117, "logits_per_char": -0.21761671702067056, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 368, "native_id": 2074, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.879254937171936, "incorrect_loss_raw": 0.6198775172233582, "correct_loss_per_char": 0.219813734292984, "incorrect_loss_per_char": 0.20662583907445273, "correct_loss_per_token": 0.879254937171936, "incorrect_loss_per_token": 0.6198775172233582, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.879254937171936, "num_tokens": 1, "num_tokens_all": 1291, "is_greedy": false, "logits_per_token": -0.879254937171936, "logits_per_char": -0.219813734292984, "num_chars": 4}, {"sum_logits": -0.6198775172233582, "num_tokens": 1, "num_tokens_all": 1291, "is_greedy": true, "logits_per_token": -0.6198775172233582, "logits_per_char": -0.20662583907445273, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 369, "native_id": 2068, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.162510871887207, "incorrect_loss_raw": 0.4178583323955536, "correct_loss_per_char": 0.29062771797180176, "incorrect_loss_per_char": 0.13928611079851785, "correct_loss_per_token": 1.162510871887207, "incorrect_loss_per_token": 0.4178583323955536, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.162510871887207, "num_tokens": 1, "num_tokens_all": 879, "is_greedy": false, "logits_per_token": -1.162510871887207, "logits_per_char": -0.29062771797180176, "num_chars": 4}, {"sum_logits": -0.4178583323955536, "num_tokens": 1, "num_tokens_all": 879, "is_greedy": true, "logits_per_token": -0.4178583323955536, "logits_per_char": -0.13928611079851785, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 370, "native_id": 2831, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6712390184402466, "incorrect_loss_raw": 0.7520115971565247, "correct_loss_per_char": 0.16780975461006165, "incorrect_loss_per_char": 0.25067053238550824, "correct_loss_per_token": 0.6712390184402466, "incorrect_loss_per_token": 0.7520115971565247, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6712390184402466, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": true, "logits_per_token": -0.6712390184402466, "logits_per_char": -0.16780975461006165, "num_chars": 4}, {"sum_logits": -0.7520115971565247, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -0.7520115971565247, "logits_per_char": -0.25067053238550824, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 371, "native_id": 1610, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4611360430717468, "incorrect_loss_raw": 1.0887384414672852, "correct_loss_per_char": 0.1152840107679367, "incorrect_loss_per_char": 0.3629128138224284, "correct_loss_per_token": 0.4611360430717468, "incorrect_loss_per_token": 1.0887384414672852, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4611360430717468, "num_tokens": 1, "num_tokens_all": 879, "is_greedy": true, "logits_per_token": -0.4611360430717468, "logits_per_char": -0.1152840107679367, "num_chars": 4}, {"sum_logits": -1.0887384414672852, "num_tokens": 1, "num_tokens_all": 879, "is_greedy": false, "logits_per_token": -1.0887384414672852, "logits_per_char": -0.3629128138224284, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 372, "native_id": 1337, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3908234238624573, "incorrect_loss_raw": 1.360574722290039, "correct_loss_per_char": 0.09770585596561432, "incorrect_loss_per_char": 0.453524907430013, "correct_loss_per_token": 0.3908234238624573, "incorrect_loss_per_token": 1.360574722290039, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3908234238624573, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": true, "logits_per_token": -0.3908234238624573, "logits_per_char": -0.09770585596561432, "num_chars": 4}, {"sum_logits": -1.360574722290039, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": false, "logits_per_token": -1.360574722290039, "logits_per_char": -0.453524907430013, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 373, "native_id": 528, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.63668292760849, "incorrect_loss_raw": 0.8199095129966736, "correct_loss_per_char": 0.1591707319021225, "incorrect_loss_per_char": 0.2733031709988912, "correct_loss_per_token": 0.63668292760849, "incorrect_loss_per_token": 0.8199095129966736, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.63668292760849, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": true, "logits_per_token": -0.63668292760849, "logits_per_char": -0.1591707319021225, "num_chars": 4}, {"sum_logits": -0.8199095129966736, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": false, "logits_per_token": -0.8199095129966736, "logits_per_char": -0.2733031709988912, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 374, "native_id": 2300, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7735143899917603, "incorrect_loss_raw": 0.6913877725601196, "correct_loss_per_char": 0.2578381299972534, "incorrect_loss_per_char": 0.1728469431400299, "correct_loss_per_token": 0.7735143899917603, "incorrect_loss_per_token": 0.6913877725601196, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6913877725601196, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": true, "logits_per_token": -0.6913877725601196, "logits_per_char": -0.1728469431400299, "num_chars": 4}, {"sum_logits": -0.7735143899917603, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": false, "logits_per_token": -0.7735143899917603, "logits_per_char": -0.2578381299972534, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 375, "native_id": 2319, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5721181035041809, "incorrect_loss_raw": 0.8867374658584595, "correct_loss_per_char": 0.19070603450139365, "incorrect_loss_per_char": 0.22168436646461487, "correct_loss_per_token": 0.5721181035041809, "incorrect_loss_per_token": 0.8867374658584595, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8867374658584595, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": false, "logits_per_token": -0.8867374658584595, "logits_per_char": -0.22168436646461487, "num_chars": 4}, {"sum_logits": -0.5721181035041809, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": true, "logits_per_token": -0.5721181035041809, "logits_per_char": -0.19070603450139365, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 376, "native_id": 2191, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6169304847717285, "incorrect_loss_raw": 0.8722680807113647, "correct_loss_per_char": 0.20564349492390951, "incorrect_loss_per_char": 0.2180670201778412, "correct_loss_per_token": 0.6169304847717285, "incorrect_loss_per_token": 0.8722680807113647, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8722680807113647, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": false, "logits_per_token": -0.8722680807113647, "logits_per_char": -0.2180670201778412, "num_chars": 4}, {"sum_logits": -0.6169304847717285, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": true, "logits_per_token": -0.6169304847717285, "logits_per_char": -0.20564349492390951, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 377, "native_id": 2499, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6194282174110413, "incorrect_loss_raw": 0.8295785784721375, "correct_loss_per_char": 0.20647607247034708, "incorrect_loss_per_char": 0.20739464461803436, "correct_loss_per_token": 0.6194282174110413, "incorrect_loss_per_token": 0.8295785784721375, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8295785784721375, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -0.8295785784721375, "logits_per_char": -0.20739464461803436, "num_chars": 4}, {"sum_logits": -0.6194282174110413, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": true, "logits_per_token": -0.6194282174110413, "logits_per_char": -0.20647607247034708, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 378, "native_id": 2454, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.17092275619506836, "incorrect_loss_raw": 2.1075501441955566, "correct_loss_per_char": 0.05697425206502279, "incorrect_loss_per_char": 0.5268875360488892, "correct_loss_per_token": 0.17092275619506836, "incorrect_loss_per_token": 2.1075501441955566, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -2.1075501441955566, "num_tokens": 1, "num_tokens_all": 862, "is_greedy": false, "logits_per_token": -2.1075501441955566, "logits_per_char": -0.5268875360488892, "num_chars": 4}, {"sum_logits": -0.17092275619506836, "num_tokens": 1, "num_tokens_all": 862, "is_greedy": true, "logits_per_token": -0.17092275619506836, "logits_per_char": -0.05697425206502279, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 379, "native_id": 1828, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4727226793766022, "incorrect_loss_raw": 1.0785136222839355, "correct_loss_per_char": 0.11818066984415054, "incorrect_loss_per_char": 0.35950454076131183, "correct_loss_per_token": 0.4727226793766022, "incorrect_loss_per_token": 1.0785136222839355, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4727226793766022, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": true, "logits_per_token": -0.4727226793766022, "logits_per_char": -0.11818066984415054, "num_chars": 4}, {"sum_logits": -1.0785136222839355, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.0785136222839355, "logits_per_char": -0.35950454076131183, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 380, "native_id": 167, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9449637532234192, "incorrect_loss_raw": 0.5640719532966614, "correct_loss_per_char": 0.3149879177411397, "incorrect_loss_per_char": 0.14101798832416534, "correct_loss_per_token": 0.9449637532234192, "incorrect_loss_per_token": 0.5640719532966614, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5640719532966614, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": true, "logits_per_token": -0.5640719532966614, "logits_per_char": -0.14101798832416534, "num_chars": 4}, {"sum_logits": -0.9449637532234192, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": false, "logits_per_token": -0.9449637532234192, "logits_per_char": -0.3149879177411397, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 381, "native_id": 1522, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3851388096809387, "incorrect_loss_raw": 1.244776964187622, "correct_loss_per_char": 0.12837960322697958, "incorrect_loss_per_char": 0.3111942410469055, "correct_loss_per_token": 0.3851388096809387, "incorrect_loss_per_token": 1.244776964187622, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.244776964187622, "num_tokens": 1, "num_tokens_all": 840, "is_greedy": false, "logits_per_token": -1.244776964187622, "logits_per_char": -0.3111942410469055, "num_chars": 4}, {"sum_logits": -0.3851388096809387, "num_tokens": 1, "num_tokens_all": 840, "is_greedy": true, "logits_per_token": -0.3851388096809387, "logits_per_char": -0.12837960322697958, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 382, "native_id": 281, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5107961893081665, "incorrect_loss_raw": 1.031667947769165, "correct_loss_per_char": 0.12769904732704163, "incorrect_loss_per_char": 0.343889315923055, "correct_loss_per_token": 0.5107961893081665, "incorrect_loss_per_token": 1.031667947769165, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5107961893081665, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -0.5107961893081665, "logits_per_char": -0.12769904732704163, "num_chars": 4}, {"sum_logits": -1.031667947769165, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.031667947769165, "logits_per_char": -0.343889315923055, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 383, "native_id": 1511, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5492910146713257, "incorrect_loss_raw": 0.9401224851608276, "correct_loss_per_char": 0.13732275366783142, "incorrect_loss_per_char": 0.3133741617202759, "correct_loss_per_token": 0.5492910146713257, "incorrect_loss_per_token": 0.9401224851608276, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5492910146713257, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": true, "logits_per_token": -0.5492910146713257, "logits_per_char": -0.13732275366783142, "num_chars": 4}, {"sum_logits": -0.9401224851608276, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": false, "logits_per_token": -0.9401224851608276, "logits_per_char": -0.3133741617202759, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 384, "native_id": 2768, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6664085984230042, "incorrect_loss_raw": 1.0052623748779297, "correct_loss_per_char": 0.16660214960575104, "incorrect_loss_per_char": 0.33508745829264325, "correct_loss_per_token": 0.6664085984230042, "incorrect_loss_per_token": 1.0052623748779297, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6664085984230042, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.6664085984230042, "logits_per_char": -0.16660214960575104, "num_chars": 4}, {"sum_logits": -1.0052623748779297, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -1.0052623748779297, "logits_per_char": -0.33508745829264325, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 385, "native_id": 1672, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9516139030456543, "incorrect_loss_raw": 0.5764918327331543, "correct_loss_per_char": 0.23790347576141357, "incorrect_loss_per_char": 0.19216394424438477, "correct_loss_per_token": 0.9516139030456543, "incorrect_loss_per_token": 0.5764918327331543, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9516139030456543, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": false, "logits_per_token": -0.9516139030456543, "logits_per_char": -0.23790347576141357, "num_chars": 4}, {"sum_logits": -0.5764918327331543, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": true, "logits_per_token": -0.5764918327331543, "logits_per_char": -0.19216394424438477, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 386, "native_id": 182, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6069185733795166, "incorrect_loss_raw": 0.899529755115509, "correct_loss_per_char": 0.15172964334487915, "incorrect_loss_per_char": 0.2998432517051697, "correct_loss_per_token": 0.6069185733795166, "incorrect_loss_per_token": 0.899529755115509, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6069185733795166, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": true, "logits_per_token": -0.6069185733795166, "logits_per_char": -0.15172964334487915, "num_chars": 4}, {"sum_logits": -0.899529755115509, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -0.899529755115509, "logits_per_char": -0.2998432517051697, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 387, "native_id": 2474, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5001148581504822, "incorrect_loss_raw": 0.9741668701171875, "correct_loss_per_char": 0.1667049527168274, "incorrect_loss_per_char": 0.24354171752929688, "correct_loss_per_token": 0.5001148581504822, "incorrect_loss_per_token": 0.9741668701171875, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9741668701171875, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": false, "logits_per_token": -0.9741668701171875, "logits_per_char": -0.24354171752929688, "num_chars": 4}, {"sum_logits": -0.5001148581504822, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": true, "logits_per_token": -0.5001148581504822, "logits_per_char": -0.1667049527168274, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 388, "native_id": 2506, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.531574547290802, "incorrect_loss_raw": 0.9779995679855347, "correct_loss_per_char": 0.17719151576360068, "incorrect_loss_per_char": 0.24449989199638367, "correct_loss_per_token": 0.531574547290802, "incorrect_loss_per_token": 0.9779995679855347, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9779995679855347, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -0.9779995679855347, "logits_per_char": -0.24449989199638367, "num_chars": 4}, {"sum_logits": -0.531574547290802, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": true, "logits_per_token": -0.531574547290802, "logits_per_char": -0.17719151576360068, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 389, "native_id": 290, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5124585032463074, "incorrect_loss_raw": 1.0278806686401367, "correct_loss_per_char": 0.17081950108210245, "incorrect_loss_per_char": 0.2569701671600342, "correct_loss_per_token": 0.5124585032463074, "incorrect_loss_per_token": 1.0278806686401367, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0278806686401367, "num_tokens": 1, "num_tokens_all": 886, "is_greedy": false, "logits_per_token": -1.0278806686401367, "logits_per_char": -0.2569701671600342, "num_chars": 4}, {"sum_logits": -0.5124585032463074, "num_tokens": 1, "num_tokens_all": 886, "is_greedy": true, "logits_per_token": -0.5124585032463074, "logits_per_char": -0.17081950108210245, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 390, "native_id": 1286, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7813587188720703, "incorrect_loss_raw": 0.6754129528999329, "correct_loss_per_char": 0.19533967971801758, "incorrect_loss_per_char": 0.2251376509666443, "correct_loss_per_token": 0.7813587188720703, "incorrect_loss_per_token": 0.6754129528999329, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7813587188720703, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": false, "logits_per_token": -0.7813587188720703, "logits_per_char": -0.19533967971801758, "num_chars": 4}, {"sum_logits": -0.6754129528999329, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": true, "logits_per_token": -0.6754129528999329, "logits_per_char": -0.2251376509666443, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 391, "native_id": 933, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.680769681930542, "incorrect_loss_raw": 0.7514618635177612, "correct_loss_per_char": 0.1701924204826355, "incorrect_loss_per_char": 0.2504872878392537, "correct_loss_per_token": 0.680769681930542, "incorrect_loss_per_token": 0.7514618635177612, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.680769681930542, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": true, "logits_per_token": -0.680769681930542, "logits_per_char": -0.1701924204826355, "num_chars": 4}, {"sum_logits": -0.7514618635177612, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -0.7514618635177612, "logits_per_char": -0.2504872878392537, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 392, "native_id": 3249, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6565219163894653, "incorrect_loss_raw": 0.7726645469665527, "correct_loss_per_char": 0.16413047909736633, "incorrect_loss_per_char": 0.2575548489888509, "correct_loss_per_token": 0.6565219163894653, "incorrect_loss_per_token": 0.7726645469665527, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6565219163894653, "num_tokens": 1, "num_tokens_all": 999, "is_greedy": true, "logits_per_token": -0.6565219163894653, "logits_per_char": -0.16413047909736633, "num_chars": 4}, {"sum_logits": -0.7726645469665527, "num_tokens": 1, "num_tokens_all": 999, "is_greedy": false, "logits_per_token": -0.7726645469665527, "logits_per_char": -0.2575548489888509, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 393, "native_id": 811, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.699615478515625, "incorrect_loss_raw": 0.768075704574585, "correct_loss_per_char": 0.17490386962890625, "incorrect_loss_per_char": 0.256025234858195, "correct_loss_per_token": 0.699615478515625, "incorrect_loss_per_token": 0.768075704574585, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.699615478515625, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": true, "logits_per_token": -0.699615478515625, "logits_per_char": -0.17490386962890625, "num_chars": 4}, {"sum_logits": -0.768075704574585, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": false, "logits_per_token": -0.768075704574585, "logits_per_char": -0.256025234858195, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 394, "native_id": 3251, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.758733332157135, "incorrect_loss_raw": 0.6742095947265625, "correct_loss_per_char": 0.18968333303928375, "incorrect_loss_per_char": 0.22473653157552084, "correct_loss_per_token": 0.758733332157135, "incorrect_loss_per_token": 0.6742095947265625, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.758733332157135, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": false, "logits_per_token": -0.758733332157135, "logits_per_char": -0.18968333303928375, "num_chars": 4}, {"sum_logits": -0.6742095947265625, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": true, "logits_per_token": -0.6742095947265625, "logits_per_char": -0.22473653157552084, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 395, "native_id": 2135, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.43809837102890015, "incorrect_loss_raw": 1.108384132385254, "correct_loss_per_char": 0.10952459275722504, "incorrect_loss_per_char": 0.3694613774617513, "correct_loss_per_token": 0.43809837102890015, "incorrect_loss_per_token": 1.108384132385254, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.43809837102890015, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": true, "logits_per_token": -0.43809837102890015, "logits_per_char": -0.10952459275722504, "num_chars": 4}, {"sum_logits": -1.108384132385254, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": false, "logits_per_token": -1.108384132385254, "logits_per_char": -0.3694613774617513, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 396, "native_id": 2822, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5723022222518921, "incorrect_loss_raw": 1.0425398349761963, "correct_loss_per_char": 0.19076740741729736, "incorrect_loss_per_char": 0.2606349587440491, "correct_loss_per_token": 0.5723022222518921, "incorrect_loss_per_token": 1.0425398349761963, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0425398349761963, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": false, "logits_per_token": -1.0425398349761963, "logits_per_char": -0.2606349587440491, "num_chars": 4}, {"sum_logits": -0.5723022222518921, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": true, "logits_per_token": -0.5723022222518921, "logits_per_char": -0.19076740741729736, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 397, "native_id": 1555, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2535758018493652, "incorrect_loss_raw": 0.45330238342285156, "correct_loss_per_char": 0.3133939504623413, "incorrect_loss_per_char": 0.15110079447428384, "correct_loss_per_token": 1.2535758018493652, "incorrect_loss_per_token": 0.45330238342285156, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2535758018493652, "num_tokens": 1, "num_tokens_all": 845, "is_greedy": false, "logits_per_token": -1.2535758018493652, "logits_per_char": -0.3133939504623413, "num_chars": 4}, {"sum_logits": -0.45330238342285156, "num_tokens": 1, "num_tokens_all": 845, "is_greedy": true, "logits_per_token": -0.45330238342285156, "logits_per_char": -0.15110079447428384, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 398, "native_id": 2415, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.09384286403656, "incorrect_loss_raw": 0.4689556956291199, "correct_loss_per_char": 0.27346071600914, "incorrect_loss_per_char": 0.15631856520970663, "correct_loss_per_token": 1.09384286403656, "incorrect_loss_per_token": 0.4689556956291199, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.09384286403656, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.09384286403656, "logits_per_char": -0.27346071600914, "num_chars": 4}, {"sum_logits": -0.4689556956291199, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -0.4689556956291199, "logits_per_char": -0.15631856520970663, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 399, "native_id": 2018, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.34388047456741333, "incorrect_loss_raw": 1.3408950567245483, "correct_loss_per_char": 0.08597011864185333, "incorrect_loss_per_char": 0.4469650189081828, "correct_loss_per_token": 0.34388047456741333, "incorrect_loss_per_token": 1.3408950567245483, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.34388047456741333, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": true, "logits_per_token": -0.34388047456741333, "logits_per_char": -0.08597011864185333, "num_chars": 4}, {"sum_logits": -1.3408950567245483, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": false, "logits_per_token": -1.3408950567245483, "logits_per_char": -0.4469650189081828, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 400, "native_id": 214, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7108854651451111, "incorrect_loss_raw": 0.7571834325790405, "correct_loss_per_char": 0.23696182171503702, "incorrect_loss_per_char": 0.18929585814476013, "correct_loss_per_token": 0.7108854651451111, "incorrect_loss_per_token": 0.7571834325790405, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7571834325790405, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": false, "logits_per_token": -0.7571834325790405, "logits_per_char": -0.18929585814476013, "num_chars": 4}, {"sum_logits": -0.7108854651451111, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": true, "logits_per_token": -0.7108854651451111, "logits_per_char": -0.23696182171503702, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 401, "native_id": 122, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6660645008087158, "incorrect_loss_raw": 0.7775543928146362, "correct_loss_per_char": 0.22202150026957193, "incorrect_loss_per_char": 0.19438859820365906, "correct_loss_per_token": 0.6660645008087158, "incorrect_loss_per_token": 0.7775543928146362, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7775543928146362, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -0.7775543928146362, "logits_per_char": -0.19438859820365906, "num_chars": 4}, {"sum_logits": -0.6660645008087158, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": true, "logits_per_token": -0.6660645008087158, "logits_per_char": -0.22202150026957193, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 402, "native_id": 1835, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7504832744598389, "incorrect_loss_raw": 0.7124735116958618, "correct_loss_per_char": 0.18762081861495972, "incorrect_loss_per_char": 0.23749117056528726, "correct_loss_per_token": 0.7504832744598389, "incorrect_loss_per_token": 0.7124735116958618, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7504832744598389, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": false, "logits_per_token": -0.7504832744598389, "logits_per_char": -0.18762081861495972, "num_chars": 4}, {"sum_logits": -0.7124735116958618, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": true, "logits_per_token": -0.7124735116958618, "logits_per_char": -0.23749117056528726, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 403, "native_id": 328, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7679969072341919, "incorrect_loss_raw": 0.6858379244804382, "correct_loss_per_char": 0.19199922680854797, "incorrect_loss_per_char": 0.2286126414934794, "correct_loss_per_token": 0.7679969072341919, "incorrect_loss_per_token": 0.6858379244804382, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7679969072341919, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": false, "logits_per_token": -0.7679969072341919, "logits_per_char": -0.19199922680854797, "num_chars": 4}, {"sum_logits": -0.6858379244804382, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": true, "logits_per_token": -0.6858379244804382, "logits_per_char": -0.2286126414934794, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 404, "native_id": 1200, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7926110029220581, "incorrect_loss_raw": 0.6455374360084534, "correct_loss_per_char": 0.19815275073051453, "incorrect_loss_per_char": 0.21517914533615112, "correct_loss_per_token": 0.7926110029220581, "incorrect_loss_per_token": 0.6455374360084534, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7926110029220581, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": false, "logits_per_token": -0.7926110029220581, "logits_per_char": -0.19815275073051453, "num_chars": 4}, {"sum_logits": -0.6455374360084534, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": true, "logits_per_token": -0.6455374360084534, "logits_per_char": -0.21517914533615112, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 405, "native_id": 3107, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.16887539625167847, "incorrect_loss_raw": 2.356780767440796, "correct_loss_per_char": 0.05629179875055949, "incorrect_loss_per_char": 0.589195191860199, "correct_loss_per_token": 0.16887539625167847, "incorrect_loss_per_token": 2.356780767440796, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -2.356780767440796, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": false, "logits_per_token": -2.356780767440796, "logits_per_char": -0.589195191860199, "num_chars": 4}, {"sum_logits": -0.16887539625167847, "num_tokens": 1, "num_tokens_all": 941, "is_greedy": true, "logits_per_token": -0.16887539625167847, "logits_per_char": -0.05629179875055949, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 406, "native_id": 1393, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.34800267219543457, "incorrect_loss_raw": 1.3917274475097656, "correct_loss_per_char": 0.11600089073181152, "incorrect_loss_per_char": 0.3479318618774414, "correct_loss_per_token": 0.34800267219543457, "incorrect_loss_per_token": 1.3917274475097656, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3917274475097656, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": false, "logits_per_token": -1.3917274475097656, "logits_per_char": -0.3479318618774414, "num_chars": 4}, {"sum_logits": -0.34800267219543457, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": true, "logits_per_token": -0.34800267219543457, "logits_per_char": -0.11600089073181152, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 407, "native_id": 605, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6010692715644836, "incorrect_loss_raw": 0.8836425542831421, "correct_loss_per_char": 0.20035642385482788, "incorrect_loss_per_char": 0.22091063857078552, "correct_loss_per_token": 0.6010692715644836, "incorrect_loss_per_token": 0.8836425542831421, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8836425542831421, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": false, "logits_per_token": -0.8836425542831421, "logits_per_char": -0.22091063857078552, "num_chars": 4}, {"sum_logits": -0.6010692715644836, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": true, "logits_per_token": -0.6010692715644836, "logits_per_char": -0.20035642385482788, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 408, "native_id": 1991, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.848857045173645, "incorrect_loss_raw": 0.6557144522666931, "correct_loss_per_char": 0.21221426129341125, "incorrect_loss_per_char": 0.2185714840888977, "correct_loss_per_token": 0.848857045173645, "incorrect_loss_per_token": 0.6557144522666931, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.848857045173645, "num_tokens": 1, "num_tokens_all": 834, "is_greedy": false, "logits_per_token": -0.848857045173645, "logits_per_char": -0.21221426129341125, "num_chars": 4}, {"sum_logits": -0.6557144522666931, "num_tokens": 1, "num_tokens_all": 834, "is_greedy": true, "logits_per_token": -0.6557144522666931, "logits_per_char": -0.2185714840888977, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 409, "native_id": 2772, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6198821067810059, "incorrect_loss_raw": 0.2470090538263321, "correct_loss_per_char": 0.40497052669525146, "incorrect_loss_per_char": 0.08233635127544403, "correct_loss_per_token": 1.6198821067810059, "incorrect_loss_per_token": 0.2470090538263321, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6198821067810059, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": false, "logits_per_token": -1.6198821067810059, "logits_per_char": -0.40497052669525146, "num_chars": 4}, {"sum_logits": -0.2470090538263321, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": true, "logits_per_token": -0.2470090538263321, "logits_per_char": -0.08233635127544403, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 410, "native_id": 2665, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.32577571272850037, "incorrect_loss_raw": 1.4678784608840942, "correct_loss_per_char": 0.10859190424283345, "incorrect_loss_per_char": 0.36696961522102356, "correct_loss_per_token": 0.32577571272850037, "incorrect_loss_per_token": 1.4678784608840942, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4678784608840942, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": false, "logits_per_token": -1.4678784608840942, "logits_per_char": -0.36696961522102356, "num_chars": 4}, {"sum_logits": -0.32577571272850037, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": true, "logits_per_token": -0.32577571272850037, "logits_per_char": -0.10859190424283345, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 411, "native_id": 991, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7650057077407837, "incorrect_loss_raw": 0.7454512119293213, "correct_loss_per_char": 0.19125142693519592, "incorrect_loss_per_char": 0.24848373730977377, "correct_loss_per_token": 0.7650057077407837, "incorrect_loss_per_token": 0.7454512119293213, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7650057077407837, "num_tokens": 1, "num_tokens_all": 885, "is_greedy": false, "logits_per_token": -0.7650057077407837, "logits_per_char": -0.19125142693519592, "num_chars": 4}, {"sum_logits": -0.7454512119293213, "num_tokens": 1, "num_tokens_all": 885, "is_greedy": true, "logits_per_token": -0.7454512119293213, "logits_per_char": -0.24848373730977377, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 412, "native_id": 3261, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6622162461280823, "incorrect_loss_raw": 0.7763433456420898, "correct_loss_per_char": 0.16555406153202057, "incorrect_loss_per_char": 0.25878111521402997, "correct_loss_per_token": 0.6622162461280823, "incorrect_loss_per_token": 0.7763433456420898, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6622162461280823, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.6622162461280823, "logits_per_char": -0.16555406153202057, "num_chars": 4}, {"sum_logits": -0.7763433456420898, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -0.7763433456420898, "logits_per_char": -0.25878111521402997, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 413, "native_id": 2868, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.47498172521591187, "incorrect_loss_raw": 1.1645885705947876, "correct_loss_per_char": 0.1583272417386373, "incorrect_loss_per_char": 0.2911471426486969, "correct_loss_per_token": 0.47498172521591187, "incorrect_loss_per_token": 1.1645885705947876, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1645885705947876, "num_tokens": 1, "num_tokens_all": 1185, "is_greedy": false, "logits_per_token": -1.1645885705947876, "logits_per_char": -0.2911471426486969, "num_chars": 4}, {"sum_logits": -0.47498172521591187, "num_tokens": 1, "num_tokens_all": 1185, "is_greedy": true, "logits_per_token": -0.47498172521591187, "logits_per_char": -0.1583272417386373, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 414, "native_id": 1460, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21976661682128906, "incorrect_loss_raw": 1.73660147190094, "correct_loss_per_char": 0.07325553894042969, "incorrect_loss_per_char": 0.434150367975235, "correct_loss_per_token": 0.21976661682128906, "incorrect_loss_per_token": 1.73660147190094, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.73660147190094, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": false, "logits_per_token": -1.73660147190094, "logits_per_char": -0.434150367975235, "num_chars": 4}, {"sum_logits": -0.21976661682128906, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": true, "logits_per_token": -0.21976661682128906, "logits_per_char": -0.07325553894042969, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 415, "native_id": 3005, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.8927183151245117, "incorrect_loss_raw": 0.20155717432498932, "correct_loss_per_char": 0.47317957878112793, "incorrect_loss_per_char": 0.06718572477499644, "correct_loss_per_token": 1.8927183151245117, "incorrect_loss_per_token": 0.20155717432498932, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.8927183151245117, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": false, "logits_per_token": -1.8927183151245117, "logits_per_char": -0.47317957878112793, "num_chars": 4}, {"sum_logits": -0.20155717432498932, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": true, "logits_per_token": -0.20155717432498932, "logits_per_char": -0.06718572477499644, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 416, "native_id": 1521, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7855288982391357, "incorrect_loss_raw": 0.6780098676681519, "correct_loss_per_char": 0.19638222455978394, "incorrect_loss_per_char": 0.22600328922271729, "correct_loss_per_token": 0.7855288982391357, "incorrect_loss_per_token": 0.6780098676681519, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7855288982391357, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -0.7855288982391357, "logits_per_char": -0.19638222455978394, "num_chars": 4}, {"sum_logits": -0.6780098676681519, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": true, "logits_per_token": -0.6780098676681519, "logits_per_char": -0.22600328922271729, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 417, "native_id": 1699, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6655709743499756, "incorrect_loss_raw": 0.23682215809822083, "correct_loss_per_char": 0.4163927435874939, "incorrect_loss_per_char": 0.07894071936607361, "correct_loss_per_token": 1.6655709743499756, "incorrect_loss_per_token": 0.23682215809822083, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6655709743499756, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": false, "logits_per_token": -1.6655709743499756, "logits_per_char": -0.4163927435874939, "num_chars": 4}, {"sum_logits": -0.23682215809822083, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": true, "logits_per_token": -0.23682215809822083, "logits_per_char": -0.07894071936607361, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 418, "native_id": 712, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7372041940689087, "incorrect_loss_raw": 0.7074756622314453, "correct_loss_per_char": 0.2457347313563029, "incorrect_loss_per_char": 0.17686891555786133, "correct_loss_per_token": 0.7372041940689087, "incorrect_loss_per_token": 0.7074756622314453, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7074756622314453, "num_tokens": 1, "num_tokens_all": 992, "is_greedy": true, "logits_per_token": -0.7074756622314453, "logits_per_char": -0.17686891555786133, "num_chars": 4}, {"sum_logits": -0.7372041940689087, "num_tokens": 1, "num_tokens_all": 992, "is_greedy": false, "logits_per_token": -0.7372041940689087, "logits_per_char": -0.2457347313563029, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 419, "native_id": 305, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5056807994842529, "incorrect_loss_raw": 0.9801719188690186, "correct_loss_per_char": 0.12642019987106323, "incorrect_loss_per_char": 0.32672397295633954, "correct_loss_per_token": 0.5056807994842529, "incorrect_loss_per_token": 0.9801719188690186, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5056807994842529, "num_tokens": 1, "num_tokens_all": 998, "is_greedy": true, "logits_per_token": -0.5056807994842529, "logits_per_char": -0.12642019987106323, "num_chars": 4}, {"sum_logits": -0.9801719188690186, "num_tokens": 1, "num_tokens_all": 998, "is_greedy": false, "logits_per_token": -0.9801719188690186, "logits_per_char": -0.32672397295633954, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 420, "native_id": 2619, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9025362730026245, "incorrect_loss_raw": 0.558426022529602, "correct_loss_per_char": 0.22563406825065613, "incorrect_loss_per_char": 0.18614200750986734, "correct_loss_per_token": 0.9025362730026245, "incorrect_loss_per_token": 0.558426022529602, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9025362730026245, "num_tokens": 1, "num_tokens_all": 853, "is_greedy": false, "logits_per_token": -0.9025362730026245, "logits_per_char": -0.22563406825065613, "num_chars": 4}, {"sum_logits": -0.558426022529602, "num_tokens": 1, "num_tokens_all": 853, "is_greedy": true, "logits_per_token": -0.558426022529602, "logits_per_char": -0.18614200750986734, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 421, "native_id": 72, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6338682174682617, "incorrect_loss_raw": 0.845964789390564, "correct_loss_per_char": 0.2112894058227539, "incorrect_loss_per_char": 0.211491197347641, "correct_loss_per_token": 0.6338682174682617, "incorrect_loss_per_token": 0.845964789390564, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.845964789390564, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -0.845964789390564, "logits_per_char": -0.211491197347641, "num_chars": 4}, {"sum_logits": -0.6338682174682617, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": true, "logits_per_token": -0.6338682174682617, "logits_per_char": -0.2112894058227539, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 422, "native_id": 869, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24388302862644196, "incorrect_loss_raw": 1.7290807962417603, "correct_loss_per_char": 0.08129434287548065, "incorrect_loss_per_char": 0.43227019906044006, "correct_loss_per_token": 0.24388302862644196, "incorrect_loss_per_token": 1.7290807962417603, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7290807962417603, "num_tokens": 1, "num_tokens_all": 879, "is_greedy": false, "logits_per_token": -1.7290807962417603, "logits_per_char": -0.43227019906044006, "num_chars": 4}, {"sum_logits": -0.24388302862644196, "num_tokens": 1, "num_tokens_all": 879, "is_greedy": true, "logits_per_token": -0.24388302862644196, "logits_per_char": -0.08129434287548065, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 423, "native_id": 804, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5865994095802307, "incorrect_loss_raw": 1.0075446367263794, "correct_loss_per_char": 0.19553313652674356, "incorrect_loss_per_char": 0.25188615918159485, "correct_loss_per_token": 0.5865994095802307, "incorrect_loss_per_token": 1.0075446367263794, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0075446367263794, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.0075446367263794, "logits_per_char": -0.25188615918159485, "num_chars": 4}, {"sum_logits": -0.5865994095802307, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -0.5865994095802307, "logits_per_char": -0.19553313652674356, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 424, "native_id": 2478, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.404741495847702, "incorrect_loss_raw": 1.1955504417419434, "correct_loss_per_char": 0.134913831949234, "incorrect_loss_per_char": 0.29888761043548584, "correct_loss_per_token": 0.404741495847702, "incorrect_loss_per_token": 1.1955504417419434, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1955504417419434, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": false, "logits_per_token": -1.1955504417419434, "logits_per_char": -0.29888761043548584, "num_chars": 4}, {"sum_logits": -0.404741495847702, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": true, "logits_per_token": -0.404741495847702, "logits_per_char": -0.134913831949234, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 425, "native_id": 2541, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21747595071792603, "incorrect_loss_raw": 1.8085439205169678, "correct_loss_per_char": 0.07249198357264201, "incorrect_loss_per_char": 0.45213598012924194, "correct_loss_per_token": 0.21747595071792603, "incorrect_loss_per_token": 1.8085439205169678, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.8085439205169678, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -1.8085439205169678, "logits_per_char": -0.45213598012924194, "num_chars": 4}, {"sum_logits": -0.21747595071792603, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": true, "logits_per_token": -0.21747595071792603, "logits_per_char": -0.07249198357264201, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 426, "native_id": 2242, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6403695344924927, "incorrect_loss_raw": 0.8477491736412048, "correct_loss_per_char": 0.21345651149749756, "incorrect_loss_per_char": 0.2119372934103012, "correct_loss_per_token": 0.6403695344924927, "incorrect_loss_per_token": 0.8477491736412048, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8477491736412048, "num_tokens": 1, "num_tokens_all": 870, "is_greedy": false, "logits_per_token": -0.8477491736412048, "logits_per_char": -0.2119372934103012, "num_chars": 4}, {"sum_logits": -0.6403695344924927, "num_tokens": 1, "num_tokens_all": 870, "is_greedy": true, "logits_per_token": -0.6403695344924927, "logits_per_char": -0.21345651149749756, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 427, "native_id": 579, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7126614451408386, "incorrect_loss_raw": 0.7692539691925049, "correct_loss_per_char": 0.17816536128520966, "incorrect_loss_per_char": 0.25641798973083496, "correct_loss_per_token": 0.7126614451408386, "incorrect_loss_per_token": 0.7692539691925049, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7126614451408386, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.7126614451408386, "logits_per_char": -0.17816536128520966, "num_chars": 4}, {"sum_logits": -0.7692539691925049, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -0.7692539691925049, "logits_per_char": -0.25641798973083496, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 428, "native_id": 2055, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8189631700515747, "incorrect_loss_raw": 0.6978311538696289, "correct_loss_per_char": 0.20474079251289368, "incorrect_loss_per_char": 0.23261038462320963, "correct_loss_per_token": 0.8189631700515747, "incorrect_loss_per_token": 0.6978311538696289, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8189631700515747, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": false, "logits_per_token": -0.8189631700515747, "logits_per_char": -0.20474079251289368, "num_chars": 4}, {"sum_logits": -0.6978311538696289, "num_tokens": 1, "num_tokens_all": 954, "is_greedy": true, "logits_per_token": -0.6978311538696289, "logits_per_char": -0.23261038462320963, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 429, "native_id": 542, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8621364831924438, "incorrect_loss_raw": 0.5933660268783569, "correct_loss_per_char": 0.21553412079811096, "incorrect_loss_per_char": 0.197788675626119, "correct_loss_per_token": 0.8621364831924438, "incorrect_loss_per_token": 0.5933660268783569, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8621364831924438, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": false, "logits_per_token": -0.8621364831924438, "logits_per_char": -0.21553412079811096, "num_chars": 4}, {"sum_logits": -0.5933660268783569, "num_tokens": 1, "num_tokens_all": 1038, "is_greedy": true, "logits_per_token": -0.5933660268783569, "logits_per_char": -0.197788675626119, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 430, "native_id": 2761, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6479929685592651, "incorrect_loss_raw": 0.8281381726264954, "correct_loss_per_char": 0.16199824213981628, "incorrect_loss_per_char": 0.2760460575421651, "correct_loss_per_token": 0.6479929685592651, "incorrect_loss_per_token": 0.8281381726264954, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6479929685592651, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": true, "logits_per_token": -0.6479929685592651, "logits_per_char": -0.16199824213981628, "num_chars": 4}, {"sum_logits": -0.8281381726264954, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": false, "logits_per_token": -0.8281381726264954, "logits_per_char": -0.2760460575421651, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 431, "native_id": 1043, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8583269715309143, "incorrect_loss_raw": 0.6013689637184143, "correct_loss_per_char": 0.21458174288272858, "incorrect_loss_per_char": 0.20045632123947144, "correct_loss_per_token": 0.8583269715309143, "incorrect_loss_per_token": 0.6013689637184143, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8583269715309143, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": false, "logits_per_token": -0.8583269715309143, "logits_per_char": -0.21458174288272858, "num_chars": 4}, {"sum_logits": -0.6013689637184143, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": true, "logits_per_token": -0.6013689637184143, "logits_per_char": -0.20045632123947144, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 432, "native_id": 2667, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0524269342422485, "incorrect_loss_raw": 0.502149224281311, "correct_loss_per_char": 0.26310673356056213, "incorrect_loss_per_char": 0.167383074760437, "correct_loss_per_token": 1.0524269342422485, "incorrect_loss_per_token": 0.502149224281311, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0524269342422485, "num_tokens": 1, "num_tokens_all": 864, "is_greedy": false, "logits_per_token": -1.0524269342422485, "logits_per_char": -0.26310673356056213, "num_chars": 4}, {"sum_logits": -0.502149224281311, "num_tokens": 1, "num_tokens_all": 864, "is_greedy": true, "logits_per_token": -0.502149224281311, "logits_per_char": -0.167383074760437, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 433, "native_id": 202, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.315425992012024, "incorrect_loss_raw": 0.3651773929595947, "correct_loss_per_char": 0.328856498003006, "incorrect_loss_per_char": 0.12172579765319824, "correct_loss_per_token": 1.315425992012024, "incorrect_loss_per_token": 0.3651773929595947, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.315425992012024, "num_tokens": 1, "num_tokens_all": 879, "is_greedy": false, "logits_per_token": -1.315425992012024, "logits_per_char": -0.328856498003006, "num_chars": 4}, {"sum_logits": -0.3651773929595947, "num_tokens": 1, "num_tokens_all": 879, "is_greedy": true, "logits_per_token": -0.3651773929595947, "logits_per_char": -0.12172579765319824, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 434, "native_id": 2457, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0256929397583008, "incorrect_loss_raw": 0.5086688995361328, "correct_loss_per_char": 0.2564232349395752, "incorrect_loss_per_char": 0.1695562998453776, "correct_loss_per_token": 1.0256929397583008, "incorrect_loss_per_token": 0.5086688995361328, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0256929397583008, "num_tokens": 1, "num_tokens_all": 830, "is_greedy": false, "logits_per_token": -1.0256929397583008, "logits_per_char": -0.2564232349395752, "num_chars": 4}, {"sum_logits": -0.5086688995361328, "num_tokens": 1, "num_tokens_all": 830, "is_greedy": true, "logits_per_token": -0.5086688995361328, "logits_per_char": -0.1695562998453776, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 435, "native_id": 3163, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6097080111503601, "incorrect_loss_raw": 0.8650093674659729, "correct_loss_per_char": 0.15242700278759003, "incorrect_loss_per_char": 0.28833645582199097, "correct_loss_per_token": 0.6097080111503601, "incorrect_loss_per_token": 0.8650093674659729, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6097080111503601, "num_tokens": 1, "num_tokens_all": 887, "is_greedy": true, "logits_per_token": -0.6097080111503601, "logits_per_char": -0.15242700278759003, "num_chars": 4}, {"sum_logits": -0.8650093674659729, "num_tokens": 1, "num_tokens_all": 887, "is_greedy": false, "logits_per_token": -0.8650093674659729, "logits_per_char": -0.28833645582199097, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 436, "native_id": 1480, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6320553421974182, "incorrect_loss_raw": 0.8412205576896667, "correct_loss_per_char": 0.15801383554935455, "incorrect_loss_per_char": 0.28040685256322223, "correct_loss_per_token": 0.6320553421974182, "incorrect_loss_per_token": 0.8412205576896667, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6320553421974182, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": true, "logits_per_token": -0.6320553421974182, "logits_per_char": -0.15801383554935455, "num_chars": 4}, {"sum_logits": -0.8412205576896667, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": false, "logits_per_token": -0.8412205576896667, "logits_per_char": -0.28040685256322223, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 437, "native_id": 2448, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5975664258003235, "incorrect_loss_raw": 0.8950254321098328, "correct_loss_per_char": 0.14939160645008087, "incorrect_loss_per_char": 0.2983418107032776, "correct_loss_per_token": 0.5975664258003235, "incorrect_loss_per_token": 0.8950254321098328, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5975664258003235, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": true, "logits_per_token": -0.5975664258003235, "logits_per_char": -0.14939160645008087, "num_chars": 4}, {"sum_logits": -0.8950254321098328, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": false, "logits_per_token": -0.8950254321098328, "logits_per_char": -0.2983418107032776, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 438, "native_id": 2888, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.47082123160362244, "incorrect_loss_raw": 1.1750839948654175, "correct_loss_per_char": 0.11770530790090561, "incorrect_loss_per_char": 0.39169466495513916, "correct_loss_per_token": 0.47082123160362244, "incorrect_loss_per_token": 1.1750839948654175, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.47082123160362244, "num_tokens": 1, "num_tokens_all": 1006, "is_greedy": true, "logits_per_token": -0.47082123160362244, "logits_per_char": -0.11770530790090561, "num_chars": 4}, {"sum_logits": -1.1750839948654175, "num_tokens": 1, "num_tokens_all": 1006, "is_greedy": false, "logits_per_token": -1.1750839948654175, "logits_per_char": -0.39169466495513916, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 439, "native_id": 1181, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6270876526832581, "incorrect_loss_raw": 0.8724478483200073, "correct_loss_per_char": 0.15677191317081451, "incorrect_loss_per_char": 0.29081594944000244, "correct_loss_per_token": 0.6270876526832581, "incorrect_loss_per_token": 0.8724478483200073, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6270876526832581, "num_tokens": 1, "num_tokens_all": 1161, "is_greedy": true, "logits_per_token": -0.6270876526832581, "logits_per_char": -0.15677191317081451, "num_chars": 4}, {"sum_logits": -0.8724478483200073, "num_tokens": 1, "num_tokens_all": 1161, "is_greedy": false, "logits_per_token": -0.8724478483200073, "logits_per_char": -0.29081594944000244, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 440, "native_id": 3104, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.48324233293533325, "incorrect_loss_raw": 1.0602061748504639, "correct_loss_per_char": 0.12081058323383331, "incorrect_loss_per_char": 0.353402058283488, "correct_loss_per_token": 0.48324233293533325, "incorrect_loss_per_token": 1.0602061748504639, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.48324233293533325, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": true, "logits_per_token": -0.48324233293533325, "logits_per_char": -0.12081058323383331, "num_chars": 4}, {"sum_logits": -1.0602061748504639, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.0602061748504639, "logits_per_char": -0.353402058283488, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 441, "native_id": 1671, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9521448612213135, "incorrect_loss_raw": 0.5454471111297607, "correct_loss_per_char": 0.23803621530532837, "incorrect_loss_per_char": 0.18181570370992026, "correct_loss_per_token": 0.9521448612213135, "incorrect_loss_per_token": 0.5454471111297607, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9521448612213135, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": false, "logits_per_token": -0.9521448612213135, "logits_per_char": -0.23803621530532837, "num_chars": 4}, {"sum_logits": -0.5454471111297607, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": true, "logits_per_token": -0.5454471111297607, "logits_per_char": -0.18181570370992026, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 442, "native_id": 1506, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.46352633833885193, "incorrect_loss_raw": 1.0499259233474731, "correct_loss_per_char": 0.15450877944628397, "incorrect_loss_per_char": 0.2624814808368683, "correct_loss_per_token": 0.46352633833885193, "incorrect_loss_per_token": 1.0499259233474731, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0499259233474731, "num_tokens": 1, "num_tokens_all": 1000, "is_greedy": false, "logits_per_token": -1.0499259233474731, "logits_per_char": -0.2624814808368683, "num_chars": 4}, {"sum_logits": -0.46352633833885193, "num_tokens": 1, "num_tokens_all": 1000, "is_greedy": true, "logits_per_token": -0.46352633833885193, "logits_per_char": -0.15450877944628397, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 443, "native_id": 959, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.246778130531311, "incorrect_loss_raw": 0.37995317578315735, "correct_loss_per_char": 0.31169453263282776, "incorrect_loss_per_char": 0.12665105859438577, "correct_loss_per_token": 1.246778130531311, "incorrect_loss_per_token": 0.37995317578315735, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.246778130531311, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": false, "logits_per_token": -1.246778130531311, "logits_per_char": -0.31169453263282776, "num_chars": 4}, {"sum_logits": -0.37995317578315735, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": true, "logits_per_token": -0.37995317578315735, "logits_per_char": -0.12665105859438577, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 444, "native_id": 1168, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9273661375045776, "incorrect_loss_raw": 0.5472694635391235, "correct_loss_per_char": 0.2318415343761444, "incorrect_loss_per_char": 0.18242315451304117, "correct_loss_per_token": 0.9273661375045776, "incorrect_loss_per_token": 0.5472694635391235, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9273661375045776, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": false, "logits_per_token": -0.9273661375045776, "logits_per_char": -0.2318415343761444, "num_chars": 4}, {"sum_logits": -0.5472694635391235, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": true, "logits_per_token": -0.5472694635391235, "logits_per_char": -0.18242315451304117, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 445, "native_id": 35, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5636786222457886, "incorrect_loss_raw": 0.4037497341632843, "correct_loss_per_char": 0.39091965556144714, "incorrect_loss_per_char": 0.13458324472109476, "correct_loss_per_token": 1.5636786222457886, "incorrect_loss_per_token": 0.4037497341632843, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5636786222457886, "num_tokens": 1, "num_tokens_all": 956, "is_greedy": false, "logits_per_token": -1.5636786222457886, "logits_per_char": -0.39091965556144714, "num_chars": 4}, {"sum_logits": -0.4037497341632843, "num_tokens": 1, "num_tokens_all": 956, "is_greedy": true, "logits_per_token": -0.4037497341632843, "logits_per_char": -0.13458324472109476, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 446, "native_id": 1281, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2876809239387512, "incorrect_loss_raw": 1.4976422786712646, "correct_loss_per_char": 0.09589364131291707, "incorrect_loss_per_char": 0.37441056966781616, "correct_loss_per_token": 0.2876809239387512, "incorrect_loss_per_token": 1.4976422786712646, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4976422786712646, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": false, "logits_per_token": -1.4976422786712646, "logits_per_char": -0.37441056966781616, "num_chars": 4}, {"sum_logits": -0.2876809239387512, "num_tokens": 1, "num_tokens_all": 964, "is_greedy": true, "logits_per_token": -0.2876809239387512, "logits_per_char": -0.09589364131291707, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 447, "native_id": 2975, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8313233256340027, "incorrect_loss_raw": 0.6426844596862793, "correct_loss_per_char": 0.20783083140850067, "incorrect_loss_per_char": 0.21422815322875977, "correct_loss_per_token": 0.8313233256340027, "incorrect_loss_per_token": 0.6426844596862793, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8313233256340027, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -0.8313233256340027, "logits_per_char": -0.20783083140850067, "num_chars": 4}, {"sum_logits": -0.6426844596862793, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": true, "logits_per_token": -0.6426844596862793, "logits_per_char": -0.21422815322875977, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 448, "native_id": 1089, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.27201128005981445, "incorrect_loss_raw": 1.5065943002700806, "correct_loss_per_char": 0.09067042668660481, "incorrect_loss_per_char": 0.37664857506752014, "correct_loss_per_token": 0.27201128005981445, "incorrect_loss_per_token": 1.5065943002700806, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5065943002700806, "num_tokens": 1, "num_tokens_all": 894, "is_greedy": false, "logits_per_token": -1.5065943002700806, "logits_per_char": -0.37664857506752014, "num_chars": 4}, {"sum_logits": -0.27201128005981445, "num_tokens": 1, "num_tokens_all": 894, "is_greedy": true, "logits_per_token": -0.27201128005981445, "logits_per_char": -0.09067042668660481, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 449, "native_id": 493, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6717838644981384, "incorrect_loss_raw": 0.8443378210067749, "correct_loss_per_char": 0.2239279548327128, "incorrect_loss_per_char": 0.21108445525169373, "correct_loss_per_token": 0.6717838644981384, "incorrect_loss_per_token": 0.8443378210067749, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8443378210067749, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": false, "logits_per_token": -0.8443378210067749, "logits_per_char": -0.21108445525169373, "num_chars": 4}, {"sum_logits": -0.6717838644981384, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": true, "logits_per_token": -0.6717838644981384, "logits_per_char": -0.2239279548327128, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 450, "native_id": 2229, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6131733059883118, "incorrect_loss_raw": 0.8513397574424744, "correct_loss_per_char": 0.15329332649707794, "incorrect_loss_per_char": 0.28377991914749146, "correct_loss_per_token": 0.6131733059883118, "incorrect_loss_per_token": 0.8513397574424744, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6131733059883118, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": true, "logits_per_token": -0.6131733059883118, "logits_per_char": -0.15329332649707794, "num_chars": 4}, {"sum_logits": -0.8513397574424744, "num_tokens": 1, "num_tokens_all": 972, "is_greedy": false, "logits_per_token": -0.8513397574424744, "logits_per_char": -0.28377991914749146, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 451, "native_id": 2835, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6450767517089844, "incorrect_loss_raw": 0.8007673025131226, "correct_loss_per_char": 0.1612691879272461, "incorrect_loss_per_char": 0.26692243417104083, "correct_loss_per_token": 0.6450767517089844, "incorrect_loss_per_token": 0.8007673025131226, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6450767517089844, "num_tokens": 1, "num_tokens_all": 852, "is_greedy": true, "logits_per_token": -0.6450767517089844, "logits_per_char": -0.1612691879272461, "num_chars": 4}, {"sum_logits": -0.8007673025131226, "num_tokens": 1, "num_tokens_all": 852, "is_greedy": false, "logits_per_token": -0.8007673025131226, "logits_per_char": -0.26692243417104083, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 452, "native_id": 145, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8243253231048584, "incorrect_loss_raw": 0.7137895226478577, "correct_loss_per_char": 0.2060813307762146, "incorrect_loss_per_char": 0.23792984088261923, "correct_loss_per_token": 0.8243253231048584, "incorrect_loss_per_token": 0.7137895226478577, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8243253231048584, "num_tokens": 1, "num_tokens_all": 872, "is_greedy": false, "logits_per_token": -0.8243253231048584, "logits_per_char": -0.2060813307762146, "num_chars": 4}, {"sum_logits": -0.7137895226478577, "num_tokens": 1, "num_tokens_all": 872, "is_greedy": true, "logits_per_token": -0.7137895226478577, "logits_per_char": -0.23792984088261923, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 453, "native_id": 895, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5303109288215637, "incorrect_loss_raw": 0.9796557426452637, "correct_loss_per_char": 0.13257773220539093, "incorrect_loss_per_char": 0.3265519142150879, "correct_loss_per_token": 0.5303109288215637, "incorrect_loss_per_token": 0.9796557426452637, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5303109288215637, "num_tokens": 1, "num_tokens_all": 863, "is_greedy": true, "logits_per_token": -0.5303109288215637, "logits_per_char": -0.13257773220539093, "num_chars": 4}, {"sum_logits": -0.9796557426452637, "num_tokens": 1, "num_tokens_all": 863, "is_greedy": false, "logits_per_token": -0.9796557426452637, "logits_per_char": -0.3265519142150879, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 454, "native_id": 2966, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1519572734832764, "incorrect_loss_raw": 0.4523582458496094, "correct_loss_per_char": 0.2879893183708191, "incorrect_loss_per_char": 0.15078608194986978, "correct_loss_per_token": 1.1519572734832764, "incorrect_loss_per_token": 0.4523582458496094, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1519572734832764, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": false, "logits_per_token": -1.1519572734832764, "logits_per_char": -0.2879893183708191, "num_chars": 4}, {"sum_logits": -0.4523582458496094, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": true, "logits_per_token": -0.4523582458496094, "logits_per_char": -0.15078608194986978, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 455, "native_id": 2339, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3110809922218323, "incorrect_loss_raw": 1.5414588451385498, "correct_loss_per_char": 0.10369366407394409, "incorrect_loss_per_char": 0.38536471128463745, "correct_loss_per_token": 0.3110809922218323, "incorrect_loss_per_token": 1.5414588451385498, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5414588451385498, "num_tokens": 1, "num_tokens_all": 900, "is_greedy": false, "logits_per_token": -1.5414588451385498, "logits_per_char": -0.38536471128463745, "num_chars": 4}, {"sum_logits": -0.3110809922218323, "num_tokens": 1, "num_tokens_all": 900, "is_greedy": true, "logits_per_token": -0.3110809922218323, "logits_per_char": -0.10369366407394409, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 456, "native_id": 2431, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1086242198944092, "incorrect_loss_raw": 0.4331721365451813, "correct_loss_per_char": 0.2771560549736023, "incorrect_loss_per_char": 0.14439071218172708, "correct_loss_per_token": 1.1086242198944092, "incorrect_loss_per_token": 0.4331721365451813, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1086242198944092, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": false, "logits_per_token": -1.1086242198944092, "logits_per_char": -0.2771560549736023, "num_chars": 4}, {"sum_logits": -0.4331721365451813, "num_tokens": 1, "num_tokens_all": 1065, "is_greedy": true, "logits_per_token": -0.4331721365451813, "logits_per_char": -0.14439071218172708, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 457, "native_id": 3156, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5765548348426819, "incorrect_loss_raw": 0.9355660676956177, "correct_loss_per_char": 0.19218494494756064, "incorrect_loss_per_char": 0.23389151692390442, "correct_loss_per_token": 0.5765548348426819, "incorrect_loss_per_token": 0.9355660676956177, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9355660676956177, "num_tokens": 1, "num_tokens_all": 859, "is_greedy": false, "logits_per_token": -0.9355660676956177, "logits_per_char": -0.23389151692390442, "num_chars": 4}, {"sum_logits": -0.5765548348426819, "num_tokens": 1, "num_tokens_all": 859, "is_greedy": true, "logits_per_token": -0.5765548348426819, "logits_per_char": -0.19218494494756064, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 458, "native_id": 2200, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8712143898010254, "incorrect_loss_raw": 0.6175020337104797, "correct_loss_per_char": 0.21780359745025635, "incorrect_loss_per_char": 0.20583401123682657, "correct_loss_per_token": 0.8712143898010254, "incorrect_loss_per_token": 0.6175020337104797, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8712143898010254, "num_tokens": 1, "num_tokens_all": 994, "is_greedy": false, "logits_per_token": -0.8712143898010254, "logits_per_char": -0.21780359745025635, "num_chars": 4}, {"sum_logits": -0.6175020337104797, "num_tokens": 1, "num_tokens_all": 994, "is_greedy": true, "logits_per_token": -0.6175020337104797, "logits_per_char": -0.20583401123682657, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 459, "native_id": 753, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.032253623008728, "incorrect_loss_raw": 0.4800795316696167, "correct_loss_per_char": 0.258063405752182, "incorrect_loss_per_char": 0.1600265105565389, "correct_loss_per_token": 1.032253623008728, "incorrect_loss_per_token": 0.4800795316696167, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.032253623008728, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.032253623008728, "logits_per_char": -0.258063405752182, "num_chars": 4}, {"sum_logits": -0.4800795316696167, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": true, "logits_per_token": -0.4800795316696167, "logits_per_char": -0.1600265105565389, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 460, "native_id": 1319, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6525068879127502, "incorrect_loss_raw": 0.8785620927810669, "correct_loss_per_char": 0.16312672197818756, "incorrect_loss_per_char": 0.2928540309270223, "correct_loss_per_token": 0.6525068879127502, "incorrect_loss_per_token": 0.8785620927810669, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6525068879127502, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": true, "logits_per_token": -0.6525068879127502, "logits_per_char": -0.16312672197818756, "num_chars": 4}, {"sum_logits": -0.8785620927810669, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": false, "logits_per_token": -0.8785620927810669, "logits_per_char": -0.2928540309270223, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 461, "native_id": 1199, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2288832664489746, "incorrect_loss_raw": 0.3973139822483063, "correct_loss_per_char": 0.30722081661224365, "incorrect_loss_per_char": 0.13243799408276877, "correct_loss_per_token": 1.2288832664489746, "incorrect_loss_per_token": 0.3973139822483063, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2288832664489746, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": false, "logits_per_token": -1.2288832664489746, "logits_per_char": -0.30722081661224365, "num_chars": 4}, {"sum_logits": -0.3973139822483063, "num_tokens": 1, "num_tokens_all": 1071, "is_greedy": true, "logits_per_token": -0.3973139822483063, "logits_per_char": -0.13243799408276877, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 462, "native_id": 1486, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5100300312042236, "incorrect_loss_raw": 0.2751442492008209, "correct_loss_per_char": 0.3775075078010559, "incorrect_loss_per_char": 0.09171474973360698, "correct_loss_per_token": 1.5100300312042236, "incorrect_loss_per_token": 0.2751442492008209, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5100300312042236, "num_tokens": 1, "num_tokens_all": 1043, "is_greedy": false, "logits_per_token": -1.5100300312042236, "logits_per_char": -0.3775075078010559, "num_chars": 4}, {"sum_logits": -0.2751442492008209, "num_tokens": 1, "num_tokens_all": 1043, "is_greedy": true, "logits_per_token": -0.2751442492008209, "logits_per_char": -0.09171474973360698, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 463, "native_id": 1117, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0250526666641235, "incorrect_loss_raw": 0.4968370795249939, "correct_loss_per_char": 0.2562631666660309, "incorrect_loss_per_char": 0.16561235984166464, "correct_loss_per_token": 1.0250526666641235, "incorrect_loss_per_token": 0.4968370795249939, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0250526666641235, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": false, "logits_per_token": -1.0250526666641235, "logits_per_char": -0.2562631666660309, "num_chars": 4}, {"sum_logits": -0.4968370795249939, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": true, "logits_per_token": -0.4968370795249939, "logits_per_char": -0.16561235984166464, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 464, "native_id": 2632, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6168832778930664, "incorrect_loss_raw": 0.34704869985580444, "correct_loss_per_char": 0.4042208194732666, "incorrect_loss_per_char": 0.11568289995193481, "correct_loss_per_token": 1.6168832778930664, "incorrect_loss_per_token": 0.34704869985580444, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6168832778930664, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.6168832778930664, "logits_per_char": -0.4042208194732666, "num_chars": 4}, {"sum_logits": -0.34704869985580444, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": true, "logits_per_token": -0.34704869985580444, "logits_per_char": -0.11568289995193481, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 465, "native_id": 722, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8350397944450378, "incorrect_loss_raw": 0.6577452421188354, "correct_loss_per_char": 0.20875994861125946, "incorrect_loss_per_char": 0.21924841403961182, "correct_loss_per_token": 0.8350397944450378, "incorrect_loss_per_token": 0.6577452421188354, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8350397944450378, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": false, "logits_per_token": -0.8350397944450378, "logits_per_char": -0.20875994861125946, "num_chars": 4}, {"sum_logits": -0.6577452421188354, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": true, "logits_per_token": -0.6577452421188354, "logits_per_char": -0.21924841403961182, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 466, "native_id": 1871, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8714670538902283, "incorrect_loss_raw": 0.606555163860321, "correct_loss_per_char": 0.21786676347255707, "incorrect_loss_per_char": 0.20218505462010702, "correct_loss_per_token": 0.8714670538902283, "incorrect_loss_per_token": 0.606555163860321, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8714670538902283, "num_tokens": 1, "num_tokens_all": 900, "is_greedy": false, "logits_per_token": -0.8714670538902283, "logits_per_char": -0.21786676347255707, "num_chars": 4}, {"sum_logits": -0.606555163860321, "num_tokens": 1, "num_tokens_all": 900, "is_greedy": true, "logits_per_token": -0.606555163860321, "logits_per_char": -0.20218505462010702, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 467, "native_id": 693, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7415716648101807, "incorrect_loss_raw": 0.7053192853927612, "correct_loss_per_char": 0.18539291620254517, "incorrect_loss_per_char": 0.23510642846425375, "correct_loss_per_token": 0.7415716648101807, "incorrect_loss_per_token": 0.7053192853927612, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7415716648101807, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": false, "logits_per_token": -0.7415716648101807, "logits_per_char": -0.18539291620254517, "num_chars": 4}, {"sum_logits": -0.7053192853927612, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": true, "logits_per_token": -0.7053192853927612, "logits_per_char": -0.23510642846425375, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 468, "native_id": 13, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.49365419149398804, "incorrect_loss_raw": 0.9844557046890259, "correct_loss_per_char": 0.12341354787349701, "incorrect_loss_per_char": 0.3281519015630086, "correct_loss_per_token": 0.49365419149398804, "incorrect_loss_per_token": 0.9844557046890259, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.49365419149398804, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": true, "logits_per_token": -0.49365419149398804, "logits_per_char": -0.12341354787349701, "num_chars": 4}, {"sum_logits": -0.9844557046890259, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": false, "logits_per_token": -0.9844557046890259, "logits_per_char": -0.3281519015630086, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 469, "native_id": 2226, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9305188059806824, "incorrect_loss_raw": 0.6032097339630127, "correct_loss_per_char": 0.2326297014951706, "incorrect_loss_per_char": 0.20106991132100424, "correct_loss_per_token": 0.9305188059806824, "incorrect_loss_per_token": 0.6032097339630127, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9305188059806824, "num_tokens": 1, "num_tokens_all": 888, "is_greedy": false, "logits_per_token": -0.9305188059806824, "logits_per_char": -0.2326297014951706, "num_chars": 4}, {"sum_logits": -0.6032097339630127, "num_tokens": 1, "num_tokens_all": 888, "is_greedy": true, "logits_per_token": -0.6032097339630127, "logits_per_char": -0.20106991132100424, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 470, "native_id": 1673, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5306890606880188, "incorrect_loss_raw": 0.929904580116272, "correct_loss_per_char": 0.17689635356267294, "incorrect_loss_per_char": 0.232476145029068, "correct_loss_per_token": 0.5306890606880188, "incorrect_loss_per_token": 0.929904580116272, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.929904580116272, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -0.929904580116272, "logits_per_char": -0.232476145029068, "num_chars": 4}, {"sum_logits": -0.5306890606880188, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": true, "logits_per_token": -0.5306890606880188, "logits_per_char": -0.17689635356267294, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 471, "native_id": 979, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.49109122157096863, "incorrect_loss_raw": 1.0780727863311768, "correct_loss_per_char": 0.16369707385698953, "incorrect_loss_per_char": 0.2695181965827942, "correct_loss_per_token": 0.49109122157096863, "incorrect_loss_per_token": 1.0780727863311768, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0780727863311768, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": false, "logits_per_token": -1.0780727863311768, "logits_per_char": -0.2695181965827942, "num_chars": 4}, {"sum_logits": -0.49109122157096863, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": true, "logits_per_token": -0.49109122157096863, "logits_per_char": -0.16369707385698953, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 472, "native_id": 785, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.105994462966919, "incorrect_loss_raw": 0.44862088561058044, "correct_loss_per_char": 0.27649861574172974, "incorrect_loss_per_char": 0.14954029520352682, "correct_loss_per_token": 1.105994462966919, "incorrect_loss_per_token": 0.44862088561058044, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.105994462966919, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.105994462966919, "logits_per_char": -0.27649861574172974, "num_chars": 4}, {"sum_logits": -0.44862088561058044, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -0.44862088561058044, "logits_per_char": -0.14954029520352682, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 473, "native_id": 1817, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8263063430786133, "incorrect_loss_raw": 0.6660167574882507, "correct_loss_per_char": 0.20657658576965332, "incorrect_loss_per_char": 0.2220055858294169, "correct_loss_per_token": 0.8263063430786133, "incorrect_loss_per_token": 0.6660167574882507, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8263063430786133, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": false, "logits_per_token": -0.8263063430786133, "logits_per_char": -0.20657658576965332, "num_chars": 4}, {"sum_logits": -0.6660167574882507, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": true, "logits_per_token": -0.6660167574882507, "logits_per_char": -0.2220055858294169, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 474, "native_id": 1119, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4041439294815063, "incorrect_loss_raw": 0.32784411311149597, "correct_loss_per_char": 0.3510359823703766, "incorrect_loss_per_char": 0.10928137103716533, "correct_loss_per_token": 1.4041439294815063, "incorrect_loss_per_token": 0.32784411311149597, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4041439294815063, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": false, "logits_per_token": -1.4041439294815063, "logits_per_char": -0.3510359823703766, "num_chars": 4}, {"sum_logits": -0.32784411311149597, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": true, "logits_per_token": -0.32784411311149597, "logits_per_char": -0.10928137103716533, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 475, "native_id": 713, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7770896553993225, "incorrect_loss_raw": 0.6488426923751831, "correct_loss_per_char": 0.19427241384983063, "incorrect_loss_per_char": 0.21628089745839438, "correct_loss_per_token": 0.7770896553993225, "incorrect_loss_per_token": 0.6488426923751831, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7770896553993225, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": false, "logits_per_token": -0.7770896553993225, "logits_per_char": -0.19427241384983063, "num_chars": 4}, {"sum_logits": -0.6488426923751831, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": true, "logits_per_token": -0.6488426923751831, "logits_per_char": -0.21628089745839438, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 476, "native_id": 1449, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2836824357509613, "incorrect_loss_raw": 1.5308313369750977, "correct_loss_per_char": 0.0945608119169871, "incorrect_loss_per_char": 0.3827078342437744, "correct_loss_per_token": 0.2836824357509613, "incorrect_loss_per_token": 1.5308313369750977, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5308313369750977, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -1.5308313369750977, "logits_per_char": -0.3827078342437744, "num_chars": 4}, {"sum_logits": -0.2836824357509613, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.2836824357509613, "logits_per_char": -0.0945608119169871, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 477, "native_id": 2401, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7486950159072876, "incorrect_loss_raw": 0.7009176015853882, "correct_loss_per_char": 0.1871737539768219, "incorrect_loss_per_char": 0.23363920052846274, "correct_loss_per_token": 0.7486950159072876, "incorrect_loss_per_token": 0.7009176015853882, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7486950159072876, "num_tokens": 1, "num_tokens_all": 1152, "is_greedy": false, "logits_per_token": -0.7486950159072876, "logits_per_char": -0.1871737539768219, "num_chars": 4}, {"sum_logits": -0.7009176015853882, "num_tokens": 1, "num_tokens_all": 1152, "is_greedy": true, "logits_per_token": -0.7009176015853882, "logits_per_char": -0.23363920052846274, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 478, "native_id": 1676, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7346259355545044, "incorrect_loss_raw": 0.6851077079772949, "correct_loss_per_char": 0.1836564838886261, "incorrect_loss_per_char": 0.22836923599243164, "correct_loss_per_token": 0.7346259355545044, "incorrect_loss_per_token": 0.6851077079772949, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7346259355545044, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": false, "logits_per_token": -0.7346259355545044, "logits_per_char": -0.1836564838886261, "num_chars": 4}, {"sum_logits": -0.6851077079772949, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": true, "logits_per_token": -0.6851077079772949, "logits_per_char": -0.22836923599243164, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 479, "native_id": 3213, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6239989399909973, "incorrect_loss_raw": 0.8891282081604004, "correct_loss_per_char": 0.20799964666366577, "incorrect_loss_per_char": 0.2222820520401001, "correct_loss_per_token": 0.6239989399909973, "incorrect_loss_per_token": 0.8891282081604004, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8891282081604004, "num_tokens": 1, "num_tokens_all": 887, "is_greedy": false, "logits_per_token": -0.8891282081604004, "logits_per_char": -0.2222820520401001, "num_chars": 4}, {"sum_logits": -0.6239989399909973, "num_tokens": 1, "num_tokens_all": 887, "is_greedy": true, "logits_per_token": -0.6239989399909973, "logits_per_char": -0.20799964666366577, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 480, "native_id": 2861, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.28925496339797974, "incorrect_loss_raw": 1.5410542488098145, "correct_loss_per_char": 0.09641832113265991, "incorrect_loss_per_char": 0.3852635622024536, "correct_loss_per_token": 0.28925496339797974, "incorrect_loss_per_token": 1.5410542488098145, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5410542488098145, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": false, "logits_per_token": -1.5410542488098145, "logits_per_char": -0.3852635622024536, "num_chars": 4}, {"sum_logits": -0.28925496339797974, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": true, "logits_per_token": -0.28925496339797974, "logits_per_char": -0.09641832113265991, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 481, "native_id": 2452, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7099089622497559, "incorrect_loss_raw": 0.7534011602401733, "correct_loss_per_char": 0.2366363207499186, "incorrect_loss_per_char": 0.18835029006004333, "correct_loss_per_token": 0.7099089622497559, "incorrect_loss_per_token": 0.7534011602401733, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7534011602401733, "num_tokens": 1, "num_tokens_all": 1185, "is_greedy": false, "logits_per_token": -0.7534011602401733, "logits_per_char": -0.18835029006004333, "num_chars": 4}, {"sum_logits": -0.7099089622497559, "num_tokens": 1, "num_tokens_all": 1185, "is_greedy": true, "logits_per_token": -0.7099089622497559, "logits_per_char": -0.2366363207499186, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 482, "native_id": 2405, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7613623142242432, "incorrect_loss_raw": 0.683363676071167, "correct_loss_per_char": 0.1903405785560608, "incorrect_loss_per_char": 0.22778789202372232, "correct_loss_per_token": 0.7613623142242432, "incorrect_loss_per_token": 0.683363676071167, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7613623142242432, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -0.7613623142242432, "logits_per_char": -0.1903405785560608, "num_chars": 4}, {"sum_logits": -0.683363676071167, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.683363676071167, "logits_per_char": -0.22778789202372232, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 483, "native_id": 3220, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0183746814727783, "incorrect_loss_raw": 0.5582390427589417, "correct_loss_per_char": 0.3394582271575928, "incorrect_loss_per_char": 0.1395597606897354, "correct_loss_per_token": 1.0183746814727783, "incorrect_loss_per_token": 0.5582390427589417, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5582390427589417, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": true, "logits_per_token": -0.5582390427589417, "logits_per_char": -0.1395597606897354, "num_chars": 4}, {"sum_logits": -1.0183746814727783, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.0183746814727783, "logits_per_char": -0.3394582271575928, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 484, "native_id": 3059, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8232611417770386, "incorrect_loss_raw": 0.669907808303833, "correct_loss_per_char": 0.20581528544425964, "incorrect_loss_per_char": 0.22330260276794434, "correct_loss_per_token": 0.8232611417770386, "incorrect_loss_per_token": 0.669907808303833, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8232611417770386, "num_tokens": 1, "num_tokens_all": 840, "is_greedy": false, "logits_per_token": -0.8232611417770386, "logits_per_char": -0.20581528544425964, "num_chars": 4}, {"sum_logits": -0.669907808303833, "num_tokens": 1, "num_tokens_all": 840, "is_greedy": true, "logits_per_token": -0.669907808303833, "logits_per_char": -0.22330260276794434, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 485, "native_id": 2106, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4072104692459106, "incorrect_loss_raw": 0.3113778829574585, "correct_loss_per_char": 0.35180261731147766, "incorrect_loss_per_char": 0.10379262765248616, "correct_loss_per_token": 1.4072104692459106, "incorrect_loss_per_token": 0.3113778829574585, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4072104692459106, "num_tokens": 1, "num_tokens_all": 1004, "is_greedy": false, "logits_per_token": -1.4072104692459106, "logits_per_char": -0.35180261731147766, "num_chars": 4}, {"sum_logits": -0.3113778829574585, "num_tokens": 1, "num_tokens_all": 1004, "is_greedy": true, "logits_per_token": -0.3113778829574585, "logits_per_char": -0.10379262765248616, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 486, "native_id": 1823, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6367738246917725, "incorrect_loss_raw": 0.852886438369751, "correct_loss_per_char": 0.15919345617294312, "incorrect_loss_per_char": 0.2842954794565837, "correct_loss_per_token": 0.6367738246917725, "incorrect_loss_per_token": 0.852886438369751, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6367738246917725, "num_tokens": 1, "num_tokens_all": 1029, "is_greedy": true, "logits_per_token": -0.6367738246917725, "logits_per_char": -0.15919345617294312, "num_chars": 4}, {"sum_logits": -0.852886438369751, "num_tokens": 1, "num_tokens_all": 1029, "is_greedy": false, "logits_per_token": -0.852886438369751, "logits_per_char": -0.2842954794565837, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 487, "native_id": 1527, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4004318714141846, "incorrect_loss_raw": 0.3124784827232361, "correct_loss_per_char": 0.35010796785354614, "incorrect_loss_per_char": 0.10415949424107869, "correct_loss_per_token": 1.4004318714141846, "incorrect_loss_per_token": 0.3124784827232361, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4004318714141846, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": false, "logits_per_token": -1.4004318714141846, "logits_per_char": -0.35010796785354614, "num_chars": 4}, {"sum_logits": -0.3124784827232361, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": true, "logits_per_token": -0.3124784827232361, "logits_per_char": -0.10415949424107869, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 488, "native_id": 2532, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4636378288269043, "incorrect_loss_raw": 1.1446468830108643, "correct_loss_per_char": 0.11590945720672607, "incorrect_loss_per_char": 0.3815489610036214, "correct_loss_per_token": 0.4636378288269043, "incorrect_loss_per_token": 1.1446468830108643, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4636378288269043, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": true, "logits_per_token": -0.4636378288269043, "logits_per_char": -0.11590945720672607, "num_chars": 4}, {"sum_logits": -1.1446468830108643, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": false, "logits_per_token": -1.1446468830108643, "logits_per_char": -0.3815489610036214, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 489, "native_id": 420, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6352567672729492, "incorrect_loss_raw": 0.832729697227478, "correct_loss_per_char": 0.21175225575764975, "incorrect_loss_per_char": 0.2081824243068695, "correct_loss_per_token": 0.6352567672729492, "incorrect_loss_per_token": 0.832729697227478, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.832729697227478, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": false, "logits_per_token": -0.832729697227478, "logits_per_char": -0.2081824243068695, "num_chars": 4}, {"sum_logits": -0.6352567672729492, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": true, "logits_per_token": -0.6352567672729492, "logits_per_char": -0.21175225575764975, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 490, "native_id": 2764, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2282357215881348, "incorrect_loss_raw": 0.399710476398468, "correct_loss_per_char": 0.3070589303970337, "incorrect_loss_per_char": 0.133236825466156, "correct_loss_per_token": 1.2282357215881348, "incorrect_loss_per_token": 0.399710476398468, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2282357215881348, "num_tokens": 1, "num_tokens_all": 836, "is_greedy": false, "logits_per_token": -1.2282357215881348, "logits_per_char": -0.3070589303970337, "num_chars": 4}, {"sum_logits": -0.399710476398468, "num_tokens": 1, "num_tokens_all": 836, "is_greedy": true, "logits_per_token": -0.399710476398468, "logits_per_char": -0.133236825466156, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 491, "native_id": 2167, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4683734178543091, "incorrect_loss_raw": 1.1047649383544922, "correct_loss_per_char": 0.15612447261810303, "incorrect_loss_per_char": 0.27619123458862305, "correct_loss_per_token": 0.4683734178543091, "incorrect_loss_per_token": 1.1047649383544922, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1047649383544922, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": false, "logits_per_token": -1.1047649383544922, "logits_per_char": -0.27619123458862305, "num_chars": 4}, {"sum_logits": -0.4683734178543091, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": true, "logits_per_token": -0.4683734178543091, "logits_per_char": -0.15612447261810303, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 492, "native_id": 1644, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1777257919311523, "incorrect_loss_raw": 0.4591901898384094, "correct_loss_per_char": 0.3925752639770508, "incorrect_loss_per_char": 0.11479754745960236, "correct_loss_per_token": 1.1777257919311523, "incorrect_loss_per_token": 0.4591901898384094, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4591901898384094, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": true, "logits_per_token": -0.4591901898384094, "logits_per_char": -0.11479754745960236, "num_chars": 4}, {"sum_logits": -1.1777257919311523, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": false, "logits_per_token": -1.1777257919311523, "logits_per_char": -0.3925752639770508, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 493, "native_id": 2375, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.061415433883667, "incorrect_loss_raw": 0.4682972729206085, "correct_loss_per_char": 0.26535385847091675, "incorrect_loss_per_char": 0.15609909097353616, "correct_loss_per_token": 1.061415433883667, "incorrect_loss_per_token": 0.4682972729206085, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.061415433883667, "num_tokens": 1, "num_tokens_all": 849, "is_greedy": false, "logits_per_token": -1.061415433883667, "logits_per_char": -0.26535385847091675, "num_chars": 4}, {"sum_logits": -0.4682972729206085, "num_tokens": 1, "num_tokens_all": 849, "is_greedy": true, "logits_per_token": -0.4682972729206085, "logits_per_char": -0.15609909097353616, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 494, "native_id": 520, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5962173342704773, "incorrect_loss_raw": 0.9197938442230225, "correct_loss_per_char": 0.19873911142349243, "incorrect_loss_per_char": 0.22994846105575562, "correct_loss_per_token": 0.5962173342704773, "incorrect_loss_per_token": 0.9197938442230225, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9197938442230225, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -0.9197938442230225, "logits_per_char": -0.22994846105575562, "num_chars": 4}, {"sum_logits": -0.5962173342704773, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -0.5962173342704773, "logits_per_char": -0.19873911142349243, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 495, "native_id": 434, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3689754605293274, "incorrect_loss_raw": 1.2657948732376099, "correct_loss_per_char": 0.12299182017644246, "incorrect_loss_per_char": 0.31644871830940247, "correct_loss_per_token": 0.3689754605293274, "incorrect_loss_per_token": 1.2657948732376099, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2657948732376099, "num_tokens": 1, "num_tokens_all": 885, "is_greedy": false, "logits_per_token": -1.2657948732376099, "logits_per_char": -0.31644871830940247, "num_chars": 4}, {"sum_logits": -0.3689754605293274, "num_tokens": 1, "num_tokens_all": 885, "is_greedy": true, "logits_per_token": -0.3689754605293274, "logits_per_char": -0.12299182017644246, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 496, "native_id": 1922, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6426334381103516, "incorrect_loss_raw": 0.8047369718551636, "correct_loss_per_char": 0.1606583595275879, "incorrect_loss_per_char": 0.2682456572850545, "correct_loss_per_token": 0.6426334381103516, "incorrect_loss_per_token": 0.8047369718551636, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6426334381103516, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": true, "logits_per_token": -0.6426334381103516, "logits_per_char": -0.1606583595275879, "num_chars": 4}, {"sum_logits": -0.8047369718551636, "num_tokens": 1, "num_tokens_all": 1081, "is_greedy": false, "logits_per_token": -0.8047369718551636, "logits_per_char": -0.2682456572850545, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 497, "native_id": 1999, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.656339704990387, "incorrect_loss_raw": 0.8097946047782898, "correct_loss_per_char": 0.2187799016634623, "incorrect_loss_per_char": 0.20244865119457245, "correct_loss_per_token": 0.656339704990387, "incorrect_loss_per_token": 0.8097946047782898, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8097946047782898, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": false, "logits_per_token": -0.8097946047782898, "logits_per_char": -0.20244865119457245, "num_chars": 4}, {"sum_logits": -0.656339704990387, "num_tokens": 1, "num_tokens_all": 1101, "is_greedy": true, "logits_per_token": -0.656339704990387, "logits_per_char": -0.2187799016634623, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 498, "native_id": 396, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3592471480369568, "incorrect_loss_raw": 1.279754877090454, "correct_loss_per_char": 0.11974904934565227, "incorrect_loss_per_char": 0.3199387192726135, "correct_loss_per_token": 0.3592471480369568, "incorrect_loss_per_token": 1.279754877090454, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.279754877090454, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": false, "logits_per_token": -1.279754877090454, "logits_per_char": -0.3199387192726135, "num_chars": 4}, {"sum_logits": -0.3592471480369568, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": true, "logits_per_token": -0.3592471480369568, "logits_per_char": -0.11974904934565227, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 499, "native_id": 2237, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3167953491210938, "incorrect_loss_raw": 0.34714576601982117, "correct_loss_per_char": 0.32919883728027344, "incorrect_loss_per_char": 0.11571525533994038, "correct_loss_per_token": 1.3167953491210938, "incorrect_loss_per_token": 0.34714576601982117, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3167953491210938, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": false, "logits_per_token": -1.3167953491210938, "logits_per_char": -0.32919883728027344, "num_chars": 4}, {"sum_logits": -0.34714576601982117, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": true, "logits_per_token": -0.34714576601982117, "logits_per_char": -0.11571525533994038, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 500, "native_id": 2284, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7274971604347229, "incorrect_loss_raw": 0.7091279029846191, "correct_loss_per_char": 0.18187429010868073, "incorrect_loss_per_char": 0.2363759676615397, "correct_loss_per_token": 0.7274971604347229, "incorrect_loss_per_token": 0.7091279029846191, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7274971604347229, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -0.7274971604347229, "logits_per_char": -0.18187429010868073, "num_chars": 4}, {"sum_logits": -0.7091279029846191, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": true, "logits_per_token": -0.7091279029846191, "logits_per_char": -0.2363759676615397, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 501, "native_id": 540, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7269878387451172, "incorrect_loss_raw": 0.21841676533222198, "correct_loss_per_char": 0.4317469596862793, "incorrect_loss_per_char": 0.07280558844407399, "correct_loss_per_token": 1.7269878387451172, "incorrect_loss_per_token": 0.21841676533222198, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7269878387451172, "num_tokens": 1, "num_tokens_all": 1007, "is_greedy": false, "logits_per_token": -1.7269878387451172, "logits_per_char": -0.4317469596862793, "num_chars": 4}, {"sum_logits": -0.21841676533222198, "num_tokens": 1, "num_tokens_all": 1007, "is_greedy": true, "logits_per_token": -0.21841676533222198, "logits_per_char": -0.07280558844407399, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 502, "native_id": 1048, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3370901346206665, "incorrect_loss_raw": 1.3825454711914062, "correct_loss_per_char": 0.11236337820688884, "incorrect_loss_per_char": 0.34563636779785156, "correct_loss_per_token": 0.3370901346206665, "incorrect_loss_per_token": 1.3825454711914062, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3825454711914062, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": false, "logits_per_token": -1.3825454711914062, "logits_per_char": -0.34563636779785156, "num_chars": 4}, {"sum_logits": -0.3370901346206665, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": true, "logits_per_token": -0.3370901346206665, "logits_per_char": -0.11236337820688884, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 503, "native_id": 978, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4149322509765625, "incorrect_loss_raw": 0.3161015510559082, "correct_loss_per_char": 0.3537330627441406, "incorrect_loss_per_char": 0.10536718368530273, "correct_loss_per_token": 1.4149322509765625, "incorrect_loss_per_token": 0.3161015510559082, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4149322509765625, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": false, "logits_per_token": -1.4149322509765625, "logits_per_char": -0.3537330627441406, "num_chars": 4}, {"sum_logits": -0.3161015510559082, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": true, "logits_per_token": -0.3161015510559082, "logits_per_char": -0.10536718368530273, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 504, "native_id": 2880, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7308585047721863, "incorrect_loss_raw": 0.7725638151168823, "correct_loss_per_char": 0.18271462619304657, "incorrect_loss_per_char": 0.25752127170562744, "correct_loss_per_token": 0.7308585047721863, "incorrect_loss_per_token": 0.7725638151168823, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7308585047721863, "num_tokens": 1, "num_tokens_all": 1021, "is_greedy": true, "logits_per_token": -0.7308585047721863, "logits_per_char": -0.18271462619304657, "num_chars": 4}, {"sum_logits": -0.7725638151168823, "num_tokens": 1, "num_tokens_all": 1021, "is_greedy": false, "logits_per_token": -0.7725638151168823, "logits_per_char": -0.25752127170562744, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 505, "native_id": 1373, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1508768796920776, "incorrect_loss_raw": 0.4304538071155548, "correct_loss_per_char": 0.2877192199230194, "incorrect_loss_per_char": 0.1434846023718516, "correct_loss_per_token": 1.1508768796920776, "incorrect_loss_per_token": 0.4304538071155548, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1508768796920776, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": false, "logits_per_token": -1.1508768796920776, "logits_per_char": -0.2877192199230194, "num_chars": 4}, {"sum_logits": -0.4304538071155548, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": true, "logits_per_token": -0.4304538071155548, "logits_per_char": -0.1434846023718516, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 506, "native_id": 1606, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8689217567443848, "incorrect_loss_raw": 0.6074003577232361, "correct_loss_per_char": 0.2172304391860962, "incorrect_loss_per_char": 0.20246678590774536, "correct_loss_per_token": 0.8689217567443848, "incorrect_loss_per_token": 0.6074003577232361, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8689217567443848, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": false, "logits_per_token": -0.8689217567443848, "logits_per_char": -0.2172304391860962, "num_chars": 4}, {"sum_logits": -0.6074003577232361, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": true, "logits_per_token": -0.6074003577232361, "logits_per_char": -0.20246678590774536, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 507, "native_id": 1202, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8673990368843079, "incorrect_loss_raw": 0.578318178653717, "correct_loss_per_char": 0.21684975922107697, "incorrect_loss_per_char": 0.19277272621790567, "correct_loss_per_token": 0.8673990368843079, "incorrect_loss_per_token": 0.578318178653717, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8673990368843079, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -0.8673990368843079, "logits_per_char": -0.21684975922107697, "num_chars": 4}, {"sum_logits": -0.578318178653717, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": true, "logits_per_token": -0.578318178653717, "logits_per_char": -0.19277272621790567, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 508, "native_id": 2138, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9752944707870483, "incorrect_loss_raw": 0.5503029823303223, "correct_loss_per_char": 0.3250981569290161, "incorrect_loss_per_char": 0.13757574558258057, "correct_loss_per_token": 0.9752944707870483, "incorrect_loss_per_token": 0.5503029823303223, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5503029823303223, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": true, "logits_per_token": -0.5503029823303223, "logits_per_char": -0.13757574558258057, "num_chars": 4}, {"sum_logits": -0.9752944707870483, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": false, "logits_per_token": -0.9752944707870483, "logits_per_char": -0.3250981569290161, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 509, "native_id": 1453, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5943335294723511, "incorrect_loss_raw": 1.007265567779541, "correct_loss_per_char": 0.1981111764907837, "incorrect_loss_per_char": 0.25181639194488525, "correct_loss_per_token": 0.5943335294723511, "incorrect_loss_per_token": 1.007265567779541, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.007265567779541, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": false, "logits_per_token": -1.007265567779541, "logits_per_char": -0.25181639194488525, "num_chars": 4}, {"sum_logits": -0.5943335294723511, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": true, "logits_per_token": -0.5943335294723511, "logits_per_char": -0.1981111764907837, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 510, "native_id": 1660, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3544061481952667, "incorrect_loss_raw": 1.301332712173462, "correct_loss_per_char": 0.08860153704881668, "incorrect_loss_per_char": 0.4337775707244873, "correct_loss_per_token": 0.3544061481952667, "incorrect_loss_per_token": 1.301332712173462, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3544061481952667, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": true, "logits_per_token": -0.3544061481952667, "logits_per_char": -0.08860153704881668, "num_chars": 4}, {"sum_logits": -1.301332712173462, "num_tokens": 1, "num_tokens_all": 1054, "is_greedy": false, "logits_per_token": -1.301332712173462, "logits_per_char": -0.4337775707244873, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 511, "native_id": 2244, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.109344482421875, "incorrect_loss_raw": 0.4294969141483307, "correct_loss_per_char": 0.27733612060546875, "incorrect_loss_per_char": 0.14316563804944357, "correct_loss_per_token": 1.109344482421875, "incorrect_loss_per_token": 0.4294969141483307, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.109344482421875, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": false, "logits_per_token": -1.109344482421875, "logits_per_char": -0.27733612060546875, "num_chars": 4}, {"sum_logits": -0.4294969141483307, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": true, "logits_per_token": -0.4294969141483307, "logits_per_char": -0.14316563804944357, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 512, "native_id": 771, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5226279497146606, "incorrect_loss_raw": 1.1542006731033325, "correct_loss_per_char": 0.13065698742866516, "incorrect_loss_per_char": 0.38473355770111084, "correct_loss_per_token": 0.5226279497146606, "incorrect_loss_per_token": 1.1542006731033325, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5226279497146606, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": true, "logits_per_token": -0.5226279497146606, "logits_per_char": -0.13065698742866516, "num_chars": 4}, {"sum_logits": -1.1542006731033325, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": false, "logits_per_token": -1.1542006731033325, "logits_per_char": -0.38473355770111084, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 513, "native_id": 2480, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3103103637695312, "incorrect_loss_raw": 0.353047639131546, "correct_loss_per_char": 0.3275775909423828, "incorrect_loss_per_char": 0.117682546377182, "correct_loss_per_token": 1.3103103637695312, "incorrect_loss_per_token": 0.353047639131546, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3103103637695312, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": false, "logits_per_token": -1.3103103637695312, "logits_per_char": -0.3275775909423828, "num_chars": 4}, {"sum_logits": -0.353047639131546, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": true, "logits_per_token": -0.353047639131546, "logits_per_char": -0.117682546377182, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 514, "native_id": 1937, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6244505643844604, "incorrect_loss_raw": 0.8301193714141846, "correct_loss_per_char": 0.20815018812815347, "incorrect_loss_per_char": 0.20752984285354614, "correct_loss_per_token": 0.6244505643844604, "incorrect_loss_per_token": 0.8301193714141846, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8301193714141846, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": false, "logits_per_token": -0.8301193714141846, "logits_per_char": -0.20752984285354614, "num_chars": 4}, {"sum_logits": -0.6244505643844604, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": true, "logits_per_token": -0.6244505643844604, "logits_per_char": -0.20815018812815347, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 515, "native_id": 1907, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.34090369939804077, "incorrect_loss_raw": 1.3386685848236084, "correct_loss_per_char": 0.1136345664660136, "incorrect_loss_per_char": 0.3346671462059021, "correct_loss_per_token": 0.34090369939804077, "incorrect_loss_per_token": 1.3386685848236084, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3386685848236084, "num_tokens": 1, "num_tokens_all": 1044, "is_greedy": false, "logits_per_token": -1.3386685848236084, "logits_per_char": -0.3346671462059021, "num_chars": 4}, {"sum_logits": -0.34090369939804077, "num_tokens": 1, "num_tokens_all": 1044, "is_greedy": true, "logits_per_token": -0.34090369939804077, "logits_per_char": -0.1136345664660136, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 516, "native_id": 1308, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9214845895767212, "incorrect_loss_raw": 0.5401402711868286, "correct_loss_per_char": 0.2303711473941803, "incorrect_loss_per_char": 0.1800467570622762, "correct_loss_per_token": 0.9214845895767212, "incorrect_loss_per_token": 0.5401402711868286, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9214845895767212, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": false, "logits_per_token": -0.9214845895767212, "logits_per_char": -0.2303711473941803, "num_chars": 4}, {"sum_logits": -0.5401402711868286, "num_tokens": 1, "num_tokens_all": 981, "is_greedy": true, "logits_per_token": -0.5401402711868286, "logits_per_char": -0.1800467570622762, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 517, "native_id": 1808, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6909967660903931, "incorrect_loss_raw": 0.7511208057403564, "correct_loss_per_char": 0.17274919152259827, "incorrect_loss_per_char": 0.25037360191345215, "correct_loss_per_token": 0.6909967660903931, "incorrect_loss_per_token": 0.7511208057403564, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6909967660903931, "num_tokens": 1, "num_tokens_all": 1024, "is_greedy": true, "logits_per_token": -0.6909967660903931, "logits_per_char": -0.17274919152259827, "num_chars": 4}, {"sum_logits": -0.7511208057403564, "num_tokens": 1, "num_tokens_all": 1024, "is_greedy": false, "logits_per_token": -0.7511208057403564, "logits_per_char": -0.25037360191345215, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 518, "native_id": 2149, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.480773389339447, "incorrect_loss_raw": 1.0251907110214233, "correct_loss_per_char": 0.16025779644648233, "incorrect_loss_per_char": 0.25629767775535583, "correct_loss_per_token": 0.480773389339447, "incorrect_loss_per_token": 1.0251907110214233, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0251907110214233, "num_tokens": 1, "num_tokens_all": 1007, "is_greedy": false, "logits_per_token": -1.0251907110214233, "logits_per_char": -0.25629767775535583, "num_chars": 4}, {"sum_logits": -0.480773389339447, "num_tokens": 1, "num_tokens_all": 1007, "is_greedy": true, "logits_per_token": -0.480773389339447, "logits_per_char": -0.16025779644648233, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 519, "native_id": 441, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3035213053226471, "incorrect_loss_raw": 1.5261509418487549, "correct_loss_per_char": 0.07588032633066177, "incorrect_loss_per_char": 0.5087169806162516, "correct_loss_per_token": 0.3035213053226471, "incorrect_loss_per_token": 1.5261509418487549, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3035213053226471, "num_tokens": 1, "num_tokens_all": 1000, "is_greedy": true, "logits_per_token": -0.3035213053226471, "logits_per_char": -0.07588032633066177, "num_chars": 4}, {"sum_logits": -1.5261509418487549, "num_tokens": 1, "num_tokens_all": 1000, "is_greedy": false, "logits_per_token": -1.5261509418487549, "logits_per_char": -0.5087169806162516, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 520, "native_id": 2208, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3877761960029602, "incorrect_loss_raw": 1.2973272800445557, "correct_loss_per_char": 0.12925873200098673, "incorrect_loss_per_char": 0.3243318200111389, "correct_loss_per_token": 0.3877761960029602, "incorrect_loss_per_token": 1.2973272800445557, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2973272800445557, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": false, "logits_per_token": -1.2973272800445557, "logits_per_char": -0.3243318200111389, "num_chars": 4}, {"sum_logits": -0.3877761960029602, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": true, "logits_per_token": -0.3877761960029602, "logits_per_char": -0.12925873200098673, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 521, "native_id": 1897, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5157828330993652, "incorrect_loss_raw": 0.28479617834091187, "correct_loss_per_char": 0.3789457082748413, "incorrect_loss_per_char": 0.09493205944697063, "correct_loss_per_token": 1.5157828330993652, "incorrect_loss_per_token": 0.28479617834091187, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5157828330993652, "num_tokens": 1, "num_tokens_all": 851, "is_greedy": false, "logits_per_token": -1.5157828330993652, "logits_per_char": -0.3789457082748413, "num_chars": 4}, {"sum_logits": -0.28479617834091187, "num_tokens": 1, "num_tokens_all": 851, "is_greedy": true, "logits_per_token": -0.28479617834091187, "logits_per_char": -0.09493205944697063, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 522, "native_id": 351, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9585717916488647, "incorrect_loss_raw": 0.5911535024642944, "correct_loss_per_char": 0.3195239305496216, "incorrect_loss_per_char": 0.1477883756160736, "correct_loss_per_token": 0.9585717916488647, "incorrect_loss_per_token": 0.5911535024642944, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5911535024642944, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": true, "logits_per_token": -0.5911535024642944, "logits_per_char": -0.1477883756160736, "num_chars": 4}, {"sum_logits": -0.9585717916488647, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": false, "logits_per_token": -0.9585717916488647, "logits_per_char": -0.3195239305496216, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 523, "native_id": 311, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7996670603752136, "incorrect_loss_raw": 0.6561453938484192, "correct_loss_per_char": 0.26655568679173786, "incorrect_loss_per_char": 0.1640363484621048, "correct_loss_per_token": 0.7996670603752136, "incorrect_loss_per_token": 0.6561453938484192, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6561453938484192, "num_tokens": 1, "num_tokens_all": 860, "is_greedy": true, "logits_per_token": -0.6561453938484192, "logits_per_char": -0.1640363484621048, "num_chars": 4}, {"sum_logits": -0.7996670603752136, "num_tokens": 1, "num_tokens_all": 860, "is_greedy": false, "logits_per_token": -0.7996670603752136, "logits_per_char": -0.26655568679173786, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 524, "native_id": 808, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0588176250457764, "incorrect_loss_raw": 0.48998552560806274, "correct_loss_per_char": 0.2647044062614441, "incorrect_loss_per_char": 0.1633285085360209, "correct_loss_per_token": 1.0588176250457764, "incorrect_loss_per_token": 0.48998552560806274, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0588176250457764, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.0588176250457764, "logits_per_char": -0.2647044062614441, "num_chars": 4}, {"sum_logits": -0.48998552560806274, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": true, "logits_per_token": -0.48998552560806274, "logits_per_char": -0.1633285085360209, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 525, "native_id": 720, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6562253832817078, "incorrect_loss_raw": 0.819139838218689, "correct_loss_per_char": 0.21874179442723593, "incorrect_loss_per_char": 0.20478495955467224, "correct_loss_per_token": 0.6562253832817078, "incorrect_loss_per_token": 0.819139838218689, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.819139838218689, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -0.819139838218689, "logits_per_char": -0.20478495955467224, "num_chars": 4}, {"sum_logits": -0.6562253832817078, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.6562253832817078, "logits_per_char": -0.21874179442723593, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 526, "native_id": 2489, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9776034951210022, "incorrect_loss_raw": 0.5143054127693176, "correct_loss_per_char": 0.24440087378025055, "incorrect_loss_per_char": 0.17143513758977255, "correct_loss_per_token": 0.9776034951210022, "incorrect_loss_per_token": 0.5143054127693176, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9776034951210022, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": false, "logits_per_token": -0.9776034951210022, "logits_per_char": -0.24440087378025055, "num_chars": 4}, {"sum_logits": -0.5143054127693176, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": true, "logits_per_token": -0.5143054127693176, "logits_per_char": -0.17143513758977255, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 527, "native_id": 1375, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3621397018432617, "incorrect_loss_raw": 1.388769507408142, "correct_loss_per_char": 0.09053492546081543, "incorrect_loss_per_char": 0.46292316913604736, "correct_loss_per_token": 0.3621397018432617, "incorrect_loss_per_token": 1.388769507408142, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3621397018432617, "num_tokens": 1, "num_tokens_all": 1041, "is_greedy": true, "logits_per_token": -0.3621397018432617, "logits_per_char": -0.09053492546081543, "num_chars": 4}, {"sum_logits": -1.388769507408142, "num_tokens": 1, "num_tokens_all": 1041, "is_greedy": false, "logits_per_token": -1.388769507408142, "logits_per_char": -0.46292316913604736, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 528, "native_id": 707, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3916662037372589, "incorrect_loss_raw": 1.2452727556228638, "correct_loss_per_char": 0.13055540124575296, "incorrect_loss_per_char": 0.31131818890571594, "correct_loss_per_token": 0.3916662037372589, "incorrect_loss_per_token": 1.2452727556228638, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2452727556228638, "num_tokens": 1, "num_tokens_all": 862, "is_greedy": false, "logits_per_token": -1.2452727556228638, "logits_per_char": -0.31131818890571594, "num_chars": 4}, {"sum_logits": -0.3916662037372589, "num_tokens": 1, "num_tokens_all": 862, "is_greedy": true, "logits_per_token": -0.3916662037372589, "logits_per_char": -0.13055540124575296, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 529, "native_id": 1547, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8174404501914978, "incorrect_loss_raw": 0.6958956122398376, "correct_loss_per_char": 0.20436011254787445, "incorrect_loss_per_char": 0.2319652040799459, "correct_loss_per_token": 0.8174404501914978, "incorrect_loss_per_token": 0.6958956122398376, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8174404501914978, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": false, "logits_per_token": -0.8174404501914978, "logits_per_char": -0.20436011254787445, "num_chars": 4}, {"sum_logits": -0.6958956122398376, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": true, "logits_per_token": -0.6958956122398376, "logits_per_char": -0.2319652040799459, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 530, "native_id": 3176, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6994958519935608, "incorrect_loss_raw": 0.8324964046478271, "correct_loss_per_char": 0.1748739629983902, "incorrect_loss_per_char": 0.2774988015492757, "correct_loss_per_token": 0.6994958519935608, "incorrect_loss_per_token": 0.8324964046478271, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6994958519935608, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -0.6994958519935608, "logits_per_char": -0.1748739629983902, "num_chars": 4}, {"sum_logits": -0.8324964046478271, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -0.8324964046478271, "logits_per_char": -0.2774988015492757, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 531, "native_id": 817, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5154192447662354, "incorrect_loss_raw": 0.9538782835006714, "correct_loss_per_char": 0.12885481119155884, "incorrect_loss_per_char": 0.31795942783355713, "correct_loss_per_token": 0.5154192447662354, "incorrect_loss_per_token": 0.9538782835006714, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5154192447662354, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": true, "logits_per_token": -0.5154192447662354, "logits_per_char": -0.12885481119155884, "num_chars": 4}, {"sum_logits": -0.9538782835006714, "num_tokens": 1, "num_tokens_all": 986, "is_greedy": false, "logits_per_token": -0.9538782835006714, "logits_per_char": -0.31795942783355713, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 532, "native_id": 1083, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4262644052505493, "incorrect_loss_raw": 0.38786599040031433, "correct_loss_per_char": 0.35656610131263733, "incorrect_loss_per_char": 0.12928866346677145, "correct_loss_per_token": 1.4262644052505493, "incorrect_loss_per_token": 0.38786599040031433, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4262644052505493, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": false, "logits_per_token": -1.4262644052505493, "logits_per_char": -0.35656610131263733, "num_chars": 4}, {"sum_logits": -0.38786599040031433, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": true, "logits_per_token": -0.38786599040031433, "logits_per_char": -0.12928866346677145, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 533, "native_id": 120, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0238711833953857, "incorrect_loss_raw": 0.5128893256187439, "correct_loss_per_char": 0.25596779584884644, "incorrect_loss_per_char": 0.1709631085395813, "correct_loss_per_token": 1.0238711833953857, "incorrect_loss_per_token": 0.5128893256187439, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0238711833953857, "num_tokens": 1, "num_tokens_all": 1034, "is_greedy": false, "logits_per_token": -1.0238711833953857, "logits_per_char": -0.25596779584884644, "num_chars": 4}, {"sum_logits": -0.5128893256187439, "num_tokens": 1, "num_tokens_all": 1034, "is_greedy": true, "logits_per_token": -0.5128893256187439, "logits_per_char": -0.1709631085395813, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 534, "native_id": 647, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5446674823760986, "incorrect_loss_raw": 0.9419727921485901, "correct_loss_per_char": 0.18155582745869955, "incorrect_loss_per_char": 0.23549319803714752, "correct_loss_per_token": 0.5446674823760986, "incorrect_loss_per_token": 0.9419727921485901, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9419727921485901, "num_tokens": 1, "num_tokens_all": 889, "is_greedy": false, "logits_per_token": -0.9419727921485901, "logits_per_char": -0.23549319803714752, "num_chars": 4}, {"sum_logits": -0.5446674823760986, "num_tokens": 1, "num_tokens_all": 889, "is_greedy": true, "logits_per_token": -0.5446674823760986, "logits_per_char": -0.18155582745869955, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 535, "native_id": 2710, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5666370987892151, "incorrect_loss_raw": 0.9035004377365112, "correct_loss_per_char": 0.18887903292973837, "incorrect_loss_per_char": 0.2258751094341278, "correct_loss_per_token": 0.5666370987892151, "incorrect_loss_per_token": 0.9035004377365112, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9035004377365112, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": false, "logits_per_token": -0.9035004377365112, "logits_per_char": -0.2258751094341278, "num_chars": 4}, {"sum_logits": -0.5666370987892151, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": true, "logits_per_token": -0.5666370987892151, "logits_per_char": -0.18887903292973837, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 536, "native_id": 1294, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7144078016281128, "incorrect_loss_raw": 0.7125595808029175, "correct_loss_per_char": 0.1786019504070282, "incorrect_loss_per_char": 0.23751986026763916, "correct_loss_per_token": 0.7144078016281128, "incorrect_loss_per_token": 0.7125595808029175, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7144078016281128, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": false, "logits_per_token": -0.7144078016281128, "logits_per_char": -0.1786019504070282, "num_chars": 4}, {"sum_logits": -0.7125595808029175, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": true, "logits_per_token": -0.7125595808029175, "logits_per_char": -0.23751986026763916, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 537, "native_id": 2964, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9488403797149658, "incorrect_loss_raw": 0.6017512083053589, "correct_loss_per_char": 0.23721009492874146, "incorrect_loss_per_char": 0.2005837361017863, "correct_loss_per_token": 0.9488403797149658, "incorrect_loss_per_token": 0.6017512083053589, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9488403797149658, "num_tokens": 1, "num_tokens_all": 870, "is_greedy": false, "logits_per_token": -0.9488403797149658, "logits_per_char": -0.23721009492874146, "num_chars": 4}, {"sum_logits": -0.6017512083053589, "num_tokens": 1, "num_tokens_all": 870, "is_greedy": true, "logits_per_token": -0.6017512083053589, "logits_per_char": -0.2005837361017863, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 538, "native_id": 408, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.39591285586357117, "incorrect_loss_raw": 1.1798232793807983, "correct_loss_per_char": 0.09897821396589279, "incorrect_loss_per_char": 0.3932744264602661, "correct_loss_per_token": 0.39591285586357117, "incorrect_loss_per_token": 1.1798232793807983, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.39591285586357117, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": true, "logits_per_token": -0.39591285586357117, "logits_per_char": -0.09897821396589279, "num_chars": 4}, {"sum_logits": -1.1798232793807983, "num_tokens": 1, "num_tokens_all": 1060, "is_greedy": false, "logits_per_token": -1.1798232793807983, "logits_per_char": -0.3932744264602661, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 539, "native_id": 3161, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7373042106628418, "incorrect_loss_raw": 0.7460734844207764, "correct_loss_per_char": 0.18432605266571045, "incorrect_loss_per_char": 0.24869116147359213, "correct_loss_per_token": 0.7373042106628418, "incorrect_loss_per_token": 0.7460734844207764, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7373042106628418, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": true, "logits_per_token": -0.7373042106628418, "logits_per_char": -0.18432605266571045, "num_chars": 4}, {"sum_logits": -0.7460734844207764, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -0.7460734844207764, "logits_per_char": -0.24869116147359213, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 540, "native_id": 228, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6520987153053284, "incorrect_loss_raw": 0.8307004570960999, "correct_loss_per_char": 0.1630246788263321, "incorrect_loss_per_char": 0.27690015236536664, "correct_loss_per_token": 0.6520987153053284, "incorrect_loss_per_token": 0.8307004570960999, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6520987153053284, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": true, "logits_per_token": -0.6520987153053284, "logits_per_char": -0.1630246788263321, "num_chars": 4}, {"sum_logits": -0.8307004570960999, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": false, "logits_per_token": -0.8307004570960999, "logits_per_char": -0.27690015236536664, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 541, "native_id": 3043, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2861566543579102, "incorrect_loss_raw": 0.3537624478340149, "correct_loss_per_char": 0.32153916358947754, "incorrect_loss_per_char": 0.11792081594467163, "correct_loss_per_token": 1.2861566543579102, "incorrect_loss_per_token": 0.3537624478340149, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2861566543579102, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": false, "logits_per_token": -1.2861566543579102, "logits_per_char": -0.32153916358947754, "num_chars": 4}, {"sum_logits": -0.3537624478340149, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": true, "logits_per_token": -0.3537624478340149, "logits_per_char": -0.11792081594467163, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 542, "native_id": 1736, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5728633999824524, "incorrect_loss_raw": 0.9397873282432556, "correct_loss_per_char": 0.1432158499956131, "incorrect_loss_per_char": 0.3132624427477519, "correct_loss_per_token": 0.5728633999824524, "incorrect_loss_per_token": 0.9397873282432556, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5728633999824524, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": true, "logits_per_token": -0.5728633999824524, "logits_per_char": -0.1432158499956131, "num_chars": 4}, {"sum_logits": -0.9397873282432556, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -0.9397873282432556, "logits_per_char": -0.3132624427477519, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 543, "native_id": 1323, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8924470543861389, "incorrect_loss_raw": 0.6293305158615112, "correct_loss_per_char": 0.22311176359653473, "incorrect_loss_per_char": 0.20977683862050375, "correct_loss_per_token": 0.8924470543861389, "incorrect_loss_per_token": 0.6293305158615112, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8924470543861389, "num_tokens": 1, "num_tokens_all": 861, "is_greedy": false, "logits_per_token": -0.8924470543861389, "logits_per_char": -0.22311176359653473, "num_chars": 4}, {"sum_logits": -0.6293305158615112, "num_tokens": 1, "num_tokens_all": 861, "is_greedy": true, "logits_per_token": -0.6293305158615112, "logits_per_char": -0.20977683862050375, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 544, "native_id": 1392, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5048997402191162, "incorrect_loss_raw": 1.1139025688171387, "correct_loss_per_char": 0.12622493505477905, "incorrect_loss_per_char": 0.3713008562723796, "correct_loss_per_token": 0.5048997402191162, "incorrect_loss_per_token": 1.1139025688171387, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5048997402191162, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": true, "logits_per_token": -0.5048997402191162, "logits_per_char": -0.12622493505477905, "num_chars": 4}, {"sum_logits": -1.1139025688171387, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": false, "logits_per_token": -1.1139025688171387, "logits_per_char": -0.3713008562723796, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 545, "native_id": 3020, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9217121005058289, "incorrect_loss_raw": 0.5981780290603638, "correct_loss_per_char": 0.23042802512645721, "incorrect_loss_per_char": 0.1993926763534546, "correct_loss_per_token": 0.9217121005058289, "incorrect_loss_per_token": 0.5981780290603638, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9217121005058289, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": false, "logits_per_token": -0.9217121005058289, "logits_per_char": -0.23042802512645721, "num_chars": 4}, {"sum_logits": -0.5981780290603638, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": true, "logits_per_token": -0.5981780290603638, "logits_per_char": -0.1993926763534546, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 546, "native_id": 2426, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.38265565037727356, "incorrect_loss_raw": 1.2838705778121948, "correct_loss_per_char": 0.09566391259431839, "incorrect_loss_per_char": 0.4279568592707316, "correct_loss_per_token": 0.38265565037727356, "incorrect_loss_per_token": 1.2838705778121948, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.38265565037727356, "num_tokens": 1, "num_tokens_all": 1007, "is_greedy": true, "logits_per_token": -0.38265565037727356, "logits_per_char": -0.09566391259431839, "num_chars": 4}, {"sum_logits": -1.2838705778121948, "num_tokens": 1, "num_tokens_all": 1007, "is_greedy": false, "logits_per_token": -1.2838705778121948, "logits_per_char": -0.4279568592707316, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 547, "native_id": 1776, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5085798501968384, "incorrect_loss_raw": 0.9866523742675781, "correct_loss_per_char": 0.1271449625492096, "incorrect_loss_per_char": 0.3288841247558594, "correct_loss_per_token": 0.5085798501968384, "incorrect_loss_per_token": 0.9866523742675781, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5085798501968384, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": true, "logits_per_token": -0.5085798501968384, "logits_per_char": -0.1271449625492096, "num_chars": 4}, {"sum_logits": -0.9866523742675781, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -0.9866523742675781, "logits_per_char": -0.3288841247558594, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 548, "native_id": 2362, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.27376383543014526, "incorrect_loss_raw": 1.6051928997039795, "correct_loss_per_char": 0.09125461181004842, "incorrect_loss_per_char": 0.4012982249259949, "correct_loss_per_token": 0.27376383543014526, "incorrect_loss_per_token": 1.6051928997039795, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6051928997039795, "num_tokens": 1, "num_tokens_all": 866, "is_greedy": false, "logits_per_token": -1.6051928997039795, "logits_per_char": -0.4012982249259949, "num_chars": 4}, {"sum_logits": -0.27376383543014526, "num_tokens": 1, "num_tokens_all": 866, "is_greedy": true, "logits_per_token": -0.27376383543014526, "logits_per_char": -0.09125461181004842, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 549, "native_id": 681, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1907455921173096, "incorrect_loss_raw": 0.404928058385849, "correct_loss_per_char": 0.2976863980293274, "incorrect_loss_per_char": 0.13497601946194968, "correct_loss_per_token": 1.1907455921173096, "incorrect_loss_per_token": 0.404928058385849, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1907455921173096, "num_tokens": 1, "num_tokens_all": 897, "is_greedy": false, "logits_per_token": -1.1907455921173096, "logits_per_char": -0.2976863980293274, "num_chars": 4}, {"sum_logits": -0.404928058385849, "num_tokens": 1, "num_tokens_all": 897, "is_greedy": true, "logits_per_token": -0.404928058385849, "logits_per_char": -0.13497601946194968, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 550, "native_id": 1539, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8501510620117188, "incorrect_loss_raw": 0.6119920015335083, "correct_loss_per_char": 0.2125377655029297, "incorrect_loss_per_char": 0.20399733384450278, "correct_loss_per_token": 0.8501510620117188, "incorrect_loss_per_token": 0.6119920015335083, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8501510620117188, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": false, "logits_per_token": -0.8501510620117188, "logits_per_char": -0.2125377655029297, "num_chars": 4}, {"sum_logits": -0.6119920015335083, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": true, "logits_per_token": -0.6119920015335083, "logits_per_char": -0.20399733384450278, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 551, "native_id": 2945, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19798770546913147, "incorrect_loss_raw": 2.043403148651123, "correct_loss_per_char": 0.06599590182304382, "incorrect_loss_per_char": 0.5108507871627808, "correct_loss_per_token": 0.19798770546913147, "incorrect_loss_per_token": 2.043403148651123, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -2.043403148651123, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -2.043403148651123, "logits_per_char": -0.5108507871627808, "num_chars": 4}, {"sum_logits": -0.19798770546913147, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": true, "logits_per_token": -0.19798770546913147, "logits_per_char": -0.06599590182304382, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 552, "native_id": 36, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4286862909793854, "incorrect_loss_raw": 1.1138168573379517, "correct_loss_per_char": 0.1428954303264618, "incorrect_loss_per_char": 0.2784542143344879, "correct_loss_per_token": 0.4286862909793854, "incorrect_loss_per_token": 1.1138168573379517, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1138168573379517, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.1138168573379517, "logits_per_char": -0.2784542143344879, "num_chars": 4}, {"sum_logits": -0.4286862909793854, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.4286862909793854, "logits_per_char": -0.1428954303264618, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 553, "native_id": 1184, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.807010293006897, "incorrect_loss_raw": 0.6535620093345642, "correct_loss_per_char": 0.20175257325172424, "incorrect_loss_per_char": 0.2178540031115214, "correct_loss_per_token": 0.807010293006897, "incorrect_loss_per_token": 0.6535620093345642, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.807010293006897, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": false, "logits_per_token": -0.807010293006897, "logits_per_char": -0.20175257325172424, "num_chars": 4}, {"sum_logits": -0.6535620093345642, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": true, "logits_per_token": -0.6535620093345642, "logits_per_char": -0.2178540031115214, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 554, "native_id": 2443, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3470296561717987, "incorrect_loss_raw": 1.2867063283920288, "correct_loss_per_char": 0.11567655205726624, "incorrect_loss_per_char": 0.3216765820980072, "correct_loss_per_token": 0.3470296561717987, "incorrect_loss_per_token": 1.2867063283920288, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2867063283920288, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -1.2867063283920288, "logits_per_char": -0.3216765820980072, "num_chars": 4}, {"sum_logits": -0.3470296561717987, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": true, "logits_per_token": -0.3470296561717987, "logits_per_char": -0.11567655205726624, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 555, "native_id": 2434, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8758999109268188, "incorrect_loss_raw": 0.6129264235496521, "correct_loss_per_char": 0.2189749777317047, "incorrect_loss_per_char": 0.20430880784988403, "correct_loss_per_token": 0.8758999109268188, "incorrect_loss_per_token": 0.6129264235496521, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8758999109268188, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": false, "logits_per_token": -0.8758999109268188, "logits_per_char": -0.2189749777317047, "num_chars": 4}, {"sum_logits": -0.6129264235496521, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": true, "logits_per_token": -0.6129264235496521, "logits_per_char": -0.20430880784988403, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 556, "native_id": 1162, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5684657692909241, "incorrect_loss_raw": 0.9376517534255981, "correct_loss_per_char": 0.14211644232273102, "incorrect_loss_per_char": 0.3125505844751994, "correct_loss_per_token": 0.5684657692909241, "incorrect_loss_per_token": 0.9376517534255981, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5684657692909241, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": true, "logits_per_token": -0.5684657692909241, "logits_per_char": -0.14211644232273102, "num_chars": 4}, {"sum_logits": -0.9376517534255981, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": false, "logits_per_token": -0.9376517534255981, "logits_per_char": -0.3125505844751994, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 557, "native_id": 1296, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26608744263648987, "incorrect_loss_raw": 1.5808706283569336, "correct_loss_per_char": 0.08869581421216328, "incorrect_loss_per_char": 0.3952176570892334, "correct_loss_per_token": 0.26608744263648987, "incorrect_loss_per_token": 1.5808706283569336, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5808706283569336, "num_tokens": 1, "num_tokens_all": 901, "is_greedy": false, "logits_per_token": -1.5808706283569336, "logits_per_char": -0.3952176570892334, "num_chars": 4}, {"sum_logits": -0.26608744263648987, "num_tokens": 1, "num_tokens_all": 901, "is_greedy": true, "logits_per_token": -0.26608744263648987, "logits_per_char": -0.08869581421216328, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 558, "native_id": 2496, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9792599081993103, "incorrect_loss_raw": 0.5802233815193176, "correct_loss_per_char": 0.24481497704982758, "incorrect_loss_per_char": 0.19340779383977255, "correct_loss_per_token": 0.9792599081993103, "incorrect_loss_per_token": 0.5802233815193176, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9792599081993103, "num_tokens": 1, "num_tokens_all": 1044, "is_greedy": false, "logits_per_token": -0.9792599081993103, "logits_per_char": -0.24481497704982758, "num_chars": 4}, {"sum_logits": -0.5802233815193176, "num_tokens": 1, "num_tokens_all": 1044, "is_greedy": true, "logits_per_token": -0.5802233815193176, "logits_per_char": -0.19340779383977255, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 559, "native_id": 1019, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19616468250751495, "incorrect_loss_raw": 2.0264668464660645, "correct_loss_per_char": 0.06538822750250499, "incorrect_loss_per_char": 0.5066167116165161, "correct_loss_per_token": 0.19616468250751495, "incorrect_loss_per_token": 2.0264668464660645, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -2.0264668464660645, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -2.0264668464660645, "logits_per_char": -0.5066167116165161, "num_chars": 4}, {"sum_logits": -0.19616468250751495, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -0.19616468250751495, "logits_per_char": -0.06538822750250499, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 560, "native_id": 639, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6100464463233948, "incorrect_loss_raw": 0.8493691682815552, "correct_loss_per_char": 0.2033488154411316, "incorrect_loss_per_char": 0.2123422920703888, "correct_loss_per_token": 0.6100464463233948, "incorrect_loss_per_token": 0.8493691682815552, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8493691682815552, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -0.8493691682815552, "logits_per_char": -0.2123422920703888, "num_chars": 4}, {"sum_logits": -0.6100464463233948, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": true, "logits_per_token": -0.6100464463233948, "logits_per_char": -0.2033488154411316, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 561, "native_id": 795, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5154725313186646, "incorrect_loss_raw": 0.9439519643783569, "correct_loss_per_char": 0.12886813282966614, "incorrect_loss_per_char": 0.31465065479278564, "correct_loss_per_token": 0.5154725313186646, "incorrect_loss_per_token": 0.9439519643783569, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5154725313186646, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": true, "logits_per_token": -0.5154725313186646, "logits_per_char": -0.12886813282966614, "num_chars": 4}, {"sum_logits": -0.9439519643783569, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": false, "logits_per_token": -0.9439519643783569, "logits_per_char": -0.31465065479278564, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 562, "native_id": 2498, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.39154765009880066, "incorrect_loss_raw": 1.2557590007781982, "correct_loss_per_char": 0.13051588336626688, "incorrect_loss_per_char": 0.31393975019454956, "correct_loss_per_token": 0.39154765009880066, "incorrect_loss_per_token": 1.2557590007781982, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2557590007781982, "num_tokens": 1, "num_tokens_all": 1045, "is_greedy": false, "logits_per_token": -1.2557590007781982, "logits_per_char": -0.31393975019454956, "num_chars": 4}, {"sum_logits": -0.39154765009880066, "num_tokens": 1, "num_tokens_all": 1045, "is_greedy": true, "logits_per_token": -0.39154765009880066, "logits_per_char": -0.13051588336626688, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 563, "native_id": 1855, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.581067681312561, "incorrect_loss_raw": 0.8878121376037598, "correct_loss_per_char": 0.14526692032814026, "incorrect_loss_per_char": 0.29593737920125324, "correct_loss_per_token": 0.581067681312561, "incorrect_loss_per_token": 0.8878121376037598, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.581067681312561, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": true, "logits_per_token": -0.581067681312561, "logits_per_char": -0.14526692032814026, "num_chars": 4}, {"sum_logits": -0.8878121376037598, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": false, "logits_per_token": -0.8878121376037598, "logits_per_char": -0.29593737920125324, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 564, "native_id": 2485, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9660532474517822, "incorrect_loss_raw": 0.510983943939209, "correct_loss_per_char": 0.24151331186294556, "incorrect_loss_per_char": 0.17032798131306967, "correct_loss_per_token": 0.9660532474517822, "incorrect_loss_per_token": 0.510983943939209, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9660532474517822, "num_tokens": 1, "num_tokens_all": 1041, "is_greedy": false, "logits_per_token": -0.9660532474517822, "logits_per_char": -0.24151331186294556, "num_chars": 4}, {"sum_logits": -0.510983943939209, "num_tokens": 1, "num_tokens_all": 1041, "is_greedy": true, "logits_per_token": -0.510983943939209, "logits_per_char": -0.17032798131306967, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 565, "native_id": 1822, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.257946491241455, "incorrect_loss_raw": 0.40313512086868286, "correct_loss_per_char": 0.31448662281036377, "incorrect_loss_per_char": 0.1343783736228943, "correct_loss_per_token": 1.257946491241455, "incorrect_loss_per_token": 0.40313512086868286, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.257946491241455, "num_tokens": 1, "num_tokens_all": 1034, "is_greedy": false, "logits_per_token": -1.257946491241455, "logits_per_char": -0.31448662281036377, "num_chars": 4}, {"sum_logits": -0.40313512086868286, "num_tokens": 1, "num_tokens_all": 1034, "is_greedy": true, "logits_per_token": -0.40313512086868286, "logits_per_char": -0.1343783736228943, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 566, "native_id": 1710, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4578225016593933, "incorrect_loss_raw": 1.049790859222412, "correct_loss_per_char": 0.1526075005531311, "incorrect_loss_per_char": 0.262447714805603, "correct_loss_per_token": 0.4578225016593933, "incorrect_loss_per_token": 1.049790859222412, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.049790859222412, "num_tokens": 1, "num_tokens_all": 918, "is_greedy": false, "logits_per_token": -1.049790859222412, "logits_per_char": -0.262447714805603, "num_chars": 4}, {"sum_logits": -0.4578225016593933, "num_tokens": 1, "num_tokens_all": 918, "is_greedy": true, "logits_per_token": -0.4578225016593933, "logits_per_char": -0.1526075005531311, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 567, "native_id": 2841, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9004364013671875, "incorrect_loss_raw": 0.5760841369628906, "correct_loss_per_char": 0.22510910034179688, "incorrect_loss_per_char": 0.19202804565429688, "correct_loss_per_token": 0.9004364013671875, "incorrect_loss_per_token": 0.5760841369628906, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9004364013671875, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": false, "logits_per_token": -0.9004364013671875, "logits_per_char": -0.22510910034179688, "num_chars": 4}, {"sum_logits": -0.5760841369628906, "num_tokens": 1, "num_tokens_all": 985, "is_greedy": true, "logits_per_token": -0.5760841369628906, "logits_per_char": -0.19202804565429688, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 568, "native_id": 1377, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7784643173217773, "incorrect_loss_raw": 0.7559801936149597, "correct_loss_per_char": 0.2594881057739258, "incorrect_loss_per_char": 0.18899504840373993, "correct_loss_per_token": 0.7784643173217773, "incorrect_loss_per_token": 0.7559801936149597, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7559801936149597, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -0.7559801936149597, "logits_per_char": -0.18899504840373993, "num_chars": 4}, {"sum_logits": -0.7784643173217773, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -0.7784643173217773, "logits_per_char": -0.2594881057739258, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 569, "native_id": 2142, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9150030612945557, "incorrect_loss_raw": 0.565295934677124, "correct_loss_per_char": 0.22875076532363892, "incorrect_loss_per_char": 0.188431978225708, "correct_loss_per_token": 0.9150030612945557, "incorrect_loss_per_token": 0.565295934677124, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9150030612945557, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": false, "logits_per_token": -0.9150030612945557, "logits_per_char": -0.22875076532363892, "num_chars": 4}, {"sum_logits": -0.565295934677124, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": true, "logits_per_token": -0.565295934677124, "logits_per_char": -0.188431978225708, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 570, "native_id": 1100, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4843701720237732, "incorrect_loss_raw": 1.0171573162078857, "correct_loss_per_char": 0.1614567240079244, "incorrect_loss_per_char": 0.25428932905197144, "correct_loss_per_token": 0.4843701720237732, "incorrect_loss_per_token": 1.0171573162078857, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0171573162078857, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": false, "logits_per_token": -1.0171573162078857, "logits_per_char": -0.25428932905197144, "num_chars": 4}, {"sum_logits": -0.4843701720237732, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": true, "logits_per_token": -0.4843701720237732, "logits_per_char": -0.1614567240079244, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 571, "native_id": 1782, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8235825300216675, "incorrect_loss_raw": 0.6460461616516113, "correct_loss_per_char": 0.20589563250541687, "incorrect_loss_per_char": 0.2153487205505371, "correct_loss_per_token": 0.8235825300216675, "incorrect_loss_per_token": 0.6460461616516113, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8235825300216675, "num_tokens": 1, "num_tokens_all": 875, "is_greedy": false, "logits_per_token": -0.8235825300216675, "logits_per_char": -0.20589563250541687, "num_chars": 4}, {"sum_logits": -0.6460461616516113, "num_tokens": 1, "num_tokens_all": 875, "is_greedy": true, "logits_per_token": -0.6460461616516113, "logits_per_char": -0.2153487205505371, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 572, "native_id": 1604, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3950073719024658, "incorrect_loss_raw": 1.1857640743255615, "correct_loss_per_char": 0.13166912396748862, "incorrect_loss_per_char": 0.2964410185813904, "correct_loss_per_token": 0.3950073719024658, "incorrect_loss_per_token": 1.1857640743255615, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1857640743255615, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -1.1857640743255615, "logits_per_char": -0.2964410185813904, "num_chars": 4}, {"sum_logits": -0.3950073719024658, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": true, "logits_per_token": -0.3950073719024658, "logits_per_char": -0.13166912396748862, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 573, "native_id": 1063, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8616048097610474, "incorrect_loss_raw": 0.627781093120575, "correct_loss_per_char": 0.21540120244026184, "incorrect_loss_per_char": 0.209260364373525, "correct_loss_per_token": 0.8616048097610474, "incorrect_loss_per_token": 0.627781093120575, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8616048097610474, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": false, "logits_per_token": -0.8616048097610474, "logits_per_char": -0.21540120244026184, "num_chars": 4}, {"sum_logits": -0.627781093120575, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": true, "logits_per_token": -0.627781093120575, "logits_per_char": -0.209260364373525, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 574, "native_id": 2352, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3910929560661316, "incorrect_loss_raw": 1.231797218322754, "correct_loss_per_char": 0.13036431868871054, "incorrect_loss_per_char": 0.3079493045806885, "correct_loss_per_token": 0.3910929560661316, "incorrect_loss_per_token": 1.231797218322754, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.231797218322754, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": false, "logits_per_token": -1.231797218322754, "logits_per_char": -0.3079493045806885, "num_chars": 4}, {"sum_logits": -0.3910929560661316, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": true, "logits_per_token": -0.3910929560661316, "logits_per_char": -0.13036431868871054, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 575, "native_id": 2021, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.27765804529190063, "incorrect_loss_raw": 1.5066115856170654, "correct_loss_per_char": 0.09255268176396687, "incorrect_loss_per_char": 0.37665289640426636, "correct_loss_per_token": 0.27765804529190063, "incorrect_loss_per_token": 1.5066115856170654, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5066115856170654, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.5066115856170654, "logits_per_char": -0.37665289640426636, "num_chars": 4}, {"sum_logits": -0.27765804529190063, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.27765804529190063, "logits_per_char": -0.09255268176396687, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 576, "native_id": 1290, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0492682456970215, "incorrect_loss_raw": 0.4763818383216858, "correct_loss_per_char": 0.26231706142425537, "incorrect_loss_per_char": 0.1587939461072286, "correct_loss_per_token": 1.0492682456970215, "incorrect_loss_per_token": 0.4763818383216858, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0492682456970215, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": false, "logits_per_token": -1.0492682456970215, "logits_per_char": -0.26231706142425537, "num_chars": 4}, {"sum_logits": -0.4763818383216858, "num_tokens": 1, "num_tokens_all": 876, "is_greedy": true, "logits_per_token": -0.4763818383216858, "logits_per_char": -0.1587939461072286, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 577, "native_id": 1014, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6731176972389221, "incorrect_loss_raw": 0.8355222344398499, "correct_loss_per_char": 0.16827942430973053, "incorrect_loss_per_char": 0.27850741147994995, "correct_loss_per_token": 0.6731176972389221, "incorrect_loss_per_token": 0.8355222344398499, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6731176972389221, "num_tokens": 1, "num_tokens_all": 999, "is_greedy": true, "logits_per_token": -0.6731176972389221, "logits_per_char": -0.16827942430973053, "num_chars": 4}, {"sum_logits": -0.8355222344398499, "num_tokens": 1, "num_tokens_all": 999, "is_greedy": false, "logits_per_token": -0.8355222344398499, "logits_per_char": -0.27850741147994995, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 578, "native_id": 3121, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.46999821066856384, "incorrect_loss_raw": 1.082876443862915, "correct_loss_per_char": 0.15666607022285461, "incorrect_loss_per_char": 0.27071911096572876, "correct_loss_per_token": 0.46999821066856384, "incorrect_loss_per_token": 1.082876443862915, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.082876443862915, "num_tokens": 1, "num_tokens_all": 866, "is_greedy": false, "logits_per_token": -1.082876443862915, "logits_per_char": -0.27071911096572876, "num_chars": 4}, {"sum_logits": -0.46999821066856384, "num_tokens": 1, "num_tokens_all": 866, "is_greedy": true, "logits_per_token": -0.46999821066856384, "logits_per_char": -0.15666607022285461, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 579, "native_id": 646, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0583927631378174, "incorrect_loss_raw": 0.4750070571899414, "correct_loss_per_char": 0.26459819078445435, "incorrect_loss_per_char": 0.15833568572998047, "correct_loss_per_token": 1.0583927631378174, "incorrect_loss_per_token": 0.4750070571899414, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0583927631378174, "num_tokens": 1, "num_tokens_all": 886, "is_greedy": false, "logits_per_token": -1.0583927631378174, "logits_per_char": -0.26459819078445435, "num_chars": 4}, {"sum_logits": -0.4750070571899414, "num_tokens": 1, "num_tokens_all": 886, "is_greedy": true, "logits_per_token": -0.4750070571899414, "logits_per_char": -0.15833568572998047, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 580, "native_id": 3196, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4427090883255005, "incorrect_loss_raw": 1.1947520971298218, "correct_loss_per_char": 0.14756969610850015, "incorrect_loss_per_char": 0.29868802428245544, "correct_loss_per_token": 0.4427090883255005, "incorrect_loss_per_token": 1.1947520971298218, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1947520971298218, "num_tokens": 1, "num_tokens_all": 1056, "is_greedy": false, "logits_per_token": -1.1947520971298218, "logits_per_char": -0.29868802428245544, "num_chars": 4}, {"sum_logits": -0.4427090883255005, "num_tokens": 1, "num_tokens_all": 1056, "is_greedy": true, "logits_per_token": -0.4427090883255005, "logits_per_char": -0.14756969610850015, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 581, "native_id": 1682, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1071751117706299, "incorrect_loss_raw": 0.4449370801448822, "correct_loss_per_char": 0.27679377794265747, "incorrect_loss_per_char": 0.14831236004829407, "correct_loss_per_token": 1.1071751117706299, "incorrect_loss_per_token": 0.4449370801448822, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1071751117706299, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -1.1071751117706299, "logits_per_char": -0.27679377794265747, "num_chars": 4}, {"sum_logits": -0.4449370801448822, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": true, "logits_per_token": -0.4449370801448822, "logits_per_char": -0.14831236004829407, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 582, "native_id": 645, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.40257781744003296, "incorrect_loss_raw": 1.2507009506225586, "correct_loss_per_char": 0.10064445436000824, "incorrect_loss_per_char": 0.4169003168741862, "correct_loss_per_token": 0.40257781744003296, "incorrect_loss_per_token": 1.2507009506225586, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.40257781744003296, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": true, "logits_per_token": -0.40257781744003296, "logits_per_char": -0.10064445436000824, "num_chars": 4}, {"sum_logits": -1.2507009506225586, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": false, "logits_per_token": -1.2507009506225586, "logits_per_char": -0.4169003168741862, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 583, "native_id": 141, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.17543095350265503, "incorrect_loss_raw": 1.9526441097259521, "correct_loss_per_char": 0.05847698450088501, "incorrect_loss_per_char": 0.48816102743148804, "correct_loss_per_token": 0.17543095350265503, "incorrect_loss_per_token": 1.9526441097259521, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.9526441097259521, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": false, "logits_per_token": -1.9526441097259521, "logits_per_char": -0.48816102743148804, "num_chars": 4}, {"sum_logits": -0.17543095350265503, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": true, "logits_per_token": -0.17543095350265503, "logits_per_char": -0.05847698450088501, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 584, "native_id": 3024, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1234934329986572, "incorrect_loss_raw": 0.43614697456359863, "correct_loss_per_char": 0.2808733582496643, "incorrect_loss_per_char": 0.14538232485453287, "correct_loss_per_token": 1.1234934329986572, "incorrect_loss_per_token": 0.43614697456359863, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1234934329986572, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.1234934329986572, "logits_per_char": -0.2808733582496643, "num_chars": 4}, {"sum_logits": -0.43614697456359863, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -0.43614697456359863, "logits_per_char": -0.14538232485453287, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 585, "native_id": 2360, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8257779479026794, "incorrect_loss_raw": 0.693911612033844, "correct_loss_per_char": 0.20644448697566986, "incorrect_loss_per_char": 0.231303870677948, "correct_loss_per_token": 0.8257779479026794, "incorrect_loss_per_token": 0.693911612033844, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8257779479026794, "num_tokens": 1, "num_tokens_all": 1144, "is_greedy": false, "logits_per_token": -0.8257779479026794, "logits_per_char": -0.20644448697566986, "num_chars": 4}, {"sum_logits": -0.693911612033844, "num_tokens": 1, "num_tokens_all": 1144, "is_greedy": true, "logits_per_token": -0.693911612033844, "logits_per_char": -0.231303870677948, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 586, "native_id": 2233, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3769005835056305, "incorrect_loss_raw": 1.2560185194015503, "correct_loss_per_char": 0.12563352783521017, "incorrect_loss_per_char": 0.3140046298503876, "correct_loss_per_token": 0.3769005835056305, "incorrect_loss_per_token": 1.2560185194015503, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2560185194015503, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -1.2560185194015503, "logits_per_char": -0.3140046298503876, "num_chars": 4}, {"sum_logits": -0.3769005835056305, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": true, "logits_per_token": -0.3769005835056305, "logits_per_char": -0.12563352783521017, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 587, "native_id": 2793, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6123064756393433, "incorrect_loss_raw": 0.2543918192386627, "correct_loss_per_char": 0.4030766189098358, "incorrect_loss_per_char": 0.08479727307955424, "correct_loss_per_token": 1.6123064756393433, "incorrect_loss_per_token": 0.2543918192386627, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6123064756393433, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": false, "logits_per_token": -1.6123064756393433, "logits_per_char": -0.4030766189098358, "num_chars": 4}, {"sum_logits": -0.2543918192386627, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": true, "logits_per_token": -0.2543918192386627, "logits_per_char": -0.08479727307955424, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 588, "native_id": 3009, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5369230508804321, "incorrect_loss_raw": 0.9828755855560303, "correct_loss_per_char": 0.13423076272010803, "incorrect_loss_per_char": 0.32762519518534344, "correct_loss_per_token": 0.5369230508804321, "incorrect_loss_per_token": 0.9828755855560303, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5369230508804321, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": true, "logits_per_token": -0.5369230508804321, "logits_per_char": -0.13423076272010803, "num_chars": 4}, {"sum_logits": -0.9828755855560303, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": false, "logits_per_token": -0.9828755855560303, "logits_per_char": -0.32762519518534344, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 589, "native_id": 2227, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2986251413822174, "incorrect_loss_raw": 1.4376178979873657, "correct_loss_per_char": 0.09954171379407246, "incorrect_loss_per_char": 0.35940447449684143, "correct_loss_per_token": 0.2986251413822174, "incorrect_loss_per_token": 1.4376178979873657, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4376178979873657, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": false, "logits_per_token": -1.4376178979873657, "logits_per_char": -0.35940447449684143, "num_chars": 4}, {"sum_logits": -0.2986251413822174, "num_tokens": 1, "num_tokens_all": 1082, "is_greedy": true, "logits_per_token": -0.2986251413822174, "logits_per_char": -0.09954171379407246, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 590, "native_id": 3000, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6750432252883911, "incorrect_loss_raw": 0.8204543590545654, "correct_loss_per_char": 0.2250144084294637, "incorrect_loss_per_char": 0.20511358976364136, "correct_loss_per_token": 0.6750432252883911, "incorrect_loss_per_token": 0.8204543590545654, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8204543590545654, "num_tokens": 1, "num_tokens_all": 879, "is_greedy": false, "logits_per_token": -0.8204543590545654, "logits_per_char": -0.20511358976364136, "num_chars": 4}, {"sum_logits": -0.6750432252883911, "num_tokens": 1, "num_tokens_all": 879, "is_greedy": true, "logits_per_token": -0.6750432252883911, "logits_per_char": -0.2250144084294637, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 591, "native_id": 1761, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8940218687057495, "incorrect_loss_raw": 0.6262282133102417, "correct_loss_per_char": 0.22350546717643738, "incorrect_loss_per_char": 0.20874273777008057, "correct_loss_per_token": 0.8940218687057495, "incorrect_loss_per_token": 0.6262282133102417, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8940218687057495, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": false, "logits_per_token": -0.8940218687057495, "logits_per_char": -0.22350546717643738, "num_chars": 4}, {"sum_logits": -0.6262282133102417, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": true, "logits_per_token": -0.6262282133102417, "logits_per_char": -0.20874273777008057, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 592, "native_id": 1819, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.758817732334137, "incorrect_loss_raw": 0.6859663724899292, "correct_loss_per_char": 0.18970443308353424, "incorrect_loss_per_char": 0.22865545749664307, "correct_loss_per_token": 0.758817732334137, "incorrect_loss_per_token": 0.6859663724899292, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.758817732334137, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": false, "logits_per_token": -0.758817732334137, "logits_per_char": -0.18970443308353424, "num_chars": 4}, {"sum_logits": -0.6859663724899292, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": true, "logits_per_token": -0.6859663724899292, "logits_per_char": -0.22865545749664307, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 593, "native_id": 99, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7066746950149536, "incorrect_loss_raw": 0.7823565602302551, "correct_loss_per_char": 0.2355582316716512, "incorrect_loss_per_char": 0.19558914005756378, "correct_loss_per_token": 0.7066746950149536, "incorrect_loss_per_token": 0.7823565602302551, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7823565602302551, "num_tokens": 1, "num_tokens_all": 1051, "is_greedy": false, "logits_per_token": -0.7823565602302551, "logits_per_char": -0.19558914005756378, "num_chars": 4}, {"sum_logits": -0.7066746950149536, "num_tokens": 1, "num_tokens_all": 1051, "is_greedy": true, "logits_per_token": -0.7066746950149536, "logits_per_char": -0.2355582316716512, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 594, "native_id": 2252, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.43468549847602844, "incorrect_loss_raw": 1.138800024986267, "correct_loss_per_char": 0.10867137461900711, "incorrect_loss_per_char": 0.3796000083287557, "correct_loss_per_token": 0.43468549847602844, "incorrect_loss_per_token": 1.138800024986267, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.43468549847602844, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": true, "logits_per_token": -0.43468549847602844, "logits_per_char": -0.10867137461900711, "num_chars": 4}, {"sum_logits": -1.138800024986267, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": false, "logits_per_token": -1.138800024986267, "logits_per_char": -0.3796000083287557, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 595, "native_id": 1656, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.697613000869751, "incorrect_loss_raw": 0.7452473044395447, "correct_loss_per_char": 0.23253766695658365, "incorrect_loss_per_char": 0.18631182610988617, "correct_loss_per_token": 0.697613000869751, "incorrect_loss_per_token": 0.7452473044395447, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7452473044395447, "num_tokens": 1, "num_tokens_all": 995, "is_greedy": false, "logits_per_token": -0.7452473044395447, "logits_per_char": -0.18631182610988617, "num_chars": 4}, {"sum_logits": -0.697613000869751, "num_tokens": 1, "num_tokens_all": 995, "is_greedy": true, "logits_per_token": -0.697613000869751, "logits_per_char": -0.23253766695658365, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 596, "native_id": 283, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5746418237686157, "incorrect_loss_raw": 0.9354551434516907, "correct_loss_per_char": 0.14366045594215393, "incorrect_loss_per_char": 0.31181838115056354, "correct_loss_per_token": 0.5746418237686157, "incorrect_loss_per_token": 0.9354551434516907, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5746418237686157, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": true, "logits_per_token": -0.5746418237686157, "logits_per_char": -0.14366045594215393, "num_chars": 4}, {"sum_logits": -0.9354551434516907, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": false, "logits_per_token": -0.9354551434516907, "logits_per_char": -0.31181838115056354, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 597, "native_id": 3223, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8455023765563965, "incorrect_loss_raw": 0.666735053062439, "correct_loss_per_char": 0.28183412551879883, "incorrect_loss_per_char": 0.16668376326560974, "correct_loss_per_token": 0.8455023765563965, "incorrect_loss_per_token": 0.666735053062439, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.666735053062439, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": true, "logits_per_token": -0.666735053062439, "logits_per_char": -0.16668376326560974, "num_chars": 4}, {"sum_logits": -0.8455023765563965, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -0.8455023765563965, "logits_per_char": -0.28183412551879883, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 598, "native_id": 3253, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8240981698036194, "incorrect_loss_raw": 0.6925041079521179, "correct_loss_per_char": 0.20602454245090485, "incorrect_loss_per_char": 0.23083470265070596, "correct_loss_per_token": 0.8240981698036194, "incorrect_loss_per_token": 0.6925041079521179, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8240981698036194, "num_tokens": 1, "num_tokens_all": 894, "is_greedy": false, "logits_per_token": -0.8240981698036194, "logits_per_char": -0.20602454245090485, "num_chars": 4}, {"sum_logits": -0.6925041079521179, "num_tokens": 1, "num_tokens_all": 894, "is_greedy": true, "logits_per_token": -0.6925041079521179, "logits_per_char": -0.23083470265070596, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 599, "native_id": 1001, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2852025330066681, "incorrect_loss_raw": 1.8251044750213623, "correct_loss_per_char": 0.0950675110022227, "incorrect_loss_per_char": 0.4562761187553406, "correct_loss_per_token": 0.2852025330066681, "incorrect_loss_per_token": 1.8251044750213623, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.8251044750213623, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": false, "logits_per_token": -1.8251044750213623, "logits_per_char": -0.4562761187553406, "num_chars": 4}, {"sum_logits": -0.2852025330066681, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": true, "logits_per_token": -0.2852025330066681, "logits_per_char": -0.0950675110022227, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 600, "native_id": 2647, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.1825549602508545, "incorrect_loss_raw": 0.1551840603351593, "correct_loss_per_char": 0.5456387400627136, "incorrect_loss_per_char": 0.051728020111719765, "correct_loss_per_token": 2.1825549602508545, "incorrect_loss_per_token": 0.1551840603351593, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -2.1825549602508545, "num_tokens": 1, "num_tokens_all": 1034, "is_greedy": false, "logits_per_token": -2.1825549602508545, "logits_per_char": -0.5456387400627136, "num_chars": 4}, {"sum_logits": -0.1551840603351593, "num_tokens": 1, "num_tokens_all": 1034, "is_greedy": true, "logits_per_token": -0.1551840603351593, "logits_per_char": -0.051728020111719765, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 601, "native_id": 3055, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8045225739479065, "incorrect_loss_raw": 0.6519755125045776, "correct_loss_per_char": 0.20113064348697662, "incorrect_loss_per_char": 0.21732517083485922, "correct_loss_per_token": 0.8045225739479065, "incorrect_loss_per_token": 0.6519755125045776, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8045225739479065, "num_tokens": 1, "num_tokens_all": 956, "is_greedy": false, "logits_per_token": -0.8045225739479065, "logits_per_char": -0.20113064348697662, "num_chars": 4}, {"sum_logits": -0.6519755125045776, "num_tokens": 1, "num_tokens_all": 956, "is_greedy": true, "logits_per_token": -0.6519755125045776, "logits_per_char": -0.21732517083485922, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 602, "native_id": 2929, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6946398615837097, "incorrect_loss_raw": 0.7574378252029419, "correct_loss_per_char": 0.17365996539592743, "incorrect_loss_per_char": 0.2524792750676473, "correct_loss_per_token": 0.6946398615837097, "incorrect_loss_per_token": 0.7574378252029419, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6946398615837097, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": true, "logits_per_token": -0.6946398615837097, "logits_per_char": -0.17365996539592743, "num_chars": 4}, {"sum_logits": -0.7574378252029419, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": false, "logits_per_token": -0.7574378252029419, "logits_per_char": -0.2524792750676473, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 603, "native_id": 2872, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8819718360900879, "incorrect_loss_raw": 0.5824269652366638, "correct_loss_per_char": 0.22049295902252197, "incorrect_loss_per_char": 0.1941423217455546, "correct_loss_per_token": 0.8819718360900879, "incorrect_loss_per_token": 0.5824269652366638, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8819718360900879, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": false, "logits_per_token": -0.8819718360900879, "logits_per_char": -0.22049295902252197, "num_chars": 4}, {"sum_logits": -0.5824269652366638, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": true, "logits_per_token": -0.5824269652366638, "logits_per_char": -0.1941423217455546, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 604, "native_id": 972, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2813335955142975, "incorrect_loss_raw": 1.5032461881637573, "correct_loss_per_char": 0.0937778651714325, "incorrect_loss_per_char": 0.37581154704093933, "correct_loss_per_token": 0.2813335955142975, "incorrect_loss_per_token": 1.5032461881637573, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5032461881637573, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": false, "logits_per_token": -1.5032461881637573, "logits_per_char": -0.37581154704093933, "num_chars": 4}, {"sum_logits": -0.2813335955142975, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": true, "logits_per_token": -0.2813335955142975, "logits_per_char": -0.0937778651714325, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 605, "native_id": 1239, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5218352675437927, "incorrect_loss_raw": 1.005542278289795, "correct_loss_per_char": 0.17394508918126425, "incorrect_loss_per_char": 0.25138556957244873, "correct_loss_per_token": 0.5218352675437927, "incorrect_loss_per_token": 1.005542278289795, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.005542278289795, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": false, "logits_per_token": -1.005542278289795, "logits_per_char": -0.25138556957244873, "num_chars": 4}, {"sum_logits": -0.5218352675437927, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": true, "logits_per_token": -0.5218352675437927, "logits_per_char": -0.17394508918126425, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 606, "native_id": 2101, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8275244235992432, "incorrect_loss_raw": 0.6567608714103699, "correct_loss_per_char": 0.2068811058998108, "incorrect_loss_per_char": 0.2189202904701233, "correct_loss_per_token": 0.8275244235992432, "incorrect_loss_per_token": 0.6567608714103699, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8275244235992432, "num_tokens": 1, "num_tokens_all": 872, "is_greedy": false, "logits_per_token": -0.8275244235992432, "logits_per_char": -0.2068811058998108, "num_chars": 4}, {"sum_logits": -0.6567608714103699, "num_tokens": 1, "num_tokens_all": 872, "is_greedy": true, "logits_per_token": -0.6567608714103699, "logits_per_char": -0.2189202904701233, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 607, "native_id": 1340, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3758184909820557, "incorrect_loss_raw": 0.38128095865249634, "correct_loss_per_char": 0.3439546227455139, "incorrect_loss_per_char": 0.12709365288416544, "correct_loss_per_token": 1.3758184909820557, "incorrect_loss_per_token": 0.38128095865249634, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3758184909820557, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": false, "logits_per_token": -1.3758184909820557, "logits_per_char": -0.3439546227455139, "num_chars": 4}, {"sum_logits": -0.38128095865249634, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": true, "logits_per_token": -0.38128095865249634, "logits_per_char": -0.12709365288416544, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 608, "native_id": 2127, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.28712064027786255, "incorrect_loss_raw": 1.47194504737854, "correct_loss_per_char": 0.09570688009262085, "incorrect_loss_per_char": 0.367986261844635, "correct_loss_per_token": 0.28712064027786255, "incorrect_loss_per_token": 1.47194504737854, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.47194504737854, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": false, "logits_per_token": -1.47194504737854, "logits_per_char": -0.367986261844635, "num_chars": 4}, {"sum_logits": -0.28712064027786255, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": true, "logits_per_token": -0.28712064027786255, "logits_per_char": -0.09570688009262085, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 609, "native_id": 2123, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6636878848075867, "incorrect_loss_raw": 0.8002383708953857, "correct_loss_per_char": 0.22122929493586221, "incorrect_loss_per_char": 0.20005959272384644, "correct_loss_per_token": 0.6636878848075867, "incorrect_loss_per_token": 0.8002383708953857, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8002383708953857, "num_tokens": 1, "num_tokens_all": 888, "is_greedy": false, "logits_per_token": -0.8002383708953857, "logits_per_char": -0.20005959272384644, "num_chars": 4}, {"sum_logits": -0.6636878848075867, "num_tokens": 1, "num_tokens_all": 888, "is_greedy": true, "logits_per_token": -0.6636878848075867, "logits_per_char": -0.22122929493586221, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 610, "native_id": 1851, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.508634090423584, "incorrect_loss_raw": 0.28691524267196655, "correct_loss_per_char": 0.377158522605896, "incorrect_loss_per_char": 0.09563841422398885, "correct_loss_per_token": 1.508634090423584, "incorrect_loss_per_token": 0.28691524267196655, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.508634090423584, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": false, "logits_per_token": -1.508634090423584, "logits_per_char": -0.377158522605896, "num_chars": 4}, {"sum_logits": -0.28691524267196655, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": true, "logits_per_token": -0.28691524267196655, "logits_per_char": -0.09563841422398885, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 611, "native_id": 263, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6418619155883789, "incorrect_loss_raw": 0.7838976383209229, "correct_loss_per_char": 0.16046547889709473, "incorrect_loss_per_char": 0.26129921277364093, "correct_loss_per_token": 0.6418619155883789, "incorrect_loss_per_token": 0.7838976383209229, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6418619155883789, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": true, "logits_per_token": -0.6418619155883789, "logits_per_char": -0.16046547889709473, "num_chars": 4}, {"sum_logits": -0.7838976383209229, "num_tokens": 1, "num_tokens_all": 951, "is_greedy": false, "logits_per_token": -0.7838976383209229, "logits_per_char": -0.26129921277364093, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 612, "native_id": 1240, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5433306694030762, "incorrect_loss_raw": 1.0512839555740356, "correct_loss_per_char": 0.18111022313435873, "incorrect_loss_per_char": 0.2628209888935089, "correct_loss_per_token": 0.5433306694030762, "incorrect_loss_per_token": 1.0512839555740356, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0512839555740356, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -1.0512839555740356, "logits_per_char": -0.2628209888935089, "num_chars": 4}, {"sum_logits": -0.5433306694030762, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": true, "logits_per_token": -0.5433306694030762, "logits_per_char": -0.18111022313435873, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 613, "native_id": 106, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5876352190971375, "incorrect_loss_raw": 1.0311068296432495, "correct_loss_per_char": 0.1958784063657125, "incorrect_loss_per_char": 0.2577767074108124, "correct_loss_per_token": 0.5876352190971375, "incorrect_loss_per_token": 1.0311068296432495, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0311068296432495, "num_tokens": 1, "num_tokens_all": 836, "is_greedy": false, "logits_per_token": -1.0311068296432495, "logits_per_char": -0.2577767074108124, "num_chars": 4}, {"sum_logits": -0.5876352190971375, "num_tokens": 1, "num_tokens_all": 836, "is_greedy": true, "logits_per_token": -0.5876352190971375, "logits_per_char": -0.1958784063657125, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 614, "native_id": 2052, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.691318929195404, "incorrect_loss_raw": 0.7654278874397278, "correct_loss_per_char": 0.23043964306513467, "incorrect_loss_per_char": 0.19135697185993195, "correct_loss_per_token": 0.691318929195404, "incorrect_loss_per_token": 0.7654278874397278, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7654278874397278, "num_tokens": 1, "num_tokens_all": 893, "is_greedy": false, "logits_per_token": -0.7654278874397278, "logits_per_char": -0.19135697185993195, "num_chars": 4}, {"sum_logits": -0.691318929195404, "num_tokens": 1, "num_tokens_all": 893, "is_greedy": true, "logits_per_token": -0.691318929195404, "logits_per_char": -0.23043964306513467, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 615, "native_id": 739, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4200488030910492, "incorrect_loss_raw": 1.2222710847854614, "correct_loss_per_char": 0.1050122007727623, "incorrect_loss_per_char": 0.4074236949284871, "correct_loss_per_token": 0.4200488030910492, "incorrect_loss_per_token": 1.2222710847854614, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4200488030910492, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": true, "logits_per_token": -0.4200488030910492, "logits_per_char": -0.1050122007727623, "num_chars": 4}, {"sum_logits": -1.2222710847854614, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.2222710847854614, "logits_per_char": -0.4074236949284871, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 616, "native_id": 584, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6053221821784973, "incorrect_loss_raw": 0.9452536702156067, "correct_loss_per_char": 0.15133054554462433, "incorrect_loss_per_char": 0.3150845567385356, "correct_loss_per_token": 0.6053221821784973, "incorrect_loss_per_token": 0.9452536702156067, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6053221821784973, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": true, "logits_per_token": -0.6053221821784973, "logits_per_char": -0.15133054554462433, "num_chars": 4}, {"sum_logits": -0.9452536702156067, "num_tokens": 1, "num_tokens_all": 962, "is_greedy": false, "logits_per_token": -0.9452536702156067, "logits_per_char": -0.3150845567385356, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 617, "native_id": 601, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.338594913482666, "incorrect_loss_raw": 0.3529602289199829, "correct_loss_per_char": 0.3346487283706665, "incorrect_loss_per_char": 0.11765340963999431, "correct_loss_per_token": 1.338594913482666, "incorrect_loss_per_token": 0.3529602289199829, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.338594913482666, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -1.338594913482666, "logits_per_char": -0.3346487283706665, "num_chars": 4}, {"sum_logits": -0.3529602289199829, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": true, "logits_per_token": -0.3529602289199829, "logits_per_char": -0.11765340963999431, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 618, "native_id": 3034, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6921882033348083, "incorrect_loss_raw": 0.7663375735282898, "correct_loss_per_char": 0.23072940111160278, "incorrect_loss_per_char": 0.19158439338207245, "correct_loss_per_token": 0.6921882033348083, "incorrect_loss_per_token": 0.7663375735282898, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7663375735282898, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -0.7663375735282898, "logits_per_char": -0.19158439338207245, "num_chars": 4}, {"sum_logits": -0.6921882033348083, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -0.6921882033348083, "logits_per_char": -0.23072940111160278, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 619, "native_id": 1754, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.109118938446045, "incorrect_loss_raw": 0.43882232904434204, "correct_loss_per_char": 0.27727973461151123, "incorrect_loss_per_char": 0.14627410968144736, "correct_loss_per_token": 1.109118938446045, "incorrect_loss_per_token": 0.43882232904434204, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.109118938446045, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -1.109118938446045, "logits_per_char": -0.27727973461151123, "num_chars": 4}, {"sum_logits": -0.43882232904434204, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": true, "logits_per_token": -0.43882232904434204, "logits_per_char": -0.14627410968144736, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 620, "native_id": 725, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0596027374267578, "incorrect_loss_raw": 0.4629303812980652, "correct_loss_per_char": 0.26490068435668945, "incorrect_loss_per_char": 0.15431012709935507, "correct_loss_per_token": 1.0596027374267578, "incorrect_loss_per_token": 0.4629303812980652, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0596027374267578, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": false, "logits_per_token": -1.0596027374267578, "logits_per_char": -0.26490068435668945, "num_chars": 4}, {"sum_logits": -0.4629303812980652, "num_tokens": 1, "num_tokens_all": 1080, "is_greedy": true, "logits_per_token": -0.4629303812980652, "logits_per_char": -0.15431012709935507, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 621, "native_id": 2160, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8596264123916626, "incorrect_loss_raw": 0.6870379447937012, "correct_loss_per_char": 0.2865421374638875, "incorrect_loss_per_char": 0.1717594861984253, "correct_loss_per_token": 0.8596264123916626, "incorrect_loss_per_token": 0.6870379447937012, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6870379447937012, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": true, "logits_per_token": -0.6870379447937012, "logits_per_char": -0.1717594861984253, "num_chars": 4}, {"sum_logits": -0.8596264123916626, "num_tokens": 1, "num_tokens_all": 1105, "is_greedy": false, "logits_per_token": -0.8596264123916626, "logits_per_char": -0.2865421374638875, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 622, "native_id": 560, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.36007222533226013, "incorrect_loss_raw": 1.4500378370285034, "correct_loss_per_char": 0.12002407511075337, "incorrect_loss_per_char": 0.36250945925712585, "correct_loss_per_token": 0.36007222533226013, "incorrect_loss_per_token": 1.4500378370285034, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4500378370285034, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": false, "logits_per_token": -1.4500378370285034, "logits_per_char": -0.36250945925712585, "num_chars": 4}, {"sum_logits": -0.36007222533226013, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": true, "logits_per_token": -0.36007222533226013, "logits_per_char": -0.12002407511075337, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 623, "native_id": 1234, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.715594470500946, "incorrect_loss_raw": 0.7618501782417297, "correct_loss_per_char": 0.23853149016698202, "incorrect_loss_per_char": 0.19046254456043243, "correct_loss_per_token": 0.715594470500946, "incorrect_loss_per_token": 0.7618501782417297, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7618501782417297, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": false, "logits_per_token": -0.7618501782417297, "logits_per_char": -0.19046254456043243, "num_chars": 4}, {"sum_logits": -0.715594470500946, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": true, "logits_per_token": -0.715594470500946, "logits_per_char": -0.23853149016698202, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 624, "native_id": 384, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5906850099563599, "incorrect_loss_raw": 1.054763674736023, "correct_loss_per_char": 0.14767125248908997, "incorrect_loss_per_char": 0.3515878915786743, "correct_loss_per_token": 0.5906850099563599, "incorrect_loss_per_token": 1.054763674736023, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5906850099563599, "num_tokens": 1, "num_tokens_all": 1185, "is_greedy": true, "logits_per_token": -0.5906850099563599, "logits_per_char": -0.14767125248908997, "num_chars": 4}, {"sum_logits": -1.054763674736023, "num_tokens": 1, "num_tokens_all": 1185, "is_greedy": false, "logits_per_token": -1.054763674736023, "logits_per_char": -0.3515878915786743, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 625, "native_id": 2000, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6193434596061707, "incorrect_loss_raw": 0.8827477693557739, "correct_loss_per_char": 0.20644781986872354, "incorrect_loss_per_char": 0.22068694233894348, "correct_loss_per_token": 0.6193434596061707, "incorrect_loss_per_token": 0.8827477693557739, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8827477693557739, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": false, "logits_per_token": -0.8827477693557739, "logits_per_char": -0.22068694233894348, "num_chars": 4}, {"sum_logits": -0.6193434596061707, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": true, "logits_per_token": -0.6193434596061707, "logits_per_char": -0.20644781986872354, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 626, "native_id": 2214, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5462811589241028, "incorrect_loss_raw": 0.9541993737220764, "correct_loss_per_char": 0.18209371964136759, "incorrect_loss_per_char": 0.2385498434305191, "correct_loss_per_token": 0.5462811589241028, "incorrect_loss_per_token": 0.9541993737220764, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9541993737220764, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": false, "logits_per_token": -0.9541993737220764, "logits_per_char": -0.2385498434305191, "num_chars": 4}, {"sum_logits": -0.5462811589241028, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": true, "logits_per_token": -0.5462811589241028, "logits_per_char": -0.18209371964136759, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 627, "native_id": 2742, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.32094481587409973, "incorrect_loss_raw": 1.3879640102386475, "correct_loss_per_char": 0.10698160529136658, "incorrect_loss_per_char": 0.34699100255966187, "correct_loss_per_token": 0.32094481587409973, "incorrect_loss_per_token": 1.3879640102386475, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3879640102386475, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -1.3879640102386475, "logits_per_char": -0.34699100255966187, "num_chars": 4}, {"sum_logits": -0.32094481587409973, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": true, "logits_per_token": -0.32094481587409973, "logits_per_char": -0.10698160529136658, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 628, "native_id": 2462, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22600410878658295, "incorrect_loss_raw": 1.7285946607589722, "correct_loss_per_char": 0.07533470292886098, "incorrect_loss_per_char": 0.43214866518974304, "correct_loss_per_token": 0.22600410878658295, "incorrect_loss_per_token": 1.7285946607589722, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7285946607589722, "num_tokens": 1, "num_tokens_all": 992, "is_greedy": false, "logits_per_token": -1.7285946607589722, "logits_per_char": -0.43214866518974304, "num_chars": 4}, {"sum_logits": -0.22600410878658295, "num_tokens": 1, "num_tokens_all": 992, "is_greedy": true, "logits_per_token": -0.22600410878658295, "logits_per_char": -0.07533470292886098, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 629, "native_id": 547, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.48946237564086914, "incorrect_loss_raw": 1.0108091831207275, "correct_loss_per_char": 0.12236559391021729, "incorrect_loss_per_char": 0.33693639437357586, "correct_loss_per_token": 0.48946237564086914, "incorrect_loss_per_token": 1.0108091831207275, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.48946237564086914, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": true, "logits_per_token": -0.48946237564086914, "logits_per_char": -0.12236559391021729, "num_chars": 4}, {"sum_logits": -1.0108091831207275, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.0108091831207275, "logits_per_char": -0.33693639437357586, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 630, "native_id": 1093, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.130630612373352, "incorrect_loss_raw": 0.4218050539493561, "correct_loss_per_char": 0.37687687079111737, "incorrect_loss_per_char": 0.10545126348733902, "correct_loss_per_token": 1.130630612373352, "incorrect_loss_per_token": 0.4218050539493561, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4218050539493561, "num_tokens": 1, "num_tokens_all": 1043, "is_greedy": true, "logits_per_token": -0.4218050539493561, "logits_per_char": -0.10545126348733902, "num_chars": 4}, {"sum_logits": -1.130630612373352, "num_tokens": 1, "num_tokens_all": 1043, "is_greedy": false, "logits_per_token": -1.130630612373352, "logits_per_char": -0.37687687079111737, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 631, "native_id": 1765, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.151872158050537, "incorrect_loss_raw": 0.40908583998680115, "correct_loss_per_char": 0.2879680395126343, "incorrect_loss_per_char": 0.13636194666226706, "correct_loss_per_token": 1.151872158050537, "incorrect_loss_per_token": 0.40908583998680115, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.151872158050537, "num_tokens": 1, "num_tokens_all": 1056, "is_greedy": false, "logits_per_token": -1.151872158050537, "logits_per_char": -0.2879680395126343, "num_chars": 4}, {"sum_logits": -0.40908583998680115, "num_tokens": 1, "num_tokens_all": 1056, "is_greedy": true, "logits_per_token": -0.40908583998680115, "logits_per_char": -0.13636194666226706, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 632, "native_id": 1933, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9978611469268799, "incorrect_loss_raw": 0.498812735080719, "correct_loss_per_char": 0.24946528673171997, "incorrect_loss_per_char": 0.166270911693573, "correct_loss_per_token": 0.9978611469268799, "incorrect_loss_per_token": 0.498812735080719, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9978611469268799, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": false, "logits_per_token": -0.9978611469268799, "logits_per_char": -0.24946528673171997, "num_chars": 4}, {"sum_logits": -0.498812735080719, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": true, "logits_per_token": -0.498812735080719, "logits_per_char": -0.166270911693573, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 633, "native_id": 1141, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4675876498222351, "incorrect_loss_raw": 1.1629364490509033, "correct_loss_per_char": 0.15586254994074503, "incorrect_loss_per_char": 0.29073411226272583, "correct_loss_per_token": 0.4675876498222351, "incorrect_loss_per_token": 1.1629364490509033, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1629364490509033, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -1.1629364490509033, "logits_per_char": -0.29073411226272583, "num_chars": 4}, {"sum_logits": -0.4675876498222351, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -0.4675876498222351, "logits_per_char": -0.15586254994074503, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 634, "native_id": 1292, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6129735708236694, "incorrect_loss_raw": 0.8295429348945618, "correct_loss_per_char": 0.2043245236078898, "incorrect_loss_per_char": 0.20738573372364044, "correct_loss_per_token": 0.6129735708236694, "incorrect_loss_per_token": 0.8295429348945618, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8295429348945618, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": false, "logits_per_token": -0.8295429348945618, "logits_per_char": -0.20738573372364044, "num_chars": 4}, {"sum_logits": -0.6129735708236694, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": true, "logits_per_token": -0.6129735708236694, "logits_per_char": -0.2043245236078898, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 635, "native_id": 686, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4741748571395874, "incorrect_loss_raw": 1.1419590711593628, "correct_loss_per_char": 0.1580582857131958, "incorrect_loss_per_char": 0.2854897677898407, "correct_loss_per_token": 0.4741748571395874, "incorrect_loss_per_token": 1.1419590711593628, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1419590711593628, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": false, "logits_per_token": -1.1419590711593628, "logits_per_char": -0.2854897677898407, "num_chars": 4}, {"sum_logits": -0.4741748571395874, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": true, "logits_per_token": -0.4741748571395874, "logits_per_char": -0.1580582857131958, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 636, "native_id": 270, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.33701491355896, "incorrect_loss_raw": 0.328022301197052, "correct_loss_per_char": 0.33425372838974, "incorrect_loss_per_char": 0.109340767065684, "correct_loss_per_token": 1.33701491355896, "incorrect_loss_per_token": 0.328022301197052, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.33701491355896, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": false, "logits_per_token": -1.33701491355896, "logits_per_char": -0.33425372838974, "num_chars": 4}, {"sum_logits": -0.328022301197052, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": true, "logits_per_token": -0.328022301197052, "logits_per_char": -0.109340767065684, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 637, "native_id": 1799, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8034473657608032, "incorrect_loss_raw": 0.6322982907295227, "correct_loss_per_char": 0.2008618414402008, "incorrect_loss_per_char": 0.2107660969098409, "correct_loss_per_token": 0.8034473657608032, "incorrect_loss_per_token": 0.6322982907295227, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8034473657608032, "num_tokens": 1, "num_tokens_all": 1058, "is_greedy": false, "logits_per_token": -0.8034473657608032, "logits_per_char": -0.2008618414402008, "num_chars": 4}, {"sum_logits": -0.6322982907295227, "num_tokens": 1, "num_tokens_all": 1058, "is_greedy": true, "logits_per_token": -0.6322982907295227, "logits_per_char": -0.2107660969098409, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 638, "native_id": 943, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7435672879219055, "incorrect_loss_raw": 0.7201786637306213, "correct_loss_per_char": 0.24785576264063516, "incorrect_loss_per_char": 0.18004466593265533, "correct_loss_per_token": 0.7435672879219055, "incorrect_loss_per_token": 0.7201786637306213, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7201786637306213, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": true, "logits_per_token": -0.7201786637306213, "logits_per_char": -0.18004466593265533, "num_chars": 4}, {"sum_logits": -0.7435672879219055, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": false, "logits_per_token": -0.7435672879219055, "logits_per_char": -0.24785576264063516, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 639, "native_id": 1811, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0831927061080933, "incorrect_loss_raw": 0.49026238918304443, "correct_loss_per_char": 0.2707981765270233, "incorrect_loss_per_char": 0.16342079639434814, "correct_loss_per_token": 1.0831927061080933, "incorrect_loss_per_token": 0.49026238918304443, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0831927061080933, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -1.0831927061080933, "logits_per_char": -0.2707981765270233, "num_chars": 4}, {"sum_logits": -0.49026238918304443, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.49026238918304443, "logits_per_char": -0.16342079639434814, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 640, "native_id": 1022, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.040390968322754, "incorrect_loss_raw": 0.46521681547164917, "correct_loss_per_char": 0.2600977420806885, "incorrect_loss_per_char": 0.15507227182388306, "correct_loss_per_token": 1.040390968322754, "incorrect_loss_per_token": 0.46521681547164917, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.040390968322754, "num_tokens": 1, "num_tokens_all": 879, "is_greedy": false, "logits_per_token": -1.040390968322754, "logits_per_char": -0.2600977420806885, "num_chars": 4}, {"sum_logits": -0.46521681547164917, "num_tokens": 1, "num_tokens_all": 879, "is_greedy": true, "logits_per_token": -0.46521681547164917, "logits_per_char": -0.15507227182388306, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 641, "native_id": 273, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5358483195304871, "incorrect_loss_raw": 0.9862020611763, "correct_loss_per_char": 0.13396207988262177, "incorrect_loss_per_char": 0.32873402039210003, "correct_loss_per_token": 0.5358483195304871, "incorrect_loss_per_token": 0.9862020611763, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5358483195304871, "num_tokens": 1, "num_tokens_all": 861, "is_greedy": true, "logits_per_token": -0.5358483195304871, "logits_per_char": -0.13396207988262177, "num_chars": 4}, {"sum_logits": -0.9862020611763, "num_tokens": 1, "num_tokens_all": 861, "is_greedy": false, "logits_per_token": -0.9862020611763, "logits_per_char": -0.32873402039210003, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 642, "native_id": 1092, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5145736932754517, "incorrect_loss_raw": 0.9734487533569336, "correct_loss_per_char": 0.17152456442515054, "incorrect_loss_per_char": 0.2433621883392334, "correct_loss_per_token": 0.5145736932754517, "incorrect_loss_per_token": 0.9734487533569336, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9734487533569336, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": false, "logits_per_token": -0.9734487533569336, "logits_per_char": -0.2433621883392334, "num_chars": 4}, {"sum_logits": -0.5145736932754517, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": true, "logits_per_token": -0.5145736932754517, "logits_per_char": -0.17152456442515054, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 643, "native_id": 2709, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8065494894981384, "incorrect_loss_raw": 0.6861383318901062, "correct_loss_per_char": 0.2016373723745346, "incorrect_loss_per_char": 0.22871277729670206, "correct_loss_per_token": 0.8065494894981384, "incorrect_loss_per_token": 0.6861383318901062, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8065494894981384, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -0.8065494894981384, "logits_per_char": -0.2016373723745346, "num_chars": 4}, {"sum_logits": -0.6861383318901062, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.6861383318901062, "logits_per_char": -0.22871277729670206, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 644, "native_id": 2578, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4225560426712036, "incorrect_loss_raw": 0.29606080055236816, "correct_loss_per_char": 0.3556390106678009, "incorrect_loss_per_char": 0.09868693351745605, "correct_loss_per_token": 1.4225560426712036, "incorrect_loss_per_token": 0.29606080055236816, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4225560426712036, "num_tokens": 1, "num_tokens_all": 893, "is_greedy": false, "logits_per_token": -1.4225560426712036, "logits_per_char": -0.3556390106678009, "num_chars": 4}, {"sum_logits": -0.29606080055236816, "num_tokens": 1, "num_tokens_all": 893, "is_greedy": true, "logits_per_token": -0.29606080055236816, "logits_per_char": -0.09868693351745605, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 645, "native_id": 2299, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26078417897224426, "incorrect_loss_raw": 1.6793475151062012, "correct_loss_per_char": 0.08692805965741475, "incorrect_loss_per_char": 0.4198368787765503, "correct_loss_per_token": 0.26078417897224426, "incorrect_loss_per_token": 1.6793475151062012, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6793475151062012, "num_tokens": 1, "num_tokens_all": 1011, "is_greedy": false, "logits_per_token": -1.6793475151062012, "logits_per_char": -0.4198368787765503, "num_chars": 4}, {"sum_logits": -0.26078417897224426, "num_tokens": 1, "num_tokens_all": 1011, "is_greedy": true, "logits_per_token": -0.26078417897224426, "logits_per_char": -0.08692805965741475, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 646, "native_id": 3033, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.27740851044654846, "incorrect_loss_raw": 1.637743353843689, "correct_loss_per_char": 0.09246950348218282, "incorrect_loss_per_char": 0.40943583846092224, "correct_loss_per_token": 0.27740851044654846, "incorrect_loss_per_token": 1.637743353843689, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.637743353843689, "num_tokens": 1, "num_tokens_all": 861, "is_greedy": false, "logits_per_token": -1.637743353843689, "logits_per_char": -0.40943583846092224, "num_chars": 4}, {"sum_logits": -0.27740851044654846, "num_tokens": 1, "num_tokens_all": 861, "is_greedy": true, "logits_per_token": -0.27740851044654846, "logits_per_char": -0.09246950348218282, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 647, "native_id": 3076, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5342320203781128, "incorrect_loss_raw": 0.9828306436538696, "correct_loss_per_char": 0.1335580050945282, "incorrect_loss_per_char": 0.32761021455128986, "correct_loss_per_token": 0.5342320203781128, "incorrect_loss_per_token": 0.9828306436538696, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5342320203781128, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": true, "logits_per_token": -0.5342320203781128, "logits_per_char": -0.1335580050945282, "num_chars": 4}, {"sum_logits": -0.9828306436538696, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -0.9828306436538696, "logits_per_char": -0.32761021455128986, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 648, "native_id": 1614, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.695528507232666, "incorrect_loss_raw": 0.7332918643951416, "correct_loss_per_char": 0.1738821268081665, "incorrect_loss_per_char": 0.2444306214650472, "correct_loss_per_token": 0.695528507232666, "incorrect_loss_per_token": 0.7332918643951416, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.695528507232666, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": true, "logits_per_token": -0.695528507232666, "logits_per_char": -0.1738821268081665, "num_chars": 4}, {"sum_logits": -0.7332918643951416, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": false, "logits_per_token": -0.7332918643951416, "logits_per_char": -0.2444306214650472, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 649, "native_id": 892, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8317258954048157, "incorrect_loss_raw": 0.6213054060935974, "correct_loss_per_char": 0.20793147385120392, "incorrect_loss_per_char": 0.20710180203119913, "correct_loss_per_token": 0.8317258954048157, "incorrect_loss_per_token": 0.6213054060935974, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8317258954048157, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -0.8317258954048157, "logits_per_char": -0.20793147385120392, "num_chars": 4}, {"sum_logits": -0.6213054060935974, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": true, "logits_per_token": -0.6213054060935974, "logits_per_char": -0.20710180203119913, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 650, "native_id": 823, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1437454223632812, "incorrect_loss_raw": 0.5087850093841553, "correct_loss_per_char": 0.2859363555908203, "incorrect_loss_per_char": 0.16959500312805176, "correct_loss_per_token": 1.1437454223632812, "incorrect_loss_per_token": 0.5087850093841553, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1437454223632812, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": false, "logits_per_token": -1.1437454223632812, "logits_per_char": -0.2859363555908203, "num_chars": 4}, {"sum_logits": -0.5087850093841553, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": true, "logits_per_token": -0.5087850093841553, "logits_per_char": -0.16959500312805176, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 651, "native_id": 2295, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6047067642211914, "incorrect_loss_raw": 0.2788715958595276, "correct_loss_per_char": 0.40117669105529785, "incorrect_loss_per_char": 0.09295719861984253, "correct_loss_per_token": 1.6047067642211914, "incorrect_loss_per_token": 0.2788715958595276, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6047067642211914, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": false, "logits_per_token": -1.6047067642211914, "logits_per_char": -0.40117669105529785, "num_chars": 4}, {"sum_logits": -0.2788715958595276, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": true, "logits_per_token": -0.2788715958595276, "logits_per_char": -0.09295719861984253, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 652, "native_id": 2139, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5649203062057495, "incorrect_loss_raw": 0.9887381792068481, "correct_loss_per_char": 0.14123007655143738, "incorrect_loss_per_char": 0.3295793930689494, "correct_loss_per_token": 0.5649203062057495, "incorrect_loss_per_token": 0.9887381792068481, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5649203062057495, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": true, "logits_per_token": -0.5649203062057495, "logits_per_char": -0.14123007655143738, "num_chars": 4}, {"sum_logits": -0.9887381792068481, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": false, "logits_per_token": -0.9887381792068481, "logits_per_char": -0.3295793930689494, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 653, "native_id": 598, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8207383751869202, "incorrect_loss_raw": 0.6232336163520813, "correct_loss_per_char": 0.20518459379673004, "incorrect_loss_per_char": 0.2077445387840271, "correct_loss_per_token": 0.8207383751869202, "incorrect_loss_per_token": 0.6232336163520813, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8207383751869202, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": false, "logits_per_token": -0.8207383751869202, "logits_per_char": -0.20518459379673004, "num_chars": 4}, {"sum_logits": -0.6232336163520813, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": true, "logits_per_token": -0.6232336163520813, "logits_per_char": -0.2077445387840271, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 654, "native_id": 868, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6698975563049316, "incorrect_loss_raw": 0.24549035727977753, "correct_loss_per_char": 0.4174743890762329, "incorrect_loss_per_char": 0.08183011909325917, "correct_loss_per_token": 1.6698975563049316, "incorrect_loss_per_token": 0.24549035727977753, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6698975563049316, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": false, "logits_per_token": -1.6698975563049316, "logits_per_char": -0.4174743890762329, "num_chars": 4}, {"sum_logits": -0.24549035727977753, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": true, "logits_per_token": -0.24549035727977753, "logits_per_char": -0.08183011909325917, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 655, "native_id": 1403, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.42400696873664856, "incorrect_loss_raw": 1.203932762145996, "correct_loss_per_char": 0.14133565624554953, "incorrect_loss_per_char": 0.300983190536499, "correct_loss_per_token": 0.42400696873664856, "incorrect_loss_per_token": 1.203932762145996, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.203932762145996, "num_tokens": 1, "num_tokens_all": 893, "is_greedy": false, "logits_per_token": -1.203932762145996, "logits_per_char": -0.300983190536499, "num_chars": 4}, {"sum_logits": -0.42400696873664856, "num_tokens": 1, "num_tokens_all": 893, "is_greedy": true, "logits_per_token": -0.42400696873664856, "logits_per_char": -0.14133565624554953, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 656, "native_id": 2531, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7093474864959717, "incorrect_loss_raw": 0.828731894493103, "correct_loss_per_char": 0.17733687162399292, "incorrect_loss_per_char": 0.27624396483103436, "correct_loss_per_token": 0.7093474864959717, "incorrect_loss_per_token": 0.828731894493103, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7093474864959717, "num_tokens": 1, "num_tokens_all": 862, "is_greedy": true, "logits_per_token": -0.7093474864959717, "logits_per_char": -0.17733687162399292, "num_chars": 4}, {"sum_logits": -0.828731894493103, "num_tokens": 1, "num_tokens_all": 862, "is_greedy": false, "logits_per_token": -0.828731894493103, "logits_per_char": -0.27624396483103436, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 657, "native_id": 1692, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0134416818618774, "incorrect_loss_raw": 0.5429239869117737, "correct_loss_per_char": 0.25336042046546936, "incorrect_loss_per_char": 0.18097466230392456, "correct_loss_per_token": 1.0134416818618774, "incorrect_loss_per_token": 0.5429239869117737, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0134416818618774, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": false, "logits_per_token": -1.0134416818618774, "logits_per_char": -0.25336042046546936, "num_chars": 4}, {"sum_logits": -0.5429239869117737, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": true, "logits_per_token": -0.5429239869117737, "logits_per_char": -0.18097466230392456, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 658, "native_id": 7, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.197936773300171, "incorrect_loss_raw": 0.5026336312294006, "correct_loss_per_char": 0.2994841933250427, "incorrect_loss_per_char": 0.16754454374313354, "correct_loss_per_token": 1.197936773300171, "incorrect_loss_per_token": 0.5026336312294006, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.197936773300171, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": false, "logits_per_token": -1.197936773300171, "logits_per_char": -0.2994841933250427, "num_chars": 4}, {"sum_logits": -0.5026336312294006, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": true, "logits_per_token": -0.5026336312294006, "logits_per_char": -0.16754454374313354, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 659, "native_id": 2660, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.005981683731079, "incorrect_loss_raw": 0.5591539740562439, "correct_loss_per_char": 0.3353272279103597, "incorrect_loss_per_char": 0.13978849351406097, "correct_loss_per_token": 1.005981683731079, "incorrect_loss_per_token": 0.5591539740562439, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5591539740562439, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": true, "logits_per_token": -0.5591539740562439, "logits_per_char": -0.13978849351406097, "num_chars": 4}, {"sum_logits": -1.005981683731079, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": false, "logits_per_token": -1.005981683731079, "logits_per_char": -0.3353272279103597, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 660, "native_id": 3190, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.48897063732147217, "incorrect_loss_raw": 1.0886991024017334, "correct_loss_per_char": 0.16299021244049072, "incorrect_loss_per_char": 0.27217477560043335, "correct_loss_per_token": 0.48897063732147217, "incorrect_loss_per_token": 1.0886991024017334, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0886991024017334, "num_tokens": 1, "num_tokens_all": 1457, "is_greedy": false, "logits_per_token": -1.0886991024017334, "logits_per_char": -0.27217477560043335, "num_chars": 4}, {"sum_logits": -0.48897063732147217, "num_tokens": 1, "num_tokens_all": 1457, "is_greedy": true, "logits_per_token": -0.48897063732147217, "logits_per_char": -0.16299021244049072, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 661, "native_id": 783, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6312044262886047, "incorrect_loss_raw": 0.807168185710907, "correct_loss_per_char": 0.2104014754295349, "incorrect_loss_per_char": 0.20179204642772675, "correct_loss_per_token": 0.6312044262886047, "incorrect_loss_per_token": 0.807168185710907, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.807168185710907, "num_tokens": 1, "num_tokens_all": 902, "is_greedy": false, "logits_per_token": -0.807168185710907, "logits_per_char": -0.20179204642772675, "num_chars": 4}, {"sum_logits": -0.6312044262886047, "num_tokens": 1, "num_tokens_all": 902, "is_greedy": true, "logits_per_token": -0.6312044262886047, "logits_per_char": -0.2104014754295349, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 662, "native_id": 916, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1163785457611084, "incorrect_loss_raw": 0.5003928542137146, "correct_loss_per_char": 0.2790946364402771, "incorrect_loss_per_char": 0.1667976180712382, "correct_loss_per_token": 1.1163785457611084, "incorrect_loss_per_token": 0.5003928542137146, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1163785457611084, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": false, "logits_per_token": -1.1163785457611084, "logits_per_char": -0.2790946364402771, "num_chars": 4}, {"sum_logits": -0.5003928542137146, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": true, "logits_per_token": -0.5003928542137146, "logits_per_char": -0.1667976180712382, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 663, "native_id": 2266, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0065891742706299, "incorrect_loss_raw": 0.5196868777275085, "correct_loss_per_char": 0.25164729356765747, "incorrect_loss_per_char": 0.17322895924250284, "correct_loss_per_token": 1.0065891742706299, "incorrect_loss_per_token": 0.5196868777275085, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0065891742706299, "num_tokens": 1, "num_tokens_all": 1011, "is_greedy": false, "logits_per_token": -1.0065891742706299, "logits_per_char": -0.25164729356765747, "num_chars": 4}, {"sum_logits": -0.5196868777275085, "num_tokens": 1, "num_tokens_all": 1011, "is_greedy": true, "logits_per_token": -0.5196868777275085, "logits_per_char": -0.17322895924250284, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 664, "native_id": 67, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5711149573326111, "incorrect_loss_raw": 1.0144908428192139, "correct_loss_per_char": 0.1903716524442037, "incorrect_loss_per_char": 0.25362271070480347, "correct_loss_per_token": 0.5711149573326111, "incorrect_loss_per_token": 1.0144908428192139, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0144908428192139, "num_tokens": 1, "num_tokens_all": 855, "is_greedy": false, "logits_per_token": -1.0144908428192139, "logits_per_char": -0.25362271070480347, "num_chars": 4}, {"sum_logits": -0.5711149573326111, "num_tokens": 1, "num_tokens_all": 855, "is_greedy": true, "logits_per_token": -0.5711149573326111, "logits_per_char": -0.1903716524442037, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 665, "native_id": 2848, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8703339099884033, "incorrect_loss_raw": 0.6039667129516602, "correct_loss_per_char": 0.21758347749710083, "incorrect_loss_per_char": 0.20132223765055338, "correct_loss_per_token": 0.8703339099884033, "incorrect_loss_per_token": 0.6039667129516602, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8703339099884033, "num_tokens": 1, "num_tokens_all": 880, "is_greedy": false, "logits_per_token": -0.8703339099884033, "logits_per_char": -0.21758347749710083, "num_chars": 4}, {"sum_logits": -0.6039667129516602, "num_tokens": 1, "num_tokens_all": 880, "is_greedy": true, "logits_per_token": -0.6039667129516602, "logits_per_char": -0.20132223765055338, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 666, "native_id": 1487, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6723889708518982, "incorrect_loss_raw": 0.7817361354827881, "correct_loss_per_char": 0.16809724271297455, "incorrect_loss_per_char": 0.260578711827596, "correct_loss_per_token": 0.6723889708518982, "incorrect_loss_per_token": 0.7817361354827881, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6723889708518982, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": true, "logits_per_token": -0.6723889708518982, "logits_per_char": -0.16809724271297455, "num_chars": 4}, {"sum_logits": -0.7817361354827881, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -0.7817361354827881, "logits_per_char": -0.260578711827596, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 667, "native_id": 1803, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7366189360618591, "incorrect_loss_raw": 0.7529943585395813, "correct_loss_per_char": 0.18415473401546478, "incorrect_loss_per_char": 0.2509981195131938, "correct_loss_per_token": 0.7366189360618591, "incorrect_loss_per_token": 0.7529943585395813, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7366189360618591, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": true, "logits_per_token": -0.7366189360618591, "logits_per_char": -0.18415473401546478, "num_chars": 4}, {"sum_logits": -0.7529943585395813, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -0.7529943585395813, "logits_per_char": -0.2509981195131938, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 668, "native_id": 968, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0280094146728516, "incorrect_loss_raw": 0.4987679421901703, "correct_loss_per_char": 0.2570023536682129, "incorrect_loss_per_char": 0.16625598073005676, "correct_loss_per_token": 1.0280094146728516, "incorrect_loss_per_token": 0.4987679421901703, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0280094146728516, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": false, "logits_per_token": -1.0280094146728516, "logits_per_char": -0.2570023536682129, "num_chars": 4}, {"sum_logits": -0.4987679421901703, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": true, "logits_per_token": -0.4987679421901703, "logits_per_char": -0.16625598073005676, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 669, "native_id": 45, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2970834970474243, "incorrect_loss_raw": 0.36515215039253235, "correct_loss_per_char": 0.3242708742618561, "incorrect_loss_per_char": 0.12171738346417744, "correct_loss_per_token": 1.2970834970474243, "incorrect_loss_per_token": 0.36515215039253235, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2970834970474243, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": false, "logits_per_token": -1.2970834970474243, "logits_per_char": -0.3242708742618561, "num_chars": 4}, {"sum_logits": -0.36515215039253235, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": true, "logits_per_token": -0.36515215039253235, "logits_per_char": -0.12171738346417744, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 670, "native_id": 1697, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6517547965049744, "incorrect_loss_raw": 0.7895494699478149, "correct_loss_per_char": 0.21725159883499146, "incorrect_loss_per_char": 0.19738736748695374, "correct_loss_per_token": 0.6517547965049744, "incorrect_loss_per_token": 0.7895494699478149, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7895494699478149, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": false, "logits_per_token": -0.7895494699478149, "logits_per_char": -0.19738736748695374, "num_chars": 4}, {"sum_logits": -0.6517547965049744, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": true, "logits_per_token": -0.6517547965049744, "logits_per_char": -0.21725159883499146, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 671, "native_id": 1729, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8059006333351135, "incorrect_loss_raw": 0.6321761012077332, "correct_loss_per_char": 0.20147515833377838, "incorrect_loss_per_char": 0.21072536706924438, "correct_loss_per_token": 0.8059006333351135, "incorrect_loss_per_token": 0.6321761012077332, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8059006333351135, "num_tokens": 1, "num_tokens_all": 948, "is_greedy": false, "logits_per_token": -0.8059006333351135, "logits_per_char": -0.20147515833377838, "num_chars": 4}, {"sum_logits": -0.6321761012077332, "num_tokens": 1, "num_tokens_all": 948, "is_greedy": true, "logits_per_token": -0.6321761012077332, "logits_per_char": -0.21072536706924438, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 672, "native_id": 2034, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6140682101249695, "incorrect_loss_raw": 0.8418951034545898, "correct_loss_per_char": 0.15351705253124237, "incorrect_loss_per_char": 0.28063170115152997, "correct_loss_per_token": 0.6140682101249695, "incorrect_loss_per_token": 0.8418951034545898, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6140682101249695, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": true, "logits_per_token": -0.6140682101249695, "logits_per_char": -0.15351705253124237, "num_chars": 4}, {"sum_logits": -0.8418951034545898, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": false, "logits_per_token": -0.8418951034545898, "logits_per_char": -0.28063170115152997, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 673, "native_id": 1727, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6156080961227417, "incorrect_loss_raw": 0.8996953964233398, "correct_loss_per_char": 0.15390202403068542, "incorrect_loss_per_char": 0.2998984654744466, "correct_loss_per_token": 0.6156080961227417, "incorrect_loss_per_token": 0.8996953964233398, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6156080961227417, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": true, "logits_per_token": -0.6156080961227417, "logits_per_char": -0.15390202403068542, "num_chars": 4}, {"sum_logits": -0.8996953964233398, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -0.8996953964233398, "logits_per_char": -0.2998984654744466, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 674, "native_id": 2981, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 2.4510087966918945, "incorrect_loss_raw": 0.11402730643749237, "correct_loss_per_char": 0.6127521991729736, "incorrect_loss_per_char": 0.03800910214583079, "correct_loss_per_token": 2.4510087966918945, "incorrect_loss_per_token": 0.11402730643749237, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -2.4510087966918945, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": false, "logits_per_token": -2.4510087966918945, "logits_per_char": -0.6127521991729736, "num_chars": 4}, {"sum_logits": -0.11402730643749237, "num_tokens": 1, "num_tokens_all": 883, "is_greedy": true, "logits_per_token": -0.11402730643749237, "logits_per_char": -0.03800910214583079, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 675, "native_id": 3164, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.712939977645874, "incorrect_loss_raw": 0.24775047600269318, "correct_loss_per_char": 0.4282349944114685, "incorrect_loss_per_char": 0.08258349200089772, "correct_loss_per_token": 1.712939977645874, "incorrect_loss_per_token": 0.24775047600269318, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.712939977645874, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": false, "logits_per_token": -1.712939977645874, "logits_per_char": -0.4282349944114685, "num_chars": 4}, {"sum_logits": -0.24775047600269318, "num_tokens": 1, "num_tokens_all": 974, "is_greedy": true, "logits_per_token": -0.24775047600269318, "logits_per_char": -0.08258349200089772, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 676, "native_id": 2610, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5914899110794067, "incorrect_loss_raw": 0.9226021766662598, "correct_loss_per_char": 0.14787247776985168, "incorrect_loss_per_char": 0.30753405888875324, "correct_loss_per_token": 0.5914899110794067, "incorrect_loss_per_token": 0.9226021766662598, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5914899110794067, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": true, "logits_per_token": -0.5914899110794067, "logits_per_char": -0.14787247776985168, "num_chars": 4}, {"sum_logits": -0.9226021766662598, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": false, "logits_per_token": -0.9226021766662598, "logits_per_char": -0.30753405888875324, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 677, "native_id": 1021, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.386625051498413, "incorrect_loss_raw": 0.31229573488235474, "correct_loss_per_char": 0.34665626287460327, "incorrect_loss_per_char": 0.10409857829411824, "correct_loss_per_token": 1.386625051498413, "incorrect_loss_per_token": 0.31229573488235474, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.386625051498413, "num_tokens": 1, "num_tokens_all": 892, "is_greedy": false, "logits_per_token": -1.386625051498413, "logits_per_char": -0.34665626287460327, "num_chars": 4}, {"sum_logits": -0.31229573488235474, "num_tokens": 1, "num_tokens_all": 892, "is_greedy": true, "logits_per_token": -0.31229573488235474, "logits_per_char": -0.10409857829411824, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 678, "native_id": 2403, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8734967708587646, "incorrect_loss_raw": 0.6050245761871338, "correct_loss_per_char": 0.21837419271469116, "incorrect_loss_per_char": 0.2016748587290446, "correct_loss_per_token": 0.8734967708587646, "incorrect_loss_per_token": 0.6050245761871338, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8734967708587646, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -0.8734967708587646, "logits_per_char": -0.21837419271469116, "num_chars": 4}, {"sum_logits": -0.6050245761871338, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": true, "logits_per_token": -0.6050245761871338, "logits_per_char": -0.2016748587290446, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 679, "native_id": 3216, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3735159635543823, "incorrect_loss_raw": 1.2342684268951416, "correct_loss_per_char": 0.12450532118479411, "incorrect_loss_per_char": 0.3085671067237854, "correct_loss_per_token": 0.3735159635543823, "incorrect_loss_per_token": 1.2342684268951416, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2342684268951416, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": false, "logits_per_token": -1.2342684268951416, "logits_per_char": -0.3085671067237854, "num_chars": 4}, {"sum_logits": -0.3735159635543823, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": true, "logits_per_token": -0.3735159635543823, "logits_per_char": -0.12450532118479411, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 680, "native_id": 2308, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.33975639939308167, "incorrect_loss_raw": 1.3500902652740479, "correct_loss_per_char": 0.11325213313102722, "incorrect_loss_per_char": 0.33752256631851196, "correct_loss_per_token": 0.33975639939308167, "incorrect_loss_per_token": 1.3500902652740479, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3500902652740479, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.3500902652740479, "logits_per_char": -0.33752256631851196, "num_chars": 4}, {"sum_logits": -0.33975639939308167, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": true, "logits_per_token": -0.33975639939308167, "logits_per_char": -0.11325213313102722, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 681, "native_id": 1985, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.43398118019104004, "incorrect_loss_raw": 1.2195552587509155, "correct_loss_per_char": 0.14466039339701334, "incorrect_loss_per_char": 0.3048888146877289, "correct_loss_per_token": 0.43398118019104004, "incorrect_loss_per_token": 1.2195552587509155, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2195552587509155, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": false, "logits_per_token": -1.2195552587509155, "logits_per_char": -0.3048888146877289, "num_chars": 4}, {"sum_logits": -0.43398118019104004, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": true, "logits_per_token": -0.43398118019104004, "logits_per_char": -0.14466039339701334, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 682, "native_id": 3114, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4314805269241333, "incorrect_loss_raw": 1.1131174564361572, "correct_loss_per_char": 0.10787013173103333, "incorrect_loss_per_char": 0.37103915214538574, "correct_loss_per_token": 0.4314805269241333, "incorrect_loss_per_token": 1.1131174564361572, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4314805269241333, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": true, "logits_per_token": -0.4314805269241333, "logits_per_char": -0.10787013173103333, "num_chars": 4}, {"sum_logits": -1.1131174564361572, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -1.1131174564361572, "logits_per_char": -0.37103915214538574, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 683, "native_id": 1920, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9348385334014893, "incorrect_loss_raw": 0.5296198129653931, "correct_loss_per_char": 0.23370963335037231, "incorrect_loss_per_char": 0.176539937655131, "correct_loss_per_token": 0.9348385334014893, "incorrect_loss_per_token": 0.5296198129653931, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9348385334014893, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": false, "logits_per_token": -0.9348385334014893, "logits_per_char": -0.23370963335037231, "num_chars": 4}, {"sum_logits": -0.5296198129653931, "num_tokens": 1, "num_tokens_all": 988, "is_greedy": true, "logits_per_token": -0.5296198129653931, "logits_per_char": -0.176539937655131, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 684, "native_id": 2419, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9608896374702454, "incorrect_loss_raw": 0.5287822484970093, "correct_loss_per_char": 0.24022240936756134, "incorrect_loss_per_char": 0.17626074949900308, "correct_loss_per_token": 0.9608896374702454, "incorrect_loss_per_token": 0.5287822484970093, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9608896374702454, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": false, "logits_per_token": -0.9608896374702454, "logits_per_char": -0.24022240936756134, "num_chars": 4}, {"sum_logits": -0.5287822484970093, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": true, "logits_per_token": -0.5287822484970093, "logits_per_char": -0.17626074949900308, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 685, "native_id": 69, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6433782577514648, "incorrect_loss_raw": 0.2561333179473877, "correct_loss_per_char": 0.4108445644378662, "incorrect_loss_per_char": 0.08537777264912923, "correct_loss_per_token": 1.6433782577514648, "incorrect_loss_per_token": 0.2561333179473877, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6433782577514648, "num_tokens": 1, "num_tokens_all": 998, "is_greedy": false, "logits_per_token": -1.6433782577514648, "logits_per_char": -0.4108445644378662, "num_chars": 4}, {"sum_logits": -0.2561333179473877, "num_tokens": 1, "num_tokens_all": 998, "is_greedy": true, "logits_per_token": -0.2561333179473877, "logits_per_char": -0.08537777264912923, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 686, "native_id": 82, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7945403456687927, "incorrect_loss_raw": 0.6681437492370605, "correct_loss_per_char": 0.2648467818895976, "incorrect_loss_per_char": 0.16703593730926514, "correct_loss_per_token": 0.7945403456687927, "incorrect_loss_per_token": 0.6681437492370605, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6681437492370605, "num_tokens": 1, "num_tokens_all": 1017, "is_greedy": true, "logits_per_token": -0.6681437492370605, "logits_per_char": -0.16703593730926514, "num_chars": 4}, {"sum_logits": -0.7945403456687927, "num_tokens": 1, "num_tokens_all": 1017, "is_greedy": false, "logits_per_token": -0.7945403456687927, "logits_per_char": -0.2648467818895976, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 687, "native_id": 1196, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3518417775630951, "incorrect_loss_raw": 1.3767591714859009, "correct_loss_per_char": 0.08796044439077377, "incorrect_loss_per_char": 0.4589197238286336, "correct_loss_per_token": 0.3518417775630951, "incorrect_loss_per_token": 1.3767591714859009, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3518417775630951, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": true, "logits_per_token": -0.3518417775630951, "logits_per_char": -0.08796044439077377, "num_chars": 4}, {"sum_logits": -1.3767591714859009, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": false, "logits_per_token": -1.3767591714859009, "logits_per_char": -0.4589197238286336, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 688, "native_id": 2321, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4864489436149597, "incorrect_loss_raw": 1.0433343648910522, "correct_loss_per_char": 0.16214964787165323, "incorrect_loss_per_char": 0.26083359122276306, "correct_loss_per_token": 0.4864489436149597, "incorrect_loss_per_token": 1.0433343648910522, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0433343648910522, "num_tokens": 1, "num_tokens_all": 1019, "is_greedy": false, "logits_per_token": -1.0433343648910522, "logits_per_char": -0.26083359122276306, "num_chars": 4}, {"sum_logits": -0.4864489436149597, "num_tokens": 1, "num_tokens_all": 1019, "is_greedy": true, "logits_per_token": -0.4864489436149597, "logits_per_char": -0.16214964787165323, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 689, "native_id": 505, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7555429935455322, "incorrect_loss_raw": 0.6690837144851685, "correct_loss_per_char": 0.18888574838638306, "incorrect_loss_per_char": 0.2230279048283895, "correct_loss_per_token": 0.7555429935455322, "incorrect_loss_per_token": 0.6690837144851685, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7555429935455322, "num_tokens": 1, "num_tokens_all": 1094, "is_greedy": false, "logits_per_token": -0.7555429935455322, "logits_per_char": -0.18888574838638306, "num_chars": 4}, {"sum_logits": -0.6690837144851685, "num_tokens": 1, "num_tokens_all": 1094, "is_greedy": true, "logits_per_token": -0.6690837144851685, "logits_per_char": -0.2230279048283895, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 690, "native_id": 1852, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9439579248428345, "incorrect_loss_raw": 0.5460116863250732, "correct_loss_per_char": 0.23598948121070862, "incorrect_loss_per_char": 0.18200389544169107, "correct_loss_per_token": 0.9439579248428345, "incorrect_loss_per_token": 0.5460116863250732, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9439579248428345, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": false, "logits_per_token": -0.9439579248428345, "logits_per_char": -0.23598948121070862, "num_chars": 4}, {"sum_logits": -0.5460116863250732, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": true, "logits_per_token": -0.5460116863250732, "logits_per_char": -0.18200389544169107, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 691, "native_id": 2342, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5914424657821655, "incorrect_loss_raw": 0.8831042051315308, "correct_loss_per_char": 0.19714748859405518, "incorrect_loss_per_char": 0.2207760512828827, "correct_loss_per_token": 0.5914424657821655, "incorrect_loss_per_token": 0.8831042051315308, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8831042051315308, "num_tokens": 1, "num_tokens_all": 1194, "is_greedy": false, "logits_per_token": -0.8831042051315308, "logits_per_char": -0.2207760512828827, "num_chars": 4}, {"sum_logits": -0.5914424657821655, "num_tokens": 1, "num_tokens_all": 1194, "is_greedy": true, "logits_per_token": -0.5914424657821655, "logits_per_char": -0.19714748859405518, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 692, "native_id": 1003, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9743748903274536, "incorrect_loss_raw": 0.646731972694397, "correct_loss_per_char": 0.2435937225818634, "incorrect_loss_per_char": 0.21557732423146567, "correct_loss_per_token": 0.9743748903274536, "incorrect_loss_per_token": 0.646731972694397, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9743748903274536, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -0.9743748903274536, "logits_per_char": -0.2435937225818634, "num_chars": 4}, {"sum_logits": -0.646731972694397, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": true, "logits_per_token": -0.646731972694397, "logits_per_char": -0.21557732423146567, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 693, "native_id": 3124, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2090801000595093, "incorrect_loss_raw": 0.38035643100738525, "correct_loss_per_char": 0.3022700250148773, "incorrect_loss_per_char": 0.12678547700246176, "correct_loss_per_token": 1.2090801000595093, "incorrect_loss_per_token": 0.38035643100738525, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2090801000595093, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": false, "logits_per_token": -1.2090801000595093, "logits_per_char": -0.3022700250148773, "num_chars": 4}, {"sum_logits": -0.38035643100738525, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": true, "logits_per_token": -0.38035643100738525, "logits_per_char": -0.12678547700246176, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 694, "native_id": 1716, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9023401737213135, "incorrect_loss_raw": 0.5689069032669067, "correct_loss_per_char": 0.22558504343032837, "incorrect_loss_per_char": 0.18963563442230225, "correct_loss_per_token": 0.9023401737213135, "incorrect_loss_per_token": 0.5689069032669067, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9023401737213135, "num_tokens": 1, "num_tokens_all": 863, "is_greedy": false, "logits_per_token": -0.9023401737213135, "logits_per_char": -0.22558504343032837, "num_chars": 4}, {"sum_logits": -0.5689069032669067, "num_tokens": 1, "num_tokens_all": 863, "is_greedy": true, "logits_per_token": -0.5689069032669067, "logits_per_char": -0.18963563442230225, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 695, "native_id": 857, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.668879508972168, "incorrect_loss_raw": 0.827084481716156, "correct_loss_per_char": 0.167219877243042, "incorrect_loss_per_char": 0.2756948272387187, "correct_loss_per_token": 0.668879508972168, "incorrect_loss_per_token": 0.827084481716156, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.668879508972168, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": true, "logits_per_token": -0.668879508972168, "logits_per_char": -0.167219877243042, "num_chars": 4}, {"sum_logits": -0.827084481716156, "num_tokens": 1, "num_tokens_all": 865, "is_greedy": false, "logits_per_token": -0.827084481716156, "logits_per_char": -0.2756948272387187, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 696, "native_id": 172, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4663805961608887, "incorrect_loss_raw": 0.34362146258354187, "correct_loss_per_char": 0.36659514904022217, "incorrect_loss_per_char": 0.11454048752784729, "correct_loss_per_token": 1.4663805961608887, "incorrect_loss_per_token": 0.34362146258354187, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4663805961608887, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": false, "logits_per_token": -1.4663805961608887, "logits_per_char": -0.36659514904022217, "num_chars": 4}, {"sum_logits": -0.34362146258354187, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": true, "logits_per_token": -0.34362146258354187, "logits_per_char": -0.11454048752784729, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 697, "native_id": 1766, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5561650395393372, "incorrect_loss_raw": 0.9041077494621277, "correct_loss_per_char": 0.1390412598848343, "incorrect_loss_per_char": 0.30136924982070923, "correct_loss_per_token": 0.5561650395393372, "incorrect_loss_per_token": 0.9041077494621277, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5561650395393372, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": true, "logits_per_token": -0.5561650395393372, "logits_per_char": -0.1390412598848343, "num_chars": 4}, {"sum_logits": -0.9041077494621277, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": false, "logits_per_token": -0.9041077494621277, "logits_per_char": -0.30136924982070923, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 698, "native_id": 2697, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3665200173854828, "incorrect_loss_raw": 1.3147943019866943, "correct_loss_per_char": 0.0916300043463707, "incorrect_loss_per_char": 0.43826476732889813, "correct_loss_per_token": 0.3665200173854828, "incorrect_loss_per_token": 1.3147943019866943, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3665200173854828, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": true, "logits_per_token": -0.3665200173854828, "logits_per_char": -0.0916300043463707, "num_chars": 4}, {"sum_logits": -1.3147943019866943, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": false, "logits_per_token": -1.3147943019866943, "logits_per_char": -0.43826476732889813, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 699, "native_id": 456, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.42458510398864746, "incorrect_loss_raw": 1.128946304321289, "correct_loss_per_char": 0.14152836799621582, "incorrect_loss_per_char": 0.28223657608032227, "correct_loss_per_token": 0.42458510398864746, "incorrect_loss_per_token": 1.128946304321289, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.128946304321289, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -1.128946304321289, "logits_per_char": -0.28223657608032227, "num_chars": 4}, {"sum_logits": -0.42458510398864746, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": true, "logits_per_token": -0.42458510398864746, "logits_per_char": -0.14152836799621582, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 700, "native_id": 1690, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3843971788883209, "incorrect_loss_raw": 1.2145224809646606, "correct_loss_per_char": 0.12813239296277365, "incorrect_loss_per_char": 0.30363062024116516, "correct_loss_per_token": 0.3843971788883209, "incorrect_loss_per_token": 1.2145224809646606, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2145224809646606, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.2145224809646606, "logits_per_char": -0.30363062024116516, "num_chars": 4}, {"sum_logits": -0.3843971788883209, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -0.3843971788883209, "logits_per_char": -0.12813239296277365, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 701, "native_id": 729, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0526697635650635, "incorrect_loss_raw": 0.45769253373146057, "correct_loss_per_char": 0.26316744089126587, "incorrect_loss_per_char": 0.15256417791048685, "correct_loss_per_token": 1.0526697635650635, "incorrect_loss_per_token": 0.45769253373146057, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0526697635650635, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": false, "logits_per_token": -1.0526697635650635, "logits_per_char": -0.26316744089126587, "num_chars": 4}, {"sum_logits": -0.45769253373146057, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": true, "logits_per_token": -0.45769253373146057, "logits_per_char": -0.15256417791048685, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 702, "native_id": 2794, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.33810991048812866, "incorrect_loss_raw": 1.3961296081542969, "correct_loss_per_char": 0.08452747762203217, "incorrect_loss_per_char": 0.4653765360514323, "correct_loss_per_token": 0.33810991048812866, "incorrect_loss_per_token": 1.3961296081542969, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.33810991048812866, "num_tokens": 1, "num_tokens_all": 866, "is_greedy": true, "logits_per_token": -0.33810991048812866, "logits_per_char": -0.08452747762203217, "num_chars": 4}, {"sum_logits": -1.3961296081542969, "num_tokens": 1, "num_tokens_all": 866, "is_greedy": false, "logits_per_token": -1.3961296081542969, "logits_per_char": -0.4653765360514323, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 703, "native_id": 2711, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.32155901193618774, "incorrect_loss_raw": 1.420170783996582, "correct_loss_per_char": 0.10718633731206258, "incorrect_loss_per_char": 0.3550426959991455, "correct_loss_per_token": 0.32155901193618774, "incorrect_loss_per_token": 1.420170783996582, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.420170783996582, "num_tokens": 1, "num_tokens_all": 834, "is_greedy": false, "logits_per_token": -1.420170783996582, "logits_per_char": -0.3550426959991455, "num_chars": 4}, {"sum_logits": -0.32155901193618774, "num_tokens": 1, "num_tokens_all": 834, "is_greedy": true, "logits_per_token": -0.32155901193618774, "logits_per_char": -0.10718633731206258, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 704, "native_id": 2967, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0534626245498657, "incorrect_loss_raw": 0.5229239463806152, "correct_loss_per_char": 0.26336565613746643, "incorrect_loss_per_char": 0.17430798212687174, "correct_loss_per_token": 1.0534626245498657, "incorrect_loss_per_token": 0.5229239463806152, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0534626245498657, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": false, "logits_per_token": -1.0534626245498657, "logits_per_char": -0.26336565613746643, "num_chars": 4}, {"sum_logits": -0.5229239463806152, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": true, "logits_per_token": -0.5229239463806152, "logits_per_char": -0.17430798212687174, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 705, "native_id": 1509, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.921862781047821, "incorrect_loss_raw": 0.593249499797821, "correct_loss_per_char": 0.23046569526195526, "incorrect_loss_per_char": 0.19774983326594034, "correct_loss_per_token": 0.921862781047821, "incorrect_loss_per_token": 0.593249499797821, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.921862781047821, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": false, "logits_per_token": -0.921862781047821, "logits_per_char": -0.23046569526195526, "num_chars": 4}, {"sum_logits": -0.593249499797821, "num_tokens": 1, "num_tokens_all": 939, "is_greedy": true, "logits_per_token": -0.593249499797821, "logits_per_char": -0.19774983326594034, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 706, "native_id": 698, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2501853108406067, "incorrect_loss_raw": 1.7254762649536133, "correct_loss_per_char": 0.08339510361353557, "incorrect_loss_per_char": 0.4313690662384033, "correct_loss_per_token": 0.2501853108406067, "incorrect_loss_per_token": 1.7254762649536133, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7254762649536133, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": false, "logits_per_token": -1.7254762649536133, "logits_per_char": -0.4313690662384033, "num_chars": 4}, {"sum_logits": -0.2501853108406067, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": true, "logits_per_token": -0.2501853108406067, "logits_per_char": -0.08339510361353557, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 707, "native_id": 2917, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7563013434410095, "incorrect_loss_raw": 0.7330851554870605, "correct_loss_per_char": 0.18907533586025238, "incorrect_loss_per_char": 0.24436171849568686, "correct_loss_per_token": 0.7563013434410095, "incorrect_loss_per_token": 0.7330851554870605, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7563013434410095, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": false, "logits_per_token": -0.7563013434410095, "logits_per_char": -0.18907533586025238, "num_chars": 4}, {"sum_logits": -0.7330851554870605, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": true, "logits_per_token": -0.7330851554870605, "logits_per_char": -0.24436171849568686, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 708, "native_id": 259, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.563503623008728, "incorrect_loss_raw": 0.9095855355262756, "correct_loss_per_char": 0.18783454100290933, "incorrect_loss_per_char": 0.2273963838815689, "correct_loss_per_token": 0.563503623008728, "incorrect_loss_per_token": 0.9095855355262756, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9095855355262756, "num_tokens": 1, "num_tokens_all": 1020, "is_greedy": false, "logits_per_token": -0.9095855355262756, "logits_per_char": -0.2273963838815689, "num_chars": 4}, {"sum_logits": -0.563503623008728, "num_tokens": 1, "num_tokens_all": 1020, "is_greedy": true, "logits_per_token": -0.563503623008728, "logits_per_char": -0.18783454100290933, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 709, "native_id": 2099, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6866273283958435, "incorrect_loss_raw": 0.8560150861740112, "correct_loss_per_char": 0.17165683209896088, "incorrect_loss_per_char": 0.2853383620580037, "correct_loss_per_token": 0.6866273283958435, "incorrect_loss_per_token": 0.8560150861740112, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6866273283958435, "num_tokens": 1, "num_tokens_all": 1022, "is_greedy": true, "logits_per_token": -0.6866273283958435, "logits_per_char": -0.17165683209896088, "num_chars": 4}, {"sum_logits": -0.8560150861740112, "num_tokens": 1, "num_tokens_all": 1022, "is_greedy": false, "logits_per_token": -0.8560150861740112, "logits_per_char": -0.2853383620580037, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 710, "native_id": 1556, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.42093417048454285, "incorrect_loss_raw": 1.1626904010772705, "correct_loss_per_char": 0.14031139016151428, "incorrect_loss_per_char": 0.2906726002693176, "correct_loss_per_token": 0.42093417048454285, "incorrect_loss_per_token": 1.1626904010772705, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1626904010772705, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": false, "logits_per_token": -1.1626904010772705, "logits_per_char": -0.2906726002693176, "num_chars": 4}, {"sum_logits": -0.42093417048454285, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": true, "logits_per_token": -0.42093417048454285, "logits_per_char": -0.14031139016151428, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 711, "native_id": 135, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6381371021270752, "incorrect_loss_raw": 0.7952048182487488, "correct_loss_per_char": 0.1595342755317688, "incorrect_loss_per_char": 0.26506827274958294, "correct_loss_per_token": 0.6381371021270752, "incorrect_loss_per_token": 0.7952048182487488, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6381371021270752, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": true, "logits_per_token": -0.6381371021270752, "logits_per_char": -0.1595342755317688, "num_chars": 4}, {"sum_logits": -0.7952048182487488, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": false, "logits_per_token": -0.7952048182487488, "logits_per_char": -0.26506827274958294, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 712, "native_id": 2775, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.545917272567749, "incorrect_loss_raw": 0.9512500166893005, "correct_loss_per_char": 0.13647931814193726, "incorrect_loss_per_char": 0.31708333889643353, "correct_loss_per_token": 0.545917272567749, "incorrect_loss_per_token": 0.9512500166893005, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.545917272567749, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": true, "logits_per_token": -0.545917272567749, "logits_per_char": -0.13647931814193726, "num_chars": 4}, {"sum_logits": -0.9512500166893005, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": false, "logits_per_token": -0.9512500166893005, "logits_per_char": -0.31708333889643353, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 713, "native_id": 1098, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6127805709838867, "incorrect_loss_raw": 0.8859666585922241, "correct_loss_per_char": 0.15319514274597168, "incorrect_loss_per_char": 0.2953222195307414, "correct_loss_per_token": 0.6127805709838867, "incorrect_loss_per_token": 0.8859666585922241, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6127805709838867, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.6127805709838867, "logits_per_char": -0.15319514274597168, "num_chars": 4}, {"sum_logits": -0.8859666585922241, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -0.8859666585922241, "logits_per_char": -0.2953222195307414, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 714, "native_id": 2993, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.20891700685024261, "incorrect_loss_raw": 1.7625898122787476, "correct_loss_per_char": 0.0696390022834142, "incorrect_loss_per_char": 0.4406474530696869, "correct_loss_per_token": 0.20891700685024261, "incorrect_loss_per_token": 1.7625898122787476, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7625898122787476, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": false, "logits_per_token": -1.7625898122787476, "logits_per_char": -0.4406474530696869, "num_chars": 4}, {"sum_logits": -0.20891700685024261, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": true, "logits_per_token": -0.20891700685024261, "logits_per_char": -0.0696390022834142, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 715, "native_id": 117, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8655247092247009, "incorrect_loss_raw": 0.5955712199211121, "correct_loss_per_char": 0.21638117730617523, "incorrect_loss_per_char": 0.198523739973704, "correct_loss_per_token": 0.8655247092247009, "incorrect_loss_per_token": 0.5955712199211121, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8655247092247009, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": false, "logits_per_token": -0.8655247092247009, "logits_per_char": -0.21638117730617523, "num_chars": 4}, {"sum_logits": -0.5955712199211121, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": true, "logits_per_token": -0.5955712199211121, "logits_per_char": -0.198523739973704, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 716, "native_id": 1413, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0476500988006592, "incorrect_loss_raw": 0.5157274603843689, "correct_loss_per_char": 0.2619125247001648, "incorrect_loss_per_char": 0.1719091534614563, "correct_loss_per_token": 1.0476500988006592, "incorrect_loss_per_token": 0.5157274603843689, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0476500988006592, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": false, "logits_per_token": -1.0476500988006592, "logits_per_char": -0.2619125247001648, "num_chars": 4}, {"sum_logits": -0.5157274603843689, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": true, "logits_per_token": -0.5157274603843689, "logits_per_char": -0.1719091534614563, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 717, "native_id": 2082, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7839303016662598, "incorrect_loss_raw": 0.2336091548204422, "correct_loss_per_char": 0.44598257541656494, "incorrect_loss_per_char": 0.07786971827348073, "correct_loss_per_token": 1.7839303016662598, "incorrect_loss_per_token": 0.2336091548204422, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7839303016662598, "num_tokens": 1, "num_tokens_all": 846, "is_greedy": false, "logits_per_token": -1.7839303016662598, "logits_per_char": -0.44598257541656494, "num_chars": 4}, {"sum_logits": -0.2336091548204422, "num_tokens": 1, "num_tokens_all": 846, "is_greedy": true, "logits_per_token": -0.2336091548204422, "logits_per_char": -0.07786971827348073, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 718, "native_id": 245, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5801624059677124, "incorrect_loss_raw": 0.9329334497451782, "correct_loss_per_char": 0.1450406014919281, "incorrect_loss_per_char": 0.3109778165817261, "correct_loss_per_token": 0.5801624059677124, "incorrect_loss_per_token": 0.9329334497451782, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5801624059677124, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": true, "logits_per_token": -0.5801624059677124, "logits_per_char": -0.1450406014919281, "num_chars": 4}, {"sum_logits": -0.9329334497451782, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": false, "logits_per_token": -0.9329334497451782, "logits_per_char": -0.3109778165817261, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 719, "native_id": 1125, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3093215227127075, "incorrect_loss_raw": 0.40906858444213867, "correct_loss_per_char": 0.4364405075709025, "incorrect_loss_per_char": 0.10226714611053467, "correct_loss_per_token": 1.3093215227127075, "incorrect_loss_per_token": 0.40906858444213867, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.40906858444213867, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": true, "logits_per_token": -0.40906858444213867, "logits_per_char": -0.10226714611053467, "num_chars": 4}, {"sum_logits": -1.3093215227127075, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": false, "logits_per_token": -1.3093215227127075, "logits_per_char": -0.4364405075709025, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 720, "native_id": 2120, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2519547939300537, "incorrect_loss_raw": 1.6961143016815186, "correct_loss_per_char": 0.0839849313100179, "incorrect_loss_per_char": 0.42402857542037964, "correct_loss_per_token": 0.2519547939300537, "incorrect_loss_per_token": 1.6961143016815186, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6961143016815186, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": false, "logits_per_token": -1.6961143016815186, "logits_per_char": -0.42402857542037964, "num_chars": 4}, {"sum_logits": -0.2519547939300537, "num_tokens": 1, "num_tokens_all": 925, "is_greedy": true, "logits_per_token": -0.2519547939300537, "logits_per_char": -0.0839849313100179, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 721, "native_id": 2604, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2719099521636963, "incorrect_loss_raw": 0.36329227685928345, "correct_loss_per_char": 0.3179774880409241, "incorrect_loss_per_char": 0.12109742561976115, "correct_loss_per_token": 1.2719099521636963, "incorrect_loss_per_token": 0.36329227685928345, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2719099521636963, "num_tokens": 1, "num_tokens_all": 1023, "is_greedy": false, "logits_per_token": -1.2719099521636963, "logits_per_char": -0.3179774880409241, "num_chars": 4}, {"sum_logits": -0.36329227685928345, "num_tokens": 1, "num_tokens_all": 1023, "is_greedy": true, "logits_per_token": -0.36329227685928345, "logits_per_char": -0.12109742561976115, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 722, "native_id": 2940, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.12846040725708, "incorrect_loss_raw": 0.4471023678779602, "correct_loss_per_char": 0.28211510181427, "incorrect_loss_per_char": 0.14903412262598673, "correct_loss_per_token": 1.12846040725708, "incorrect_loss_per_token": 0.4471023678779602, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.12846040725708, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": false, "logits_per_token": -1.12846040725708, "logits_per_char": -0.28211510181427, "num_chars": 4}, {"sum_logits": -0.4471023678779602, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": true, "logits_per_token": -0.4471023678779602, "logits_per_char": -0.14903412262598673, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 723, "native_id": 1685, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6992373466491699, "incorrect_loss_raw": 0.7727090120315552, "correct_loss_per_char": 0.2330791155497233, "incorrect_loss_per_char": 0.1931772530078888, "correct_loss_per_token": 0.6992373466491699, "incorrect_loss_per_token": 0.7727090120315552, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7727090120315552, "num_tokens": 1, "num_tokens_all": 901, "is_greedy": false, "logits_per_token": -0.7727090120315552, "logits_per_char": -0.1931772530078888, "num_chars": 4}, {"sum_logits": -0.6992373466491699, "num_tokens": 1, "num_tokens_all": 901, "is_greedy": true, "logits_per_token": -0.6992373466491699, "logits_per_char": -0.2330791155497233, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 724, "native_id": 1971, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.462179958820343, "incorrect_loss_raw": 1.0614807605743408, "correct_loss_per_char": 0.15405998627344766, "incorrect_loss_per_char": 0.2653701901435852, "correct_loss_per_token": 0.462179958820343, "incorrect_loss_per_token": 1.0614807605743408, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0614807605743408, "num_tokens": 1, "num_tokens_all": 1027, "is_greedy": false, "logits_per_token": -1.0614807605743408, "logits_per_char": -0.2653701901435852, "num_chars": 4}, {"sum_logits": -0.462179958820343, "num_tokens": 1, "num_tokens_all": 1027, "is_greedy": true, "logits_per_token": -0.462179958820343, "logits_per_char": -0.15405998627344766, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 725, "native_id": 124, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4651051163673401, "incorrect_loss_raw": 1.144164800643921, "correct_loss_per_char": 0.11627627909183502, "incorrect_loss_per_char": 0.38138826688130695, "correct_loss_per_token": 0.4651051163673401, "incorrect_loss_per_token": 1.144164800643921, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4651051163673401, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": true, "logits_per_token": -0.4651051163673401, "logits_per_char": -0.11627627909183502, "num_chars": 4}, {"sum_logits": -1.144164800643921, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": false, "logits_per_token": -1.144164800643921, "logits_per_char": -0.38138826688130695, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 726, "native_id": 2830, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4391734600067139, "incorrect_loss_raw": 0.3607781231403351, "correct_loss_per_char": 0.35979336500167847, "incorrect_loss_per_char": 0.1202593743801117, "correct_loss_per_token": 1.4391734600067139, "incorrect_loss_per_token": 0.3607781231403351, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4391734600067139, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": false, "logits_per_token": -1.4391734600067139, "logits_per_char": -0.35979336500167847, "num_chars": 4}, {"sum_logits": -0.3607781231403351, "num_tokens": 1, "num_tokens_all": 950, "is_greedy": true, "logits_per_token": -0.3607781231403351, "logits_per_char": -0.1202593743801117, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 727, "native_id": 747, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5790254473686218, "incorrect_loss_raw": 1.004532814025879, "correct_loss_per_char": 0.19300848245620728, "incorrect_loss_per_char": 0.2511332035064697, "correct_loss_per_token": 0.5790254473686218, "incorrect_loss_per_token": 1.004532814025879, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.004532814025879, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": false, "logits_per_token": -1.004532814025879, "logits_per_char": -0.2511332035064697, "num_chars": 4}, {"sum_logits": -0.5790254473686218, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": true, "logits_per_token": -0.5790254473686218, "logits_per_char": -0.19300848245620728, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 728, "native_id": 944, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4538935422897339, "incorrect_loss_raw": 0.3032025098800659, "correct_loss_per_char": 0.36347338557243347, "incorrect_loss_per_char": 0.1010675032933553, "correct_loss_per_token": 1.4538935422897339, "incorrect_loss_per_token": 0.3032025098800659, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4538935422897339, "num_tokens": 1, "num_tokens_all": 999, "is_greedy": false, "logits_per_token": -1.4538935422897339, "logits_per_char": -0.36347338557243347, "num_chars": 4}, {"sum_logits": -0.3032025098800659, "num_tokens": 1, "num_tokens_all": 999, "is_greedy": true, "logits_per_token": -0.3032025098800659, "logits_per_char": -0.1010675032933553, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 729, "native_id": 2006, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.133195161819458, "incorrect_loss_raw": 0.4139074683189392, "correct_loss_per_char": 0.2832987904548645, "incorrect_loss_per_char": 0.13796915610631308, "correct_loss_per_token": 1.133195161819458, "incorrect_loss_per_token": 0.4139074683189392, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.133195161819458, "num_tokens": 1, "num_tokens_all": 864, "is_greedy": false, "logits_per_token": -1.133195161819458, "logits_per_char": -0.2832987904548645, "num_chars": 4}, {"sum_logits": -0.4139074683189392, "num_tokens": 1, "num_tokens_all": 864, "is_greedy": true, "logits_per_token": -0.4139074683189392, "logits_per_char": -0.13796915610631308, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 730, "native_id": 2359, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4515947997570038, "incorrect_loss_raw": 1.1002612113952637, "correct_loss_per_char": 0.15053159991900125, "incorrect_loss_per_char": 0.2750653028488159, "correct_loss_per_token": 0.4515947997570038, "incorrect_loss_per_token": 1.1002612113952637, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1002612113952637, "num_tokens": 1, "num_tokens_all": 892, "is_greedy": false, "logits_per_token": -1.1002612113952637, "logits_per_char": -0.2750653028488159, "num_chars": 4}, {"sum_logits": -0.4515947997570038, "num_tokens": 1, "num_tokens_all": 892, "is_greedy": true, "logits_per_token": -0.4515947997570038, "logits_per_char": -0.15053159991900125, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 731, "native_id": 253, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2413899898529053, "incorrect_loss_raw": 0.3683241605758667, "correct_loss_per_char": 0.3103474974632263, "incorrect_loss_per_char": 0.12277472019195557, "correct_loss_per_token": 1.2413899898529053, "incorrect_loss_per_token": 0.3683241605758667, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2413899898529053, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": false, "logits_per_token": -1.2413899898529053, "logits_per_char": -0.3103474974632263, "num_chars": 4}, {"sum_logits": -0.3683241605758667, "num_tokens": 1, "num_tokens_all": 935, "is_greedy": true, "logits_per_token": -0.3683241605758667, "logits_per_char": -0.12277472019195557, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 732, "native_id": 1206, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9768253564834595, "incorrect_loss_raw": 0.529482364654541, "correct_loss_per_char": 0.24420633912086487, "incorrect_loss_per_char": 0.17649412155151367, "correct_loss_per_token": 0.9768253564834595, "incorrect_loss_per_token": 0.529482364654541, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9768253564834595, "num_tokens": 1, "num_tokens_all": 856, "is_greedy": false, "logits_per_token": -0.9768253564834595, "logits_per_char": -0.24420633912086487, "num_chars": 4}, {"sum_logits": -0.529482364654541, "num_tokens": 1, "num_tokens_all": 856, "is_greedy": true, "logits_per_token": -0.529482364654541, "logits_per_char": -0.17649412155151367, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 733, "native_id": 2904, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6314258575439453, "incorrect_loss_raw": 0.9306453466415405, "correct_loss_per_char": 0.21047528584798178, "incorrect_loss_per_char": 0.23266133666038513, "correct_loss_per_token": 0.6314258575439453, "incorrect_loss_per_token": 0.9306453466415405, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9306453466415405, "num_tokens": 1, "num_tokens_all": 1356, "is_greedy": false, "logits_per_token": -0.9306453466415405, "logits_per_char": -0.23266133666038513, "num_chars": 4}, {"sum_logits": -0.6314258575439453, "num_tokens": 1, "num_tokens_all": 1356, "is_greedy": true, "logits_per_token": -0.6314258575439453, "logits_per_char": -0.21047528584798178, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 734, "native_id": 1825, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5717775225639343, "incorrect_loss_raw": 0.9303925633430481, "correct_loss_per_char": 0.14294438064098358, "incorrect_loss_per_char": 0.3101308544476827, "correct_loss_per_token": 0.5717775225639343, "incorrect_loss_per_token": 0.9303925633430481, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5717775225639343, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": true, "logits_per_token": -0.5717775225639343, "logits_per_char": -0.14294438064098358, "num_chars": 4}, {"sum_logits": -0.9303925633430481, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -0.9303925633430481, "logits_per_char": -0.3101308544476827, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 735, "native_id": 1879, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.822851836681366, "incorrect_loss_raw": 0.6286640167236328, "correct_loss_per_char": 0.2057129591703415, "incorrect_loss_per_char": 0.20955467224121094, "correct_loss_per_token": 0.822851836681366, "incorrect_loss_per_token": 0.6286640167236328, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.822851836681366, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": false, "logits_per_token": -0.822851836681366, "logits_per_char": -0.2057129591703415, "num_chars": 4}, {"sum_logits": -0.6286640167236328, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": true, "logits_per_token": -0.6286640167236328, "logits_per_char": -0.20955467224121094, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 736, "native_id": 717, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8250266909599304, "incorrect_loss_raw": 0.6113052368164062, "correct_loss_per_char": 0.2062566727399826, "incorrect_loss_per_char": 0.2037684122721354, "correct_loss_per_token": 0.8250266909599304, "incorrect_loss_per_token": 0.6113052368164062, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8250266909599304, "num_tokens": 1, "num_tokens_all": 1037, "is_greedy": false, "logits_per_token": -0.8250266909599304, "logits_per_char": -0.2062566727399826, "num_chars": 4}, {"sum_logits": -0.6113052368164062, "num_tokens": 1, "num_tokens_all": 1037, "is_greedy": true, "logits_per_token": -0.6113052368164062, "logits_per_char": -0.2037684122721354, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 737, "native_id": 1078, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9562243223190308, "incorrect_loss_raw": 0.6253440380096436, "correct_loss_per_char": 0.31874144077301025, "incorrect_loss_per_char": 0.1563360095024109, "correct_loss_per_token": 0.9562243223190308, "incorrect_loss_per_token": 0.6253440380096436, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6253440380096436, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": true, "logits_per_token": -0.6253440380096436, "logits_per_char": -0.1563360095024109, "num_chars": 4}, {"sum_logits": -0.9562243223190308, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": false, "logits_per_token": -0.9562243223190308, "logits_per_char": -0.31874144077301025, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 738, "native_id": 660, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6817449927330017, "incorrect_loss_raw": 0.7524414658546448, "correct_loss_per_char": 0.17043624818325043, "incorrect_loss_per_char": 0.2508138219515483, "correct_loss_per_token": 0.6817449927330017, "incorrect_loss_per_token": 0.7524414658546448, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6817449927330017, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": true, "logits_per_token": -0.6817449927330017, "logits_per_char": -0.17043624818325043, "num_chars": 4}, {"sum_logits": -0.7524414658546448, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -0.7524414658546448, "logits_per_char": -0.2508138219515483, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 739, "native_id": 1709, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9362044930458069, "incorrect_loss_raw": 0.5455476641654968, "correct_loss_per_char": 0.23405112326145172, "incorrect_loss_per_char": 0.18184922138849893, "correct_loss_per_token": 0.9362044930458069, "incorrect_loss_per_token": 0.5455476641654968, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9362044930458069, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -0.9362044930458069, "logits_per_char": -0.23405112326145172, "num_chars": 4}, {"sum_logits": -0.5455476641654968, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": true, "logits_per_token": -0.5455476641654968, "logits_per_char": -0.18184922138849893, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 740, "native_id": 1276, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.910664439201355, "incorrect_loss_raw": 0.5686967372894287, "correct_loss_per_char": 0.22766610980033875, "incorrect_loss_per_char": 0.18956557909647623, "correct_loss_per_token": 0.910664439201355, "incorrect_loss_per_token": 0.5686967372894287, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.910664439201355, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -0.910664439201355, "logits_per_char": -0.22766610980033875, "num_chars": 4}, {"sum_logits": -0.5686967372894287, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": true, "logits_per_token": -0.5686967372894287, "logits_per_char": -0.18956557909647623, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 741, "native_id": 2617, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8383101224899292, "incorrect_loss_raw": 0.6669232845306396, "correct_loss_per_char": 0.2095775306224823, "incorrect_loss_per_char": 0.22230776151021323, "correct_loss_per_token": 0.8383101224899292, "incorrect_loss_per_token": 0.6669232845306396, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8383101224899292, "num_tokens": 1, "num_tokens_all": 1017, "is_greedy": false, "logits_per_token": -0.8383101224899292, "logits_per_char": -0.2095775306224823, "num_chars": 4}, {"sum_logits": -0.6669232845306396, "num_tokens": 1, "num_tokens_all": 1017, "is_greedy": true, "logits_per_token": -0.6669232845306396, "logits_per_char": -0.22230776151021323, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 742, "native_id": 39, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22482042014598846, "incorrect_loss_raw": 1.7938740253448486, "correct_loss_per_char": 0.07494014004866283, "incorrect_loss_per_char": 0.44846850633621216, "correct_loss_per_token": 0.22482042014598846, "incorrect_loss_per_token": 1.7938740253448486, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7938740253448486, "num_tokens": 1, "num_tokens_all": 856, "is_greedy": false, "logits_per_token": -1.7938740253448486, "logits_per_char": -0.44846850633621216, "num_chars": 4}, {"sum_logits": -0.22482042014598846, "num_tokens": 1, "num_tokens_all": 856, "is_greedy": true, "logits_per_token": -0.22482042014598846, "logits_per_char": -0.07494014004866283, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 743, "native_id": 2582, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6710748672485352, "incorrect_loss_raw": 0.8374252915382385, "correct_loss_per_char": 0.1677687168121338, "incorrect_loss_per_char": 0.2791417638460795, "correct_loss_per_token": 0.6710748672485352, "incorrect_loss_per_token": 0.8374252915382385, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6710748672485352, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -0.6710748672485352, "logits_per_char": -0.1677687168121338, "num_chars": 4}, {"sum_logits": -0.8374252915382385, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -0.8374252915382385, "logits_per_char": -0.2791417638460795, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 744, "native_id": 1335, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.40751975774765015, "incorrect_loss_raw": 1.2056810855865479, "correct_loss_per_char": 0.10187993943691254, "incorrect_loss_per_char": 0.40189369519551593, "correct_loss_per_token": 0.40751975774765015, "incorrect_loss_per_token": 1.2056810855865479, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.40751975774765015, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": true, "logits_per_token": -0.40751975774765015, "logits_per_char": -0.10187993943691254, "num_chars": 4}, {"sum_logits": -1.2056810855865479, "num_tokens": 1, "num_tokens_all": 1062, "is_greedy": false, "logits_per_token": -1.2056810855865479, "logits_per_char": -0.40189369519551593, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 745, "native_id": 3159, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1792240142822266, "incorrect_loss_raw": 0.4079782962799072, "correct_loss_per_char": 0.29480600357055664, "incorrect_loss_per_char": 0.13599276542663574, "correct_loss_per_token": 1.1792240142822266, "incorrect_loss_per_token": 0.4079782962799072, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1792240142822266, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": false, "logits_per_token": -1.1792240142822266, "logits_per_char": -0.29480600357055664, "num_chars": 4}, {"sum_logits": -0.4079782962799072, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": true, "logits_per_token": -0.4079782962799072, "logits_per_char": -0.13599276542663574, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 746, "native_id": 3097, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8300235271453857, "incorrect_loss_raw": 0.6525408625602722, "correct_loss_per_char": 0.20750588178634644, "incorrect_loss_per_char": 0.21751362085342407, "correct_loss_per_token": 0.8300235271453857, "incorrect_loss_per_token": 0.6525408625602722, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8300235271453857, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -0.8300235271453857, "logits_per_char": -0.20750588178634644, "num_chars": 4}, {"sum_logits": -0.6525408625602722, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": true, "logits_per_token": -0.6525408625602722, "logits_per_char": -0.21751362085342407, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 747, "native_id": 759, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3619229793548584, "incorrect_loss_raw": 0.3591561019420624, "correct_loss_per_char": 0.3404807448387146, "incorrect_loss_per_char": 0.11971870064735413, "correct_loss_per_token": 1.3619229793548584, "incorrect_loss_per_token": 0.3591561019420624, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3619229793548584, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": false, "logits_per_token": -1.3619229793548584, "logits_per_char": -0.3404807448387146, "num_chars": 4}, {"sum_logits": -0.3591561019420624, "num_tokens": 1, "num_tokens_all": 882, "is_greedy": true, "logits_per_token": -0.3591561019420624, "logits_per_char": -0.11971870064735413, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 748, "native_id": 133, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6160374879837036, "incorrect_loss_raw": 0.8242533206939697, "correct_loss_per_char": 0.2053458293279012, "incorrect_loss_per_char": 0.20606333017349243, "correct_loss_per_token": 0.6160374879837036, "incorrect_loss_per_token": 0.8242533206939697, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8242533206939697, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": false, "logits_per_token": -0.8242533206939697, "logits_per_char": -0.20606333017349243, "num_chars": 4}, {"sum_logits": -0.6160374879837036, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": true, "logits_per_token": -0.6160374879837036, "logits_per_char": -0.2053458293279012, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 749, "native_id": 1258, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7669996023178101, "incorrect_loss_raw": 0.6804906725883484, "correct_loss_per_char": 0.19174990057945251, "incorrect_loss_per_char": 0.22683022419611612, "correct_loss_per_token": 0.7669996023178101, "incorrect_loss_per_token": 0.6804906725883484, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7669996023178101, "num_tokens": 1, "num_tokens_all": 1171, "is_greedy": false, "logits_per_token": -0.7669996023178101, "logits_per_char": -0.19174990057945251, "num_chars": 4}, {"sum_logits": -0.6804906725883484, "num_tokens": 1, "num_tokens_all": 1171, "is_greedy": true, "logits_per_token": -0.6804906725883484, "logits_per_char": -0.22683022419611612, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 750, "native_id": 2482, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1786651611328125, "incorrect_loss_raw": 0.42574062943458557, "correct_loss_per_char": 0.2946662902832031, "incorrect_loss_per_char": 0.14191354314486185, "correct_loss_per_token": 1.1786651611328125, "incorrect_loss_per_token": 0.42574062943458557, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1786651611328125, "num_tokens": 1, "num_tokens_all": 888, "is_greedy": false, "logits_per_token": -1.1786651611328125, "logits_per_char": -0.2946662902832031, "num_chars": 4}, {"sum_logits": -0.42574062943458557, "num_tokens": 1, "num_tokens_all": 888, "is_greedy": true, "logits_per_token": -0.42574062943458557, "logits_per_char": -0.14191354314486185, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 751, "native_id": 3085, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5396251678466797, "incorrect_loss_raw": 0.964323878288269, "correct_loss_per_char": 0.17987505594889322, "incorrect_loss_per_char": 0.24108096957206726, "correct_loss_per_token": 0.5396251678466797, "incorrect_loss_per_token": 0.964323878288269, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.964323878288269, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": false, "logits_per_token": -0.964323878288269, "logits_per_char": -0.24108096957206726, "num_chars": 4}, {"sum_logits": -0.5396251678466797, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": true, "logits_per_token": -0.5396251678466797, "logits_per_char": -0.17987505594889322, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 752, "native_id": 73, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9631102085113525, "incorrect_loss_raw": 0.5157071352005005, "correct_loss_per_char": 0.24077755212783813, "incorrect_loss_per_char": 0.17190237840016684, "correct_loss_per_token": 0.9631102085113525, "incorrect_loss_per_token": 0.5157071352005005, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9631102085113525, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": false, "logits_per_token": -0.9631102085113525, "logits_per_char": -0.24077755212783813, "num_chars": 4}, {"sum_logits": -0.5157071352005005, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": true, "logits_per_token": -0.5157071352005005, "logits_per_char": -0.17190237840016684, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 753, "native_id": 1739, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7456649541854858, "incorrect_loss_raw": 0.6908268928527832, "correct_loss_per_char": 0.18641623854637146, "incorrect_loss_per_char": 0.23027563095092773, "correct_loss_per_token": 0.7456649541854858, "incorrect_loss_per_token": 0.6908268928527832, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7456649541854858, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": false, "logits_per_token": -0.7456649541854858, "logits_per_char": -0.18641623854637146, "num_chars": 4}, {"sum_logits": -0.6908268928527832, "num_tokens": 1, "num_tokens_all": 946, "is_greedy": true, "logits_per_token": -0.6908268928527832, "logits_per_char": -0.23027563095092773, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 754, "native_id": 2916, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3202136754989624, "incorrect_loss_raw": 0.34639090299606323, "correct_loss_per_char": 0.3300534188747406, "incorrect_loss_per_char": 0.11546363433202107, "correct_loss_per_token": 1.3202136754989624, "incorrect_loss_per_token": 0.34639090299606323, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3202136754989624, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": false, "logits_per_token": -1.3202136754989624, "logits_per_char": -0.3300534188747406, "num_chars": 4}, {"sum_logits": -0.34639090299606323, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": true, "logits_per_token": -0.34639090299606323, "logits_per_char": -0.11546363433202107, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 755, "native_id": 1780, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2232671976089478, "incorrect_loss_raw": 0.37894192337989807, "correct_loss_per_char": 0.30581679940223694, "incorrect_loss_per_char": 0.12631397445996603, "correct_loss_per_token": 1.2232671976089478, "incorrect_loss_per_token": 0.37894192337989807, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2232671976089478, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": false, "logits_per_token": -1.2232671976089478, "logits_per_char": -0.30581679940223694, "num_chars": 4}, {"sum_logits": -0.37894192337989807, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": true, "logits_per_token": -0.37894192337989807, "logits_per_char": -0.12631397445996603, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 756, "native_id": 1388, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8159051537513733, "incorrect_loss_raw": 0.6823217868804932, "correct_loss_per_char": 0.2719683845837911, "incorrect_loss_per_char": 0.1705804467201233, "correct_loss_per_token": 0.8159051537513733, "incorrect_loss_per_token": 0.6823217868804932, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6823217868804932, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": true, "logits_per_token": -0.6823217868804932, "logits_per_char": -0.1705804467201233, "num_chars": 4}, {"sum_logits": -0.8159051537513733, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": false, "logits_per_token": -0.8159051537513733, "logits_per_char": -0.2719683845837911, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 757, "native_id": 1174, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8079717755317688, "incorrect_loss_raw": 0.6261155009269714, "correct_loss_per_char": 0.2693239251772563, "incorrect_loss_per_char": 0.15652887523174286, "correct_loss_per_token": 0.8079717755317688, "incorrect_loss_per_token": 0.6261155009269714, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6261155009269714, "num_tokens": 1, "num_tokens_all": 900, "is_greedy": true, "logits_per_token": -0.6261155009269714, "logits_per_char": -0.15652887523174286, "num_chars": 4}, {"sum_logits": -0.8079717755317688, "num_tokens": 1, "num_tokens_all": 900, "is_greedy": false, "logits_per_token": -0.8079717755317688, "logits_per_char": -0.2693239251772563, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 758, "native_id": 385, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5520454049110413, "incorrect_loss_raw": 0.9133171439170837, "correct_loss_per_char": 0.13801135122776031, "incorrect_loss_per_char": 0.30443904797236127, "correct_loss_per_token": 0.5520454049110413, "incorrect_loss_per_token": 0.9133171439170837, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5520454049110413, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": true, "logits_per_token": -0.5520454049110413, "logits_per_char": -0.13801135122776031, "num_chars": 4}, {"sum_logits": -0.9133171439170837, "num_tokens": 1, "num_tokens_all": 909, "is_greedy": false, "logits_per_token": -0.9133171439170837, "logits_per_char": -0.30443904797236127, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 759, "native_id": 327, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2600451111793518, "incorrect_loss_raw": 1.5769305229187012, "correct_loss_per_char": 0.0866817037264506, "incorrect_loss_per_char": 0.3942326307296753, "correct_loss_per_token": 0.2600451111793518, "incorrect_loss_per_token": 1.5769305229187012, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5769305229187012, "num_tokens": 1, "num_tokens_all": 888, "is_greedy": false, "logits_per_token": -1.5769305229187012, "logits_per_char": -0.3942326307296753, "num_chars": 4}, {"sum_logits": -0.2600451111793518, "num_tokens": 1, "num_tokens_all": 888, "is_greedy": true, "logits_per_token": -0.2600451111793518, "logits_per_char": -0.0866817037264506, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 760, "native_id": 2363, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5865468978881836, "incorrect_loss_raw": 0.8619900941848755, "correct_loss_per_char": 0.19551563262939453, "incorrect_loss_per_char": 0.21549752354621887, "correct_loss_per_token": 0.5865468978881836, "incorrect_loss_per_token": 0.8619900941848755, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8619900941848755, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": false, "logits_per_token": -0.8619900941848755, "logits_per_char": -0.21549752354621887, "num_chars": 4}, {"sum_logits": -0.5865468978881836, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": true, "logits_per_token": -0.5865468978881836, "logits_per_char": -0.19551563262939453, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 761, "native_id": 2575, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6150471568107605, "incorrect_loss_raw": 1.0599325895309448, "correct_loss_per_char": 0.20501571893692017, "incorrect_loss_per_char": 0.2649831473827362, "correct_loss_per_token": 0.6150471568107605, "incorrect_loss_per_token": 1.0599325895309448, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0599325895309448, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": false, "logits_per_token": -1.0599325895309448, "logits_per_char": -0.2649831473827362, "num_chars": 4}, {"sum_logits": -0.6150471568107605, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": true, "logits_per_token": -0.6150471568107605, "logits_per_char": -0.20501571893692017, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 762, "native_id": 2334, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2790561318397522, "incorrect_loss_raw": 1.554459571838379, "correct_loss_per_char": 0.09301871061325073, "incorrect_loss_per_char": 0.3886148929595947, "correct_loss_per_token": 0.2790561318397522, "incorrect_loss_per_token": 1.554459571838379, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.554459571838379, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -1.554459571838379, "logits_per_char": -0.3886148929595947, "num_chars": 4}, {"sum_logits": -0.2790561318397522, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.2790561318397522, "logits_per_char": -0.09301871061325073, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 763, "native_id": 2779, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5893802642822266, "incorrect_loss_raw": 0.9071964621543884, "correct_loss_per_char": 0.19646008809407553, "incorrect_loss_per_char": 0.2267991155385971, "correct_loss_per_token": 0.5893802642822266, "incorrect_loss_per_token": 0.9071964621543884, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9071964621543884, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -0.9071964621543884, "logits_per_char": -0.2267991155385971, "num_chars": 4}, {"sum_logits": -0.5893802642822266, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -0.5893802642822266, "logits_per_char": -0.19646008809407553, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 764, "native_id": 2648, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4584472179412842, "incorrect_loss_raw": 1.0614429712295532, "correct_loss_per_char": 0.15281573931376138, "incorrect_loss_per_char": 0.2653607428073883, "correct_loss_per_token": 0.4584472179412842, "incorrect_loss_per_token": 1.0614429712295532, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0614429712295532, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -1.0614429712295532, "logits_per_char": -0.2653607428073883, "num_chars": 4}, {"sum_logits": -0.4584472179412842, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": true, "logits_per_token": -0.4584472179412842, "logits_per_char": -0.15281573931376138, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 765, "native_id": 2464, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8691784143447876, "incorrect_loss_raw": 0.5989934802055359, "correct_loss_per_char": 0.2172946035861969, "incorrect_loss_per_char": 0.1996644934018453, "correct_loss_per_token": 0.8691784143447876, "incorrect_loss_per_token": 0.5989934802055359, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8691784143447876, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": false, "logits_per_token": -0.8691784143447876, "logits_per_char": -0.2172946035861969, "num_chars": 4}, {"sum_logits": -0.5989934802055359, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": true, "logits_per_token": -0.5989934802055359, "logits_per_char": -0.1996644934018453, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 766, "native_id": 3120, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2367170751094818, "incorrect_loss_raw": 1.654764175415039, "correct_loss_per_char": 0.0789056917031606, "incorrect_loss_per_char": 0.41369104385375977, "correct_loss_per_token": 0.2367170751094818, "incorrect_loss_per_token": 1.654764175415039, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.654764175415039, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": false, "logits_per_token": -1.654764175415039, "logits_per_char": -0.41369104385375977, "num_chars": 4}, {"sum_logits": -0.2367170751094818, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": true, "logits_per_token": -0.2367170751094818, "logits_per_char": -0.0789056917031606, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 767, "native_id": 2884, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9065740704536438, "incorrect_loss_raw": 0.587139904499054, "correct_loss_per_char": 0.3021913568178813, "incorrect_loss_per_char": 0.1467849761247635, "correct_loss_per_token": 0.9065740704536438, "incorrect_loss_per_token": 0.587139904499054, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.587139904499054, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": true, "logits_per_token": -0.587139904499054, "logits_per_char": -0.1467849761247635, "num_chars": 4}, {"sum_logits": -0.9065740704536438, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": false, "logits_per_token": -0.9065740704536438, "logits_per_char": -0.3021913568178813, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 768, "native_id": 2630, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3384106159210205, "incorrect_loss_raw": 0.3379128575325012, "correct_loss_per_char": 0.3346026539802551, "incorrect_loss_per_char": 0.11263761917750041, "correct_loss_per_token": 1.3384106159210205, "incorrect_loss_per_token": 0.3379128575325012, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3384106159210205, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": false, "logits_per_token": -1.3384106159210205, "logits_per_char": -0.3346026539802551, "num_chars": 4}, {"sum_logits": -0.3379128575325012, "num_tokens": 1, "num_tokens_all": 911, "is_greedy": true, "logits_per_token": -0.3379128575325012, "logits_per_char": -0.11263761917750041, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 769, "native_id": 2147, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6968308687210083, "incorrect_loss_raw": 0.7778476476669312, "correct_loss_per_char": 0.2322769562403361, "incorrect_loss_per_char": 0.1944619119167328, "correct_loss_per_token": 0.6968308687210083, "incorrect_loss_per_token": 0.7778476476669312, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7778476476669312, "num_tokens": 1, "num_tokens_all": 1006, "is_greedy": false, "logits_per_token": -0.7778476476669312, "logits_per_char": -0.1944619119167328, "num_chars": 4}, {"sum_logits": -0.6968308687210083, "num_tokens": 1, "num_tokens_all": 1006, "is_greedy": true, "logits_per_token": -0.6968308687210083, "logits_per_char": -0.2322769562403361, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 770, "native_id": 748, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7495087385177612, "incorrect_loss_raw": 0.6970585584640503, "correct_loss_per_char": 0.24983624617258707, "incorrect_loss_per_char": 0.17426463961601257, "correct_loss_per_token": 0.7495087385177612, "incorrect_loss_per_token": 0.6970585584640503, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6970585584640503, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": true, "logits_per_token": -0.6970585584640503, "logits_per_char": -0.17426463961601257, "num_chars": 4}, {"sum_logits": -0.7495087385177612, "num_tokens": 1, "num_tokens_all": 977, "is_greedy": false, "logits_per_token": -0.7495087385177612, "logits_per_char": -0.24983624617258707, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 771, "native_id": 1662, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4584698975086212, "incorrect_loss_raw": 1.164337158203125, "correct_loss_per_char": 0.1146174743771553, "incorrect_loss_per_char": 0.3881123860677083, "correct_loss_per_token": 0.4584698975086212, "incorrect_loss_per_token": 1.164337158203125, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4584698975086212, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": true, "logits_per_token": -0.4584698975086212, "logits_per_char": -0.1146174743771553, "num_chars": 4}, {"sum_logits": -1.164337158203125, "num_tokens": 1, "num_tokens_all": 984, "is_greedy": false, "logits_per_token": -1.164337158203125, "logits_per_char": -0.3881123860677083, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 772, "native_id": 436, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3750660717487335, "incorrect_loss_raw": 1.301052212715149, "correct_loss_per_char": 0.1250220239162445, "incorrect_loss_per_char": 0.32526305317878723, "correct_loss_per_token": 0.3750660717487335, "incorrect_loss_per_token": 1.301052212715149, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.301052212715149, "num_tokens": 1, "num_tokens_all": 862, "is_greedy": false, "logits_per_token": -1.301052212715149, "logits_per_char": -0.32526305317878723, "num_chars": 4}, {"sum_logits": -0.3750660717487335, "num_tokens": 1, "num_tokens_all": 862, "is_greedy": true, "logits_per_token": -0.3750660717487335, "logits_per_char": -0.1250220239162445, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 773, "native_id": 2275, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6241914629936218, "incorrect_loss_raw": 0.8107422590255737, "correct_loss_per_char": 0.20806382099787393, "incorrect_loss_per_char": 0.20268556475639343, "correct_loss_per_token": 0.6241914629936218, "incorrect_loss_per_token": 0.8107422590255737, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8107422590255737, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": false, "logits_per_token": -0.8107422590255737, "logits_per_char": -0.20268556475639343, "num_chars": 4}, {"sum_logits": -0.6241914629936218, "num_tokens": 1, "num_tokens_all": 1069, "is_greedy": true, "logits_per_token": -0.6241914629936218, "logits_per_char": -0.20806382099787393, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 774, "native_id": 2119, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7607084512710571, "incorrect_loss_raw": 0.6965765357017517, "correct_loss_per_char": 0.25356948375701904, "incorrect_loss_per_char": 0.17414413392543793, "correct_loss_per_token": 0.7607084512710571, "incorrect_loss_per_token": 0.6965765357017517, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6965765357017517, "num_tokens": 1, "num_tokens_all": 1004, "is_greedy": true, "logits_per_token": -0.6965765357017517, "logits_per_char": -0.17414413392543793, "num_chars": 4}, {"sum_logits": -0.7607084512710571, "num_tokens": 1, "num_tokens_all": 1004, "is_greedy": false, "logits_per_token": -0.7607084512710571, "logits_per_char": -0.25356948375701904, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 775, "native_id": 2919, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6626682281494141, "incorrect_loss_raw": 0.8745664358139038, "correct_loss_per_char": 0.16566705703735352, "incorrect_loss_per_char": 0.29152214527130127, "correct_loss_per_token": 0.6626682281494141, "incorrect_loss_per_token": 0.8745664358139038, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6626682281494141, "num_tokens": 1, "num_tokens_all": 869, "is_greedy": true, "logits_per_token": -0.6626682281494141, "logits_per_char": -0.16566705703735352, "num_chars": 4}, {"sum_logits": -0.8745664358139038, "num_tokens": 1, "num_tokens_all": 869, "is_greedy": false, "logits_per_token": -0.8745664358139038, "logits_per_char": -0.29152214527130127, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 776, "native_id": 3029, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4946298599243164, "incorrect_loss_raw": 1.1247050762176514, "correct_loss_per_char": 0.1236574649810791, "incorrect_loss_per_char": 0.3749016920725505, "correct_loss_per_token": 0.4946298599243164, "incorrect_loss_per_token": 1.1247050762176514, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4946298599243164, "num_tokens": 1, "num_tokens_all": 866, "is_greedy": true, "logits_per_token": -0.4946298599243164, "logits_per_char": -0.1236574649810791, "num_chars": 4}, {"sum_logits": -1.1247050762176514, "num_tokens": 1, "num_tokens_all": 866, "is_greedy": false, "logits_per_token": -1.1247050762176514, "logits_per_char": -0.3749016920725505, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 777, "native_id": 2122, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9825869798660278, "incorrect_loss_raw": 0.5577390193939209, "correct_loss_per_char": 0.24564674496650696, "incorrect_loss_per_char": 0.1859130064646403, "correct_loss_per_token": 0.9825869798660278, "incorrect_loss_per_token": 0.5577390193939209, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9825869798660278, "num_tokens": 1, "num_tokens_all": 870, "is_greedy": false, "logits_per_token": -0.9825869798660278, "logits_per_char": -0.24564674496650696, "num_chars": 4}, {"sum_logits": -0.5577390193939209, "num_tokens": 1, "num_tokens_all": 870, "is_greedy": true, "logits_per_token": -0.5577390193939209, "logits_per_char": -0.1859130064646403, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 778, "native_id": 2195, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.418364554643631, "incorrect_loss_raw": 1.1665196418762207, "correct_loss_per_char": 0.13945485154787698, "incorrect_loss_per_char": 0.2916299104690552, "correct_loss_per_token": 0.418364554643631, "incorrect_loss_per_token": 1.1665196418762207, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1665196418762207, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": false, "logits_per_token": -1.1665196418762207, "logits_per_char": -0.2916299104690552, "num_chars": 4}, {"sum_logits": -0.418364554643631, "num_tokens": 1, "num_tokens_all": 1014, "is_greedy": true, "logits_per_token": -0.418364554643631, "logits_per_char": -0.13945485154787698, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 779, "native_id": 778, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.36971667408943176, "incorrect_loss_raw": 1.3679856061935425, "correct_loss_per_char": 0.09242916852235794, "incorrect_loss_per_char": 0.45599520206451416, "correct_loss_per_token": 0.36971667408943176, "incorrect_loss_per_token": 1.3679856061935425, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.36971667408943176, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -0.36971667408943176, "logits_per_char": -0.09242916852235794, "num_chars": 4}, {"sum_logits": -1.3679856061935425, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.3679856061935425, "logits_per_char": -0.45599520206451416, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 780, "native_id": 2549, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8078873753547668, "incorrect_loss_raw": 0.6353784799575806, "correct_loss_per_char": 0.2019718438386917, "incorrect_loss_per_char": 0.21179282665252686, "correct_loss_per_token": 0.8078873753547668, "incorrect_loss_per_token": 0.6353784799575806, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8078873753547668, "num_tokens": 1, "num_tokens_all": 864, "is_greedy": false, "logits_per_token": -0.8078873753547668, "logits_per_char": -0.2019718438386917, "num_chars": 4}, {"sum_logits": -0.6353784799575806, "num_tokens": 1, "num_tokens_all": 864, "is_greedy": true, "logits_per_token": -0.6353784799575806, "logits_per_char": -0.21179282665252686, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 781, "native_id": 410, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8722294569015503, "incorrect_loss_raw": 0.6029778718948364, "correct_loss_per_char": 0.21805736422538757, "incorrect_loss_per_char": 0.20099262396494547, "correct_loss_per_token": 0.8722294569015503, "incorrect_loss_per_token": 0.6029778718948364, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8722294569015503, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": false, "logits_per_token": -0.8722294569015503, "logits_per_char": -0.21805736422538757, "num_chars": 4}, {"sum_logits": -0.6029778718948364, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": true, "logits_per_token": -0.6029778718948364, "logits_per_char": -0.20099262396494547, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 782, "native_id": 1623, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.38038185238838196, "incorrect_loss_raw": 1.404502511024475, "correct_loss_per_char": 0.09509546309709549, "incorrect_loss_per_char": 0.468167503674825, "correct_loss_per_token": 0.38038185238838196, "incorrect_loss_per_token": 1.404502511024475, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.38038185238838196, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": true, "logits_per_token": -0.38038185238838196, "logits_per_char": -0.09509546309709549, "num_chars": 4}, {"sum_logits": -1.404502511024475, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": false, "logits_per_token": -1.404502511024475, "logits_per_char": -0.468167503674825, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 783, "native_id": 367, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6839385628700256, "incorrect_loss_raw": 0.7649374604225159, "correct_loss_per_char": 0.2279795209566752, "incorrect_loss_per_char": 0.19123436510562897, "correct_loss_per_token": 0.6839385628700256, "incorrect_loss_per_token": 0.7649374604225159, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7649374604225159, "num_tokens": 1, "num_tokens_all": 859, "is_greedy": false, "logits_per_token": -0.7649374604225159, "logits_per_char": -0.19123436510562897, "num_chars": 4}, {"sum_logits": -0.6839385628700256, "num_tokens": 1, "num_tokens_all": 859, "is_greedy": true, "logits_per_token": -0.6839385628700256, "logits_per_char": -0.2279795209566752, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 784, "native_id": 1302, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.46545863151550293, "incorrect_loss_raw": 1.107682228088379, "correct_loss_per_char": 0.15515287717183432, "incorrect_loss_per_char": 0.2769205570220947, "correct_loss_per_token": 0.46545863151550293, "incorrect_loss_per_token": 1.107682228088379, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.107682228088379, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": false, "logits_per_token": -1.107682228088379, "logits_per_char": -0.2769205570220947, "num_chars": 4}, {"sum_logits": -0.46545863151550293, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": true, "logits_per_token": -0.46545863151550293, "logits_per_char": -0.15515287717183432, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 785, "native_id": 2100, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.877824068069458, "incorrect_loss_raw": 0.6321352124214172, "correct_loss_per_char": 0.2194560170173645, "incorrect_loss_per_char": 0.21071173747380575, "correct_loss_per_token": 0.877824068069458, "incorrect_loss_per_token": 0.6321352124214172, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.877824068069458, "num_tokens": 1, "num_tokens_all": 863, "is_greedy": false, "logits_per_token": -0.877824068069458, "logits_per_char": -0.2194560170173645, "num_chars": 4}, {"sum_logits": -0.6321352124214172, "num_tokens": 1, "num_tokens_all": 863, "is_greedy": true, "logits_per_token": -0.6321352124214172, "logits_per_char": -0.21071173747380575, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 786, "native_id": 513, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3274859189987183, "incorrect_loss_raw": 0.33624833822250366, "correct_loss_per_char": 0.33187147974967957, "incorrect_loss_per_char": 0.11208277940750122, "correct_loss_per_token": 1.3274859189987183, "incorrect_loss_per_token": 0.33624833822250366, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3274859189987183, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": false, "logits_per_token": -1.3274859189987183, "logits_per_char": -0.33187147974967957, "num_chars": 4}, {"sum_logits": -0.33624833822250366, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": true, "logits_per_token": -0.33624833822250366, "logits_per_char": -0.11208277940750122, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 787, "native_id": 2565, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2796288728713989, "incorrect_loss_raw": 1.5142146348953247, "correct_loss_per_char": 0.09320962429046631, "incorrect_loss_per_char": 0.3785536587238312, "correct_loss_per_token": 0.2796288728713989, "incorrect_loss_per_token": 1.5142146348953247, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5142146348953247, "num_tokens": 1, "num_tokens_all": 859, "is_greedy": false, "logits_per_token": -1.5142146348953247, "logits_per_char": -0.3785536587238312, "num_chars": 4}, {"sum_logits": -0.2796288728713989, "num_tokens": 1, "num_tokens_all": 859, "is_greedy": true, "logits_per_token": -0.2796288728713989, "logits_per_char": -0.09320962429046631, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 788, "native_id": 1353, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3032878041267395, "incorrect_loss_raw": 1.4492359161376953, "correct_loss_per_char": 0.10109593470891316, "incorrect_loss_per_char": 0.36230897903442383, "correct_loss_per_token": 0.3032878041267395, "incorrect_loss_per_token": 1.4492359161376953, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4492359161376953, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": false, "logits_per_token": -1.4492359161376953, "logits_per_char": -0.36230897903442383, "num_chars": 4}, {"sum_logits": -0.3032878041267395, "num_tokens": 1, "num_tokens_all": 932, "is_greedy": true, "logits_per_token": -0.3032878041267395, "logits_per_char": -0.10109593470891316, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 789, "native_id": 1973, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4285445809364319, "incorrect_loss_raw": 1.133302927017212, "correct_loss_per_char": 0.1428481936454773, "incorrect_loss_per_char": 0.283325731754303, "correct_loss_per_token": 0.4285445809364319, "incorrect_loss_per_token": 1.133302927017212, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.133302927017212, "num_tokens": 1, "num_tokens_all": 866, "is_greedy": false, "logits_per_token": -1.133302927017212, "logits_per_char": -0.283325731754303, "num_chars": 4}, {"sum_logits": -0.4285445809364319, "num_tokens": 1, "num_tokens_all": 866, "is_greedy": true, "logits_per_token": -0.4285445809364319, "logits_per_char": -0.1428481936454773, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 790, "native_id": 1073, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8433114886283875, "incorrect_loss_raw": 0.6341260075569153, "correct_loss_per_char": 0.21082787215709686, "incorrect_loss_per_char": 0.21137533585230509, "correct_loss_per_token": 0.8433114886283875, "incorrect_loss_per_token": 0.6341260075569153, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8433114886283875, "num_tokens": 1, "num_tokens_all": 892, "is_greedy": false, "logits_per_token": -0.8433114886283875, "logits_per_char": -0.21082787215709686, "num_chars": 4}, {"sum_logits": -0.6341260075569153, "num_tokens": 1, "num_tokens_all": 892, "is_greedy": true, "logits_per_token": -0.6341260075569153, "logits_per_char": -0.21137533585230509, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 791, "native_id": 3199, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7696006298065186, "incorrect_loss_raw": 0.6794491410255432, "correct_loss_per_char": 0.19240015745162964, "incorrect_loss_per_char": 0.2264830470085144, "correct_loss_per_token": 0.7696006298065186, "incorrect_loss_per_token": 0.6794491410255432, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7696006298065186, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": false, "logits_per_token": -0.7696006298065186, "logits_per_char": -0.19240015745162964, "num_chars": 4}, {"sum_logits": -0.6794491410255432, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": true, "logits_per_token": -0.6794491410255432, "logits_per_char": -0.2264830470085144, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 792, "native_id": 261, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7069575190544128, "incorrect_loss_raw": 0.7374652624130249, "correct_loss_per_char": 0.1767393797636032, "incorrect_loss_per_char": 0.24582175413767496, "correct_loss_per_token": 0.7069575190544128, "incorrect_loss_per_token": 0.7374652624130249, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7069575190544128, "num_tokens": 1, "num_tokens_all": 859, "is_greedy": true, "logits_per_token": -0.7069575190544128, "logits_per_char": -0.1767393797636032, "num_chars": 4}, {"sum_logits": -0.7374652624130249, "num_tokens": 1, "num_tokens_all": 859, "is_greedy": false, "logits_per_token": -0.7374652624130249, "logits_per_char": -0.24582175413767496, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 793, "native_id": 2468, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4487491846084595, "incorrect_loss_raw": 0.2997778356075287, "correct_loss_per_char": 0.36218729615211487, "incorrect_loss_per_char": 0.09992594520250957, "correct_loss_per_token": 1.4487491846084595, "incorrect_loss_per_token": 0.2997778356075287, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4487491846084595, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": false, "logits_per_token": -1.4487491846084595, "logits_per_char": -0.36218729615211487, "num_chars": 4}, {"sum_logits": -0.2997778356075287, "num_tokens": 1, "num_tokens_all": 926, "is_greedy": true, "logits_per_token": -0.2997778356075287, "logits_per_char": -0.09992594520250957, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 794, "native_id": 1845, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6306427717208862, "incorrect_loss_raw": 0.8712766170501709, "correct_loss_per_char": 0.15766069293022156, "incorrect_loss_per_char": 0.29042553901672363, "correct_loss_per_token": 0.6306427717208862, "incorrect_loss_per_token": 0.8712766170501709, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6306427717208862, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": true, "logits_per_token": -0.6306427717208862, "logits_per_char": -0.15766069293022156, "num_chars": 4}, {"sum_logits": -0.8712766170501709, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": false, "logits_per_token": -0.8712766170501709, "logits_per_char": -0.29042553901672363, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 795, "native_id": 43, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.45533910393714905, "incorrect_loss_raw": 1.0855103731155396, "correct_loss_per_char": 0.15177970131238303, "incorrect_loss_per_char": 0.2713775932788849, "correct_loss_per_token": 0.45533910393714905, "incorrect_loss_per_token": 1.0855103731155396, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0855103731155396, "num_tokens": 1, "num_tokens_all": 860, "is_greedy": false, "logits_per_token": -1.0855103731155396, "logits_per_char": -0.2713775932788849, "num_chars": 4}, {"sum_logits": -0.45533910393714905, "num_tokens": 1, "num_tokens_all": 860, "is_greedy": true, "logits_per_token": -0.45533910393714905, "logits_per_char": -0.15177970131238303, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 796, "native_id": 1445, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2554168701171875, "incorrect_loss_raw": 0.3741629123687744, "correct_loss_per_char": 0.3138542175292969, "incorrect_loss_per_char": 0.12472097078959148, "correct_loss_per_token": 1.2554168701171875, "incorrect_loss_per_token": 0.3741629123687744, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2554168701171875, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": false, "logits_per_token": -1.2554168701171875, "logits_per_char": -0.3138542175292969, "num_chars": 4}, {"sum_logits": -0.3741629123687744, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": true, "logits_per_token": -0.3741629123687744, "logits_per_char": -0.12472097078959148, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 797, "native_id": 148, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22882387042045593, "incorrect_loss_raw": 2.0652360916137695, "correct_loss_per_char": 0.0762746234734853, "incorrect_loss_per_char": 0.5163090229034424, "correct_loss_per_token": 0.22882387042045593, "incorrect_loss_per_token": 2.0652360916137695, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -2.0652360916137695, "num_tokens": 1, "num_tokens_all": 842, "is_greedy": false, "logits_per_token": -2.0652360916137695, "logits_per_char": -0.5163090229034424, "num_chars": 4}, {"sum_logits": -0.22882387042045593, "num_tokens": 1, "num_tokens_all": 842, "is_greedy": true, "logits_per_token": -0.22882387042045593, "logits_per_char": -0.0762746234734853, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 798, "native_id": 2427, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6211069226264954, "incorrect_loss_raw": 0.8788015246391296, "correct_loss_per_char": 0.15527673065662384, "incorrect_loss_per_char": 0.2929338415463765, "correct_loss_per_token": 0.6211069226264954, "incorrect_loss_per_token": 0.8788015246391296, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6211069226264954, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": true, "logits_per_token": -0.6211069226264954, "logits_per_char": -0.15527673065662384, "num_chars": 4}, {"sum_logits": -0.8788015246391296, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": false, "logits_per_token": -0.8788015246391296, "logits_per_char": -0.2929338415463765, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 799, "native_id": 885, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6042065620422363, "incorrect_loss_raw": 0.8546744585037231, "correct_loss_per_char": 0.2014021873474121, "incorrect_loss_per_char": 0.2136686146259308, "correct_loss_per_token": 0.6042065620422363, "incorrect_loss_per_token": 0.8546744585037231, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8546744585037231, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": false, "logits_per_token": -0.8546744585037231, "logits_per_char": -0.2136686146259308, "num_chars": 4}, {"sum_logits": -0.6042065620422363, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": true, "logits_per_token": -0.6042065620422363, "logits_per_char": -0.2014021873474121, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 800, "native_id": 442, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8571460247039795, "incorrect_loss_raw": 0.5897457003593445, "correct_loss_per_char": 0.21428650617599487, "incorrect_loss_per_char": 0.1965819001197815, "correct_loss_per_token": 0.8571460247039795, "incorrect_loss_per_token": 0.5897457003593445, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8571460247039795, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": false, "logits_per_token": -0.8571460247039795, "logits_per_char": -0.21428650617599487, "num_chars": 4}, {"sum_logits": -0.5897457003593445, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": true, "logits_per_token": -0.5897457003593445, "logits_per_char": -0.1965819001197815, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 801, "native_id": 1826, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6471341848373413, "incorrect_loss_raw": 0.9204249978065491, "correct_loss_per_char": 0.21571139494578043, "incorrect_loss_per_char": 0.23010624945163727, "correct_loss_per_token": 0.6471341848373413, "incorrect_loss_per_token": 0.9204249978065491, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9204249978065491, "num_tokens": 1, "num_tokens_all": 892, "is_greedy": false, "logits_per_token": -0.9204249978065491, "logits_per_char": -0.23010624945163727, "num_chars": 4}, {"sum_logits": -0.6471341848373413, "num_tokens": 1, "num_tokens_all": 892, "is_greedy": true, "logits_per_token": -0.6471341848373413, "logits_per_char": -0.21571139494578043, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 802, "native_id": 2259, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.38989323377609253, "incorrect_loss_raw": 1.3915257453918457, "correct_loss_per_char": 0.09747330844402313, "incorrect_loss_per_char": 0.46384191513061523, "correct_loss_per_token": 0.38989323377609253, "incorrect_loss_per_token": 1.3915257453918457, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.38989323377609253, "num_tokens": 1, "num_tokens_all": 998, "is_greedy": true, "logits_per_token": -0.38989323377609253, "logits_per_char": -0.09747330844402313, "num_chars": 4}, {"sum_logits": -1.3915257453918457, "num_tokens": 1, "num_tokens_all": 998, "is_greedy": false, "logits_per_token": -1.3915257453918457, "logits_per_char": -0.46384191513061523, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 803, "native_id": 733, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.358124703168869, "incorrect_loss_raw": 1.4338102340698242, "correct_loss_per_char": 0.08953117579221725, "incorrect_loss_per_char": 0.4779367446899414, "correct_loss_per_token": 0.358124703168869, "incorrect_loss_per_token": 1.4338102340698242, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.358124703168869, "num_tokens": 1, "num_tokens_all": 885, "is_greedy": true, "logits_per_token": -0.358124703168869, "logits_per_char": -0.08953117579221725, "num_chars": 4}, {"sum_logits": -1.4338102340698242, "num_tokens": 1, "num_tokens_all": 885, "is_greedy": false, "logits_per_token": -1.4338102340698242, "logits_per_char": -0.4779367446899414, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 804, "native_id": 2348, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6977124810218811, "incorrect_loss_raw": 0.7894620895385742, "correct_loss_per_char": 0.17442812025547028, "incorrect_loss_per_char": 0.2631540298461914, "correct_loss_per_token": 0.6977124810218811, "incorrect_loss_per_token": 0.7894620895385742, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6977124810218811, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": true, "logits_per_token": -0.6977124810218811, "logits_per_char": -0.17442812025547028, "num_chars": 4}, {"sum_logits": -0.7894620895385742, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -0.7894620895385742, "logits_per_char": -0.2631540298461914, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 805, "native_id": 169, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.40736427903175354, "incorrect_loss_raw": 1.1610989570617676, "correct_loss_per_char": 0.1357880930105845, "incorrect_loss_per_char": 0.2902747392654419, "correct_loss_per_token": 0.40736427903175354, "incorrect_loss_per_token": 1.1610989570617676, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1610989570617676, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": false, "logits_per_token": -1.1610989570617676, "logits_per_char": -0.2902747392654419, "num_chars": 4}, {"sum_logits": -0.40736427903175354, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": true, "logits_per_token": -0.40736427903175354, "logits_per_char": -0.1357880930105845, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 806, "native_id": 2627, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3214155435562134, "incorrect_loss_raw": 1.5465655326843262, "correct_loss_per_char": 0.10713851451873779, "incorrect_loss_per_char": 0.38664138317108154, "correct_loss_per_token": 0.3214155435562134, "incorrect_loss_per_token": 1.5465655326843262, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5465655326843262, "num_tokens": 1, "num_tokens_all": 889, "is_greedy": false, "logits_per_token": -1.5465655326843262, "logits_per_char": -0.38664138317108154, "num_chars": 4}, {"sum_logits": -0.3214155435562134, "num_tokens": 1, "num_tokens_all": 889, "is_greedy": true, "logits_per_token": -0.3214155435562134, "logits_per_char": -0.10713851451873779, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 807, "native_id": 2057, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.46728092432022095, "incorrect_loss_raw": 1.089249849319458, "correct_loss_per_char": 0.11682023108005524, "incorrect_loss_per_char": 0.363083283106486, "correct_loss_per_token": 0.46728092432022095, "incorrect_loss_per_token": 1.089249849319458, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.46728092432022095, "num_tokens": 1, "num_tokens_all": 860, "is_greedy": true, "logits_per_token": -0.46728092432022095, "logits_per_char": -0.11682023108005524, "num_chars": 4}, {"sum_logits": -1.089249849319458, "num_tokens": 1, "num_tokens_all": 860, "is_greedy": false, "logits_per_token": -1.089249849319458, "logits_per_char": -0.363083283106486, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 808, "native_id": 2373, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8627815246582031, "incorrect_loss_raw": 0.5886576771736145, "correct_loss_per_char": 0.21569538116455078, "incorrect_loss_per_char": 0.19621922572453818, "correct_loss_per_token": 0.8627815246582031, "incorrect_loss_per_token": 0.5886576771736145, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8627815246582031, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -0.8627815246582031, "logits_per_char": -0.21569538116455078, "num_chars": 4}, {"sum_logits": -0.5886576771736145, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -0.5886576771736145, "logits_per_char": -0.19621922572453818, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 809, "native_id": 3040, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4192710816860199, "incorrect_loss_raw": 1.1509994268417358, "correct_loss_per_char": 0.1397570272286733, "incorrect_loss_per_char": 0.28774985671043396, "correct_loss_per_token": 0.4192710816860199, "incorrect_loss_per_token": 1.1509994268417358, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1509994268417358, "num_tokens": 1, "num_tokens_all": 879, "is_greedy": false, "logits_per_token": -1.1509994268417358, "logits_per_char": -0.28774985671043396, "num_chars": 4}, {"sum_logits": -0.4192710816860199, "num_tokens": 1, "num_tokens_all": 879, "is_greedy": true, "logits_per_token": -0.4192710816860199, "logits_per_char": -0.1397570272286733, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 810, "native_id": 1271, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7303754091262817, "incorrect_loss_raw": 0.7267094850540161, "correct_loss_per_char": 0.18259385228157043, "incorrect_loss_per_char": 0.24223649501800537, "correct_loss_per_token": 0.7303754091262817, "incorrect_loss_per_token": 0.7267094850540161, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7303754091262817, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -0.7303754091262817, "logits_per_char": -0.18259385228157043, "num_chars": 4}, {"sum_logits": -0.7267094850540161, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": true, "logits_per_token": -0.7267094850540161, "logits_per_char": -0.24223649501800537, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 811, "native_id": 2368, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5030912160873413, "incorrect_loss_raw": 1.068921446800232, "correct_loss_per_char": 0.12577280402183533, "incorrect_loss_per_char": 0.35630714893341064, "correct_loss_per_token": 0.5030912160873413, "incorrect_loss_per_token": 1.068921446800232, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5030912160873413, "num_tokens": 1, "num_tokens_all": 860, "is_greedy": true, "logits_per_token": -0.5030912160873413, "logits_per_char": -0.12577280402183533, "num_chars": 4}, {"sum_logits": -1.068921446800232, "num_tokens": 1, "num_tokens_all": 860, "is_greedy": false, "logits_per_token": -1.068921446800232, "logits_per_char": -0.35630714893341064, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 812, "native_id": 132, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6864609122276306, "incorrect_loss_raw": 0.898248553276062, "correct_loss_per_char": 0.22882030407587686, "incorrect_loss_per_char": 0.2245621383190155, "correct_loss_per_token": 0.6864609122276306, "incorrect_loss_per_token": 0.898248553276062, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.898248553276062, "num_tokens": 1, "num_tokens_all": 1037, "is_greedy": false, "logits_per_token": -0.898248553276062, "logits_per_char": -0.2245621383190155, "num_chars": 4}, {"sum_logits": -0.6864609122276306, "num_tokens": 1, "num_tokens_all": 1037, "is_greedy": true, "logits_per_token": -0.6864609122276306, "logits_per_char": -0.22882030407587686, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 813, "native_id": 2346, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9993022680282593, "incorrect_loss_raw": 0.5012446641921997, "correct_loss_per_char": 0.24982556700706482, "incorrect_loss_per_char": 0.16708155473073324, "correct_loss_per_token": 0.9993022680282593, "incorrect_loss_per_token": 0.5012446641921997, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9993022680282593, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": false, "logits_per_token": -0.9993022680282593, "logits_per_char": -0.24982556700706482, "num_chars": 4}, {"sum_logits": -0.5012446641921997, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": true, "logits_per_token": -0.5012446641921997, "logits_per_char": -0.16708155473073324, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 814, "native_id": 1382, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4293785095214844, "incorrect_loss_raw": 1.1426403522491455, "correct_loss_per_char": 0.14312616984049478, "incorrect_loss_per_char": 0.2856600880622864, "correct_loss_per_token": 0.4293785095214844, "incorrect_loss_per_token": 1.1426403522491455, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1426403522491455, "num_tokens": 1, "num_tokens_all": 886, "is_greedy": false, "logits_per_token": -1.1426403522491455, "logits_per_char": -0.2856600880622864, "num_chars": 4}, {"sum_logits": -0.4293785095214844, "num_tokens": 1, "num_tokens_all": 886, "is_greedy": true, "logits_per_token": -0.4293785095214844, "logits_per_char": -0.14312616984049478, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 815, "native_id": 2222, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1564687490463257, "incorrect_loss_raw": 0.434757262468338, "correct_loss_per_char": 0.2891171872615814, "incorrect_loss_per_char": 0.144919087489446, "correct_loss_per_token": 1.1564687490463257, "incorrect_loss_per_token": 0.434757262468338, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1564687490463257, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": false, "logits_per_token": -1.1564687490463257, "logits_per_char": -0.2891171872615814, "num_chars": 4}, {"sum_logits": -0.434757262468338, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": true, "logits_per_token": -0.434757262468338, "logits_per_char": -0.144919087489446, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 816, "native_id": 3066, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5995926856994629, "incorrect_loss_raw": 0.9281734228134155, "correct_loss_per_char": 0.14989817142486572, "incorrect_loss_per_char": 0.3093911409378052, "correct_loss_per_token": 0.5995926856994629, "incorrect_loss_per_token": 0.9281734228134155, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5995926856994629, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": true, "logits_per_token": -0.5995926856994629, "logits_per_char": -0.14989817142486572, "num_chars": 4}, {"sum_logits": -0.9281734228134155, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": false, "logits_per_token": -0.9281734228134155, "logits_per_char": -0.3093911409378052, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 817, "native_id": 870, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.940461277961731, "incorrect_loss_raw": 0.5620633959770203, "correct_loss_per_char": 0.23511531949043274, "incorrect_loss_per_char": 0.18735446532567343, "correct_loss_per_token": 0.940461277961731, "incorrect_loss_per_token": 0.5620633959770203, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.940461277961731, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": false, "logits_per_token": -0.940461277961731, "logits_per_char": -0.23511531949043274, "num_chars": 4}, {"sum_logits": -0.5620633959770203, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": true, "logits_per_token": -0.5620633959770203, "logits_per_char": -0.18735446532567343, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 818, "native_id": 3117, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2219542264938354, "incorrect_loss_raw": 0.463485985994339, "correct_loss_per_char": 0.30548855662345886, "incorrect_loss_per_char": 0.15449532866477966, "correct_loss_per_token": 1.2219542264938354, "incorrect_loss_per_token": 0.463485985994339, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2219542264938354, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": false, "logits_per_token": -1.2219542264938354, "logits_per_char": -0.30548855662345886, "num_chars": 4}, {"sum_logits": -0.463485985994339, "num_tokens": 1, "num_tokens_all": 907, "is_greedy": true, "logits_per_token": -0.463485985994339, "logits_per_char": -0.15449532866477966, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 819, "native_id": 2124, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7625219821929932, "incorrect_loss_raw": 0.7350770235061646, "correct_loss_per_char": 0.1906304955482483, "incorrect_loss_per_char": 0.24502567450205484, "correct_loss_per_token": 0.7625219821929932, "incorrect_loss_per_token": 0.7350770235061646, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7625219821929932, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": false, "logits_per_token": -0.7625219821929932, "logits_per_char": -0.1906304955482483, "num_chars": 4}, {"sum_logits": -0.7350770235061646, "num_tokens": 1, "num_tokens_all": 979, "is_greedy": true, "logits_per_token": -0.7350770235061646, "logits_per_char": -0.24502567450205484, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 820, "native_id": 998, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5238076448440552, "incorrect_loss_raw": 1.0010229349136353, "correct_loss_per_char": 0.17460254828135172, "incorrect_loss_per_char": 0.2502557337284088, "correct_loss_per_token": 0.5238076448440552, "incorrect_loss_per_token": 1.0010229349136353, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0010229349136353, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": false, "logits_per_token": -1.0010229349136353, "logits_per_char": -0.2502557337284088, "num_chars": 4}, {"sum_logits": -0.5238076448440552, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": true, "logits_per_token": -0.5238076448440552, "logits_per_char": -0.17460254828135172, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 821, "native_id": 3259, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.43138930201530457, "incorrect_loss_raw": 1.126591444015503, "correct_loss_per_char": 0.14379643400510153, "incorrect_loss_per_char": 0.28164786100387573, "correct_loss_per_token": 0.43138930201530457, "incorrect_loss_per_token": 1.126591444015503, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.126591444015503, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": false, "logits_per_token": -1.126591444015503, "logits_per_char": -0.28164786100387573, "num_chars": 4}, {"sum_logits": -0.43138930201530457, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": true, "logits_per_token": -0.43138930201530457, "logits_per_char": -0.14379643400510153, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 822, "native_id": 1227, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.846663236618042, "incorrect_loss_raw": 0.5917924642562866, "correct_loss_per_char": 0.2116658091545105, "incorrect_loss_per_char": 0.19726415475209555, "correct_loss_per_token": 0.846663236618042, "incorrect_loss_per_token": 0.5917924642562866, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.846663236618042, "num_tokens": 1, "num_tokens_all": 861, "is_greedy": false, "logits_per_token": -0.846663236618042, "logits_per_char": -0.2116658091545105, "num_chars": 4}, {"sum_logits": -0.5917924642562866, "num_tokens": 1, "num_tokens_all": 861, "is_greedy": true, "logits_per_token": -0.5917924642562866, "logits_per_char": -0.19726415475209555, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 823, "native_id": 68, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6088278293609619, "incorrect_loss_raw": 0.8764389753341675, "correct_loss_per_char": 0.2029426097869873, "incorrect_loss_per_char": 0.21910974383354187, "correct_loss_per_token": 0.6088278293609619, "incorrect_loss_per_token": 0.8764389753341675, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8764389753341675, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -0.8764389753341675, "logits_per_char": -0.21910974383354187, "num_chars": 4}, {"sum_logits": -0.6088278293609619, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.6088278293609619, "logits_per_char": -0.2029426097869873, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 824, "native_id": 2907, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7925169467926025, "incorrect_loss_raw": 0.7071759104728699, "correct_loss_per_char": 0.2641723155975342, "incorrect_loss_per_char": 0.17679397761821747, "correct_loss_per_token": 0.7925169467926025, "incorrect_loss_per_token": 0.7071759104728699, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7071759104728699, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -0.7071759104728699, "logits_per_char": -0.17679397761821747, "num_chars": 4}, {"sum_logits": -0.7925169467926025, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -0.7925169467926025, "logits_per_char": -0.2641723155975342, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 825, "native_id": 344, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8167163133621216, "incorrect_loss_raw": 0.6230316758155823, "correct_loss_per_char": 0.2041790783405304, "incorrect_loss_per_char": 0.20767722527186075, "correct_loss_per_token": 0.8167163133621216, "incorrect_loss_per_token": 0.6230316758155823, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8167163133621216, "num_tokens": 1, "num_tokens_all": 902, "is_greedy": false, "logits_per_token": -0.8167163133621216, "logits_per_char": -0.2041790783405304, "num_chars": 4}, {"sum_logits": -0.6230316758155823, "num_tokens": 1, "num_tokens_all": 902, "is_greedy": true, "logits_per_token": -0.6230316758155823, "logits_per_char": -0.20767722527186075, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 826, "native_id": 114, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.46553635597229004, "incorrect_loss_raw": 1.0861454010009766, "correct_loss_per_char": 0.11638408899307251, "incorrect_loss_per_char": 0.3620484670003255, "correct_loss_per_token": 0.46553635597229004, "incorrect_loss_per_token": 1.0861454010009766, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.46553635597229004, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": true, "logits_per_token": -0.46553635597229004, "logits_per_char": -0.11638408899307251, "num_chars": 4}, {"sum_logits": -1.0861454010009766, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.0861454010009766, "logits_per_char": -0.3620484670003255, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 827, "native_id": 3031, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.117072343826294, "incorrect_loss_raw": 0.49389511346817017, "correct_loss_per_char": 0.2792680859565735, "incorrect_loss_per_char": 0.16463170448939005, "correct_loss_per_token": 1.117072343826294, "incorrect_loss_per_token": 0.49389511346817017, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.117072343826294, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": false, "logits_per_token": -1.117072343826294, "logits_per_char": -0.2792680859565735, "num_chars": 4}, {"sum_logits": -0.49389511346817017, "num_tokens": 1, "num_tokens_all": 967, "is_greedy": true, "logits_per_token": -0.49389511346817017, "logits_per_char": -0.16463170448939005, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 828, "native_id": 2283, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.21201813220977783, "incorrect_loss_raw": 2.0166897773742676, "correct_loss_per_char": 0.0706727107365926, "incorrect_loss_per_char": 0.5041724443435669, "correct_loss_per_token": 0.21201813220977783, "incorrect_loss_per_token": 2.0166897773742676, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -2.0166897773742676, "num_tokens": 1, "num_tokens_all": 999, "is_greedy": false, "logits_per_token": -2.0166897773742676, "logits_per_char": -0.5041724443435669, "num_chars": 4}, {"sum_logits": -0.21201813220977783, "num_tokens": 1, "num_tokens_all": 999, "is_greedy": true, "logits_per_token": -0.21201813220977783, "logits_per_char": -0.0706727107365926, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 829, "native_id": 3138, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.32030871510505676, "incorrect_loss_raw": 1.3797938823699951, "correct_loss_per_char": 0.10676957170168559, "incorrect_loss_per_char": 0.3449484705924988, "correct_loss_per_token": 0.32030871510505676, "incorrect_loss_per_token": 1.3797938823699951, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3797938823699951, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": false, "logits_per_token": -1.3797938823699951, "logits_per_char": -0.3449484705924988, "num_chars": 4}, {"sum_logits": -0.32030871510505676, "num_tokens": 1, "num_tokens_all": 920, "is_greedy": true, "logits_per_token": -0.32030871510505676, "logits_per_char": -0.10676957170168559, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 830, "native_id": 2572, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8498573303222656, "incorrect_loss_raw": 0.6696537733078003, "correct_loss_per_char": 0.28328577677408856, "incorrect_loss_per_char": 0.16741344332695007, "correct_loss_per_token": 0.8498573303222656, "incorrect_loss_per_token": 0.6696537733078003, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6696537733078003, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": true, "logits_per_token": -0.6696537733078003, "logits_per_char": -0.16741344332695007, "num_chars": 4}, {"sum_logits": -0.8498573303222656, "num_tokens": 1, "num_tokens_all": 1097, "is_greedy": false, "logits_per_token": -0.8498573303222656, "logits_per_char": -0.28328577677408856, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 831, "native_id": 2517, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7667922973632812, "incorrect_loss_raw": 0.7207340598106384, "correct_loss_per_char": 0.1916980743408203, "incorrect_loss_per_char": 0.24024468660354614, "correct_loss_per_token": 0.7667922973632812, "incorrect_loss_per_token": 0.7207340598106384, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7667922973632812, "num_tokens": 1, "num_tokens_all": 1006, "is_greedy": false, "logits_per_token": -0.7667922973632812, "logits_per_char": -0.1916980743408203, "num_chars": 4}, {"sum_logits": -0.7207340598106384, "num_tokens": 1, "num_tokens_all": 1006, "is_greedy": true, "logits_per_token": -0.7207340598106384, "logits_per_char": -0.24024468660354614, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 832, "native_id": 1601, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.214798927307129, "incorrect_loss_raw": 0.39759108424186707, "correct_loss_per_char": 0.3036997318267822, "incorrect_loss_per_char": 0.1325303614139557, "correct_loss_per_token": 1.214798927307129, "incorrect_loss_per_token": 0.39759108424186707, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.214798927307129, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -1.214798927307129, "logits_per_char": -0.3036997318267822, "num_chars": 4}, {"sum_logits": -0.39759108424186707, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": true, "logits_per_token": -0.39759108424186707, "logits_per_char": -0.1325303614139557, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 833, "native_id": 1866, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4725547432899475, "incorrect_loss_raw": 1.0275776386260986, "correct_loss_per_char": 0.15751824776331583, "incorrect_loss_per_char": 0.25689440965652466, "correct_loss_per_token": 0.4725547432899475, "incorrect_loss_per_token": 1.0275776386260986, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0275776386260986, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": false, "logits_per_token": -1.0275776386260986, "logits_per_char": -0.25689440965652466, "num_chars": 4}, {"sum_logits": -0.4725547432899475, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": true, "logits_per_token": -0.4725547432899475, "logits_per_char": -0.15751824776331583, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 834, "native_id": 3065, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4972513914108276, "incorrect_loss_raw": 0.2850419282913208, "correct_loss_per_char": 0.3743128478527069, "incorrect_loss_per_char": 0.09501397609710693, "correct_loss_per_token": 1.4972513914108276, "incorrect_loss_per_token": 0.2850419282913208, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4972513914108276, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": false, "logits_per_token": -1.4972513914108276, "logits_per_char": -0.3743128478527069, "num_chars": 4}, {"sum_logits": -0.2850419282913208, "num_tokens": 1, "num_tokens_all": 968, "is_greedy": true, "logits_per_token": -0.2850419282913208, "logits_per_char": -0.09501397609710693, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 835, "native_id": 893, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2713837623596191, "incorrect_loss_raw": 0.3788408637046814, "correct_loss_per_char": 0.3178459405899048, "incorrect_loss_per_char": 0.12628028790156046, "correct_loss_per_token": 1.2713837623596191, "incorrect_loss_per_token": 0.3788408637046814, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2713837623596191, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.2713837623596191, "logits_per_char": -0.3178459405899048, "num_chars": 4}, {"sum_logits": -0.3788408637046814, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -0.3788408637046814, "logits_per_char": -0.12628028790156046, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 836, "native_id": 322, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1603503227233887, "incorrect_loss_raw": 0.42263439297676086, "correct_loss_per_char": 0.29008758068084717, "incorrect_loss_per_char": 0.14087813099225363, "correct_loss_per_token": 1.1603503227233887, "incorrect_loss_per_token": 0.42263439297676086, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1603503227233887, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": false, "logits_per_token": -1.1603503227233887, "logits_per_char": -0.29008758068084717, "num_chars": 4}, {"sum_logits": -0.42263439297676086, "num_tokens": 1, "num_tokens_all": 938, "is_greedy": true, "logits_per_token": -0.42263439297676086, "logits_per_char": -0.14087813099225363, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 837, "native_id": 1427, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.44846901297569275, "incorrect_loss_raw": 1.0981736183166504, "correct_loss_per_char": 0.14948967099189758, "incorrect_loss_per_char": 0.2745434045791626, "correct_loss_per_token": 0.44846901297569275, "incorrect_loss_per_token": 1.0981736183166504, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0981736183166504, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -1.0981736183166504, "logits_per_char": -0.2745434045791626, "num_chars": 4}, {"sum_logits": -0.44846901297569275, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": true, "logits_per_token": -0.44846901297569275, "logits_per_char": -0.14948967099189758, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 838, "native_id": 1370, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.38493919372558594, "incorrect_loss_raw": 1.248831033706665, "correct_loss_per_char": 0.1283130645751953, "incorrect_loss_per_char": 0.31220775842666626, "correct_loss_per_token": 0.38493919372558594, "incorrect_loss_per_token": 1.248831033706665, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.248831033706665, "num_tokens": 1, "num_tokens_all": 1003, "is_greedy": false, "logits_per_token": -1.248831033706665, "logits_per_char": -0.31220775842666626, "num_chars": 4}, {"sum_logits": -0.38493919372558594, "num_tokens": 1, "num_tokens_all": 1003, "is_greedy": true, "logits_per_token": -0.38493919372558594, "logits_per_char": -0.1283130645751953, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 839, "native_id": 1444, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.37415218353271484, "incorrect_loss_raw": 1.2772425413131714, "correct_loss_per_char": 0.09353804588317871, "incorrect_loss_per_char": 0.42574751377105713, "correct_loss_per_token": 0.37415218353271484, "incorrect_loss_per_token": 1.2772425413131714, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.37415218353271484, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": true, "logits_per_token": -0.37415218353271484, "logits_per_char": -0.09353804588317871, "num_chars": 4}, {"sum_logits": -1.2772425413131714, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": false, "logits_per_token": -1.2772425413131714, "logits_per_char": -0.42574751377105713, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 840, "native_id": 1590, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9992523193359375, "incorrect_loss_raw": 0.5995123386383057, "correct_loss_per_char": 0.24981307983398438, "incorrect_loss_per_char": 0.19983744621276855, "correct_loss_per_token": 0.9992523193359375, "incorrect_loss_per_token": 0.5995123386383057, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9992523193359375, "num_tokens": 1, "num_tokens_all": 1005, "is_greedy": false, "logits_per_token": -0.9992523193359375, "logits_per_char": -0.24981307983398438, "num_chars": 4}, {"sum_logits": -0.5995123386383057, "num_tokens": 1, "num_tokens_all": 1005, "is_greedy": true, "logits_per_token": -0.5995123386383057, "logits_per_char": -0.19983744621276855, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 841, "native_id": 1454, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0780749320983887, "incorrect_loss_raw": 0.4413023889064789, "correct_loss_per_char": 0.26951873302459717, "incorrect_loss_per_char": 0.14710079630215964, "correct_loss_per_token": 1.0780749320983887, "incorrect_loss_per_token": 0.4413023889064789, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0780749320983887, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": false, "logits_per_token": -1.0780749320983887, "logits_per_char": -0.26951873302459717, "num_chars": 4}, {"sum_logits": -0.4413023889064789, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": true, "logits_per_token": -0.4413023889064789, "logits_per_char": -0.14710079630215964, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 842, "native_id": 389, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7153790593147278, "incorrect_loss_raw": 0.7885692119598389, "correct_loss_per_char": 0.17884476482868195, "incorrect_loss_per_char": 0.262856403986613, "correct_loss_per_token": 0.7153790593147278, "incorrect_loss_per_token": 0.7885692119598389, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7153790593147278, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": true, "logits_per_token": -0.7153790593147278, "logits_per_char": -0.17884476482868195, "num_chars": 4}, {"sum_logits": -0.7885692119598389, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": false, "logits_per_token": -0.7885692119598389, "logits_per_char": -0.262856403986613, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 843, "native_id": 127, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5578120946884155, "incorrect_loss_raw": 0.9271799325942993, "correct_loss_per_char": 0.18593736489613852, "incorrect_loss_per_char": 0.23179498314857483, "correct_loss_per_token": 0.5578120946884155, "incorrect_loss_per_token": 0.9271799325942993, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9271799325942993, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": false, "logits_per_token": -0.9271799325942993, "logits_per_char": -0.23179498314857483, "num_chars": 4}, {"sum_logits": -0.5578120946884155, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": true, "logits_per_token": -0.5578120946884155, "logits_per_char": -0.18593736489613852, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 844, "native_id": 529, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.82755446434021, "incorrect_loss_raw": 0.627097487449646, "correct_loss_per_char": 0.2068886160850525, "incorrect_loss_per_char": 0.20903249581654867, "correct_loss_per_token": 0.82755446434021, "incorrect_loss_per_token": 0.627097487449646, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.82755446434021, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -0.82755446434021, "logits_per_char": -0.2068886160850525, "num_chars": 4}, {"sum_logits": -0.627097487449646, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.627097487449646, "logits_per_char": -0.20903249581654867, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 845, "native_id": 3222, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6676233410835266, "incorrect_loss_raw": 0.7852112054824829, "correct_loss_per_char": 0.16690583527088165, "incorrect_loss_per_char": 0.26173706849416095, "correct_loss_per_token": 0.6676233410835266, "incorrect_loss_per_token": 0.7852112054824829, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6676233410835266, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -0.6676233410835266, "logits_per_char": -0.16690583527088165, "num_chars": 4}, {"sum_logits": -0.7852112054824829, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -0.7852112054824829, "logits_per_char": -0.26173706849416095, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 846, "native_id": 1847, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5283340215682983, "incorrect_loss_raw": 1.0622167587280273, "correct_loss_per_char": 0.13208350539207458, "incorrect_loss_per_char": 0.35407225290934247, "correct_loss_per_token": 0.5283340215682983, "incorrect_loss_per_token": 1.0622167587280273, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5283340215682983, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": true, "logits_per_token": -0.5283340215682983, "logits_per_char": -0.13208350539207458, "num_chars": 4}, {"sum_logits": -1.0622167587280273, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": false, "logits_per_token": -1.0622167587280273, "logits_per_char": -0.35407225290934247, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 847, "native_id": 1467, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.530915379524231, "incorrect_loss_raw": 0.9320129156112671, "correct_loss_per_char": 0.17697179317474365, "incorrect_loss_per_char": 0.23300322890281677, "correct_loss_per_token": 0.530915379524231, "incorrect_loss_per_token": 0.9320129156112671, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9320129156112671, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": false, "logits_per_token": -0.9320129156112671, "logits_per_char": -0.23300322890281677, "num_chars": 4}, {"sum_logits": -0.530915379524231, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": true, "logits_per_token": -0.530915379524231, "logits_per_char": -0.17697179317474365, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 848, "native_id": 515, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3581712245941162, "incorrect_loss_raw": 1.266732096672058, "correct_loss_per_char": 0.11939040819803874, "incorrect_loss_per_char": 0.3166830241680145, "correct_loss_per_token": 0.3581712245941162, "incorrect_loss_per_token": 1.266732096672058, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.266732096672058, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -1.266732096672058, "logits_per_char": -0.3166830241680145, "num_chars": 4}, {"sum_logits": -0.3581712245941162, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": true, "logits_per_token": -0.3581712245941162, "logits_per_char": -0.11939040819803874, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 849, "native_id": 394, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3215991258621216, "incorrect_loss_raw": 0.35984358191490173, "correct_loss_per_char": 0.4405330419540405, "incorrect_loss_per_char": 0.08996089547872543, "correct_loss_per_token": 1.3215991258621216, "incorrect_loss_per_token": 0.35984358191490173, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.35984358191490173, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": true, "logits_per_token": -0.35984358191490173, "logits_per_char": -0.08996089547872543, "num_chars": 4}, {"sum_logits": -1.3215991258621216, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": false, "logits_per_token": -1.3215991258621216, "logits_per_char": -0.4405330419540405, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 850, "native_id": 252, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8430503606796265, "incorrect_loss_raw": 0.6077739000320435, "correct_loss_per_char": 0.21076259016990662, "incorrect_loss_per_char": 0.20259130001068115, "correct_loss_per_token": 0.8430503606796265, "incorrect_loss_per_token": 0.6077739000320435, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8430503606796265, "num_tokens": 1, "num_tokens_all": 976, "is_greedy": false, "logits_per_token": -0.8430503606796265, "logits_per_char": -0.21076259016990662, "num_chars": 4}, {"sum_logits": -0.6077739000320435, "num_tokens": 1, "num_tokens_all": 976, "is_greedy": true, "logits_per_token": -0.6077739000320435, "logits_per_char": -0.20259130001068115, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 851, "native_id": 1090, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6100136041641235, "incorrect_loss_raw": 0.3203139007091522, "correct_loss_per_char": 0.4025034010410309, "incorrect_loss_per_char": 0.10677130023638408, "correct_loss_per_token": 1.6100136041641235, "incorrect_loss_per_token": 0.3203139007091522, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6100136041641235, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": false, "logits_per_token": -1.6100136041641235, "logits_per_char": -0.4025034010410309, "num_chars": 4}, {"sum_logits": -0.3203139007091522, "num_tokens": 1, "num_tokens_all": 898, "is_greedy": true, "logits_per_token": -0.3203139007091522, "logits_per_char": -0.10677130023638408, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 852, "native_id": 2329, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1019419431686401, "incorrect_loss_raw": 0.43028923869132996, "correct_loss_per_char": 0.27548548579216003, "incorrect_loss_per_char": 0.14342974623044333, "correct_loss_per_token": 1.1019419431686401, "incorrect_loss_per_token": 0.43028923869132996, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1019419431686401, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": false, "logits_per_token": -1.1019419431686401, "logits_per_char": -0.27548548579216003, "num_chars": 4}, {"sum_logits": -0.43028923869132996, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": true, "logits_per_token": -0.43028923869132996, "logits_per_char": -0.14342974623044333, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 853, "native_id": 649, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.712470293045044, "incorrect_loss_raw": 0.7874611616134644, "correct_loss_per_char": 0.178117573261261, "incorrect_loss_per_char": 0.2624870538711548, "correct_loss_per_token": 0.712470293045044, "incorrect_loss_per_token": 0.7874611616134644, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.712470293045044, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": true, "logits_per_token": -0.712470293045044, "logits_per_char": -0.178117573261261, "num_chars": 4}, {"sum_logits": -0.7874611616134644, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": false, "logits_per_token": -0.7874611616134644, "logits_per_char": -0.2624870538711548, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 854, "native_id": 129, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7495289444923401, "incorrect_loss_raw": 0.7203336358070374, "correct_loss_per_char": 0.2498429814974467, "incorrect_loss_per_char": 0.18008340895175934, "correct_loss_per_token": 0.7495289444923401, "incorrect_loss_per_token": 0.7203336358070374, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7203336358070374, "num_tokens": 1, "num_tokens_all": 1169, "is_greedy": true, "logits_per_token": -0.7203336358070374, "logits_per_char": -0.18008340895175934, "num_chars": 4}, {"sum_logits": -0.7495289444923401, "num_tokens": 1, "num_tokens_all": 1169, "is_greedy": false, "logits_per_token": -0.7495289444923401, "logits_per_char": -0.2498429814974467, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 855, "native_id": 2962, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4757288694381714, "incorrect_loss_raw": 1.0536432266235352, "correct_loss_per_char": 0.1585762898127238, "incorrect_loss_per_char": 0.2634108066558838, "correct_loss_per_token": 0.4757288694381714, "incorrect_loss_per_token": 1.0536432266235352, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0536432266235352, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": false, "logits_per_token": -1.0536432266235352, "logits_per_char": -0.2634108066558838, "num_chars": 4}, {"sum_logits": -0.4757288694381714, "num_tokens": 1, "num_tokens_all": 1089, "is_greedy": true, "logits_per_token": -0.4757288694381714, "logits_per_char": -0.1585762898127238, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 856, "native_id": 2294, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6871528625488281, "incorrect_loss_raw": 0.7855032682418823, "correct_loss_per_char": 0.17178821563720703, "incorrect_loss_per_char": 0.2618344227472941, "correct_loss_per_token": 0.6871528625488281, "incorrect_loss_per_token": 0.7855032682418823, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6871528625488281, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -0.6871528625488281, "logits_per_char": -0.17178821563720703, "num_chars": 4}, {"sum_logits": -0.7855032682418823, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -0.7855032682418823, "logits_per_char": -0.2618344227472941, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 857, "native_id": 2022, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.19628645479679108, "incorrect_loss_raw": 1.9140021800994873, "correct_loss_per_char": 0.06542881826559703, "incorrect_loss_per_char": 0.4785005450248718, "correct_loss_per_token": 0.19628645479679108, "incorrect_loss_per_token": 1.9140021800994873, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.9140021800994873, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": false, "logits_per_token": -1.9140021800994873, "logits_per_char": -0.4785005450248718, "num_chars": 4}, {"sum_logits": -0.19628645479679108, "num_tokens": 1, "num_tokens_all": 877, "is_greedy": true, "logits_per_token": -0.19628645479679108, "logits_per_char": -0.06542881826559703, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 858, "native_id": 336, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0520554780960083, "incorrect_loss_raw": 0.47329407930374146, "correct_loss_per_char": 0.2630138695240021, "incorrect_loss_per_char": 0.15776469310124716, "correct_loss_per_token": 1.0520554780960083, "incorrect_loss_per_token": 0.47329407930374146, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0520554780960083, "num_tokens": 1, "num_tokens_all": 980, "is_greedy": false, "logits_per_token": -1.0520554780960083, "logits_per_char": -0.2630138695240021, "num_chars": 4}, {"sum_logits": -0.47329407930374146, "num_tokens": 1, "num_tokens_all": 980, "is_greedy": true, "logits_per_token": -0.47329407930374146, "logits_per_char": -0.15776469310124716, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 859, "native_id": 3239, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3473360538482666, "incorrect_loss_raw": 1.4114313125610352, "correct_loss_per_char": 0.08683401346206665, "incorrect_loss_per_char": 0.4704771041870117, "correct_loss_per_token": 0.3473360538482666, "incorrect_loss_per_token": 1.4114313125610352, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.3473360538482666, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": true, "logits_per_token": -0.3473360538482666, "logits_per_char": -0.08683401346206665, "num_chars": 4}, {"sum_logits": -1.4114313125610352, "num_tokens": 1, "num_tokens_all": 975, "is_greedy": false, "logits_per_token": -1.4114313125610352, "logits_per_char": -0.4704771041870117, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 860, "native_id": 1783, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2686117887496948, "incorrect_loss_raw": 1.513450026512146, "correct_loss_per_char": 0.08953726291656494, "incorrect_loss_per_char": 0.3783625066280365, "correct_loss_per_token": 0.2686117887496948, "incorrect_loss_per_token": 1.513450026512146, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.513450026512146, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -1.513450026512146, "logits_per_char": -0.3783625066280365, "num_chars": 4}, {"sum_logits": -0.2686117887496948, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": true, "logits_per_token": -0.2686117887496948, "logits_per_char": -0.08953726291656494, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 861, "native_id": 1474, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1848459243774414, "incorrect_loss_raw": 0.46339720487594604, "correct_loss_per_char": 0.39494864145914715, "incorrect_loss_per_char": 0.11584930121898651, "correct_loss_per_token": 1.1848459243774414, "incorrect_loss_per_token": 0.46339720487594604, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.46339720487594604, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": true, "logits_per_token": -0.46339720487594604, "logits_per_char": -0.11584930121898651, "num_chars": 4}, {"sum_logits": -1.1848459243774414, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": false, "logits_per_token": -1.1848459243774414, "logits_per_char": -0.39494864145914715, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 862, "native_id": 2438, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6450739502906799, "incorrect_loss_raw": 0.8029314875602722, "correct_loss_per_char": 0.2150246500968933, "incorrect_loss_per_char": 0.20073287189006805, "correct_loss_per_token": 0.6450739502906799, "incorrect_loss_per_token": 0.8029314875602722, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8029314875602722, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": false, "logits_per_token": -0.8029314875602722, "logits_per_char": -0.20073287189006805, "num_chars": 4}, {"sum_logits": -0.6450739502906799, "num_tokens": 1, "num_tokens_all": 930, "is_greedy": true, "logits_per_token": -0.6450739502906799, "logits_per_char": -0.2150246500968933, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 863, "native_id": 1722, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6815008521080017, "incorrect_loss_raw": 0.8145215511322021, "correct_loss_per_char": 0.22716695070266724, "incorrect_loss_per_char": 0.20363038778305054, "correct_loss_per_token": 0.6815008521080017, "incorrect_loss_per_token": 0.8145215511322021, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8145215511322021, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": false, "logits_per_token": -0.8145215511322021, "logits_per_char": -0.20363038778305054, "num_chars": 4}, {"sum_logits": -0.6815008521080017, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": true, "logits_per_token": -0.6815008521080017, "logits_per_char": -0.22716695070266724, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 864, "native_id": 1289, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.34329870343208313, "incorrect_loss_raw": 1.3018001317977905, "correct_loss_per_char": 0.11443290114402771, "incorrect_loss_per_char": 0.32545003294944763, "correct_loss_per_token": 0.34329870343208313, "incorrect_loss_per_token": 1.3018001317977905, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3018001317977905, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": false, "logits_per_token": -1.3018001317977905, "logits_per_char": -0.32545003294944763, "num_chars": 4}, {"sum_logits": -0.34329870343208313, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": true, "logits_per_token": -0.34329870343208313, "logits_per_char": -0.11443290114402771, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 865, "native_id": 786, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5255017280578613, "incorrect_loss_raw": 0.9506956338882446, "correct_loss_per_char": 0.17516724268595377, "incorrect_loss_per_char": 0.23767390847206116, "correct_loss_per_token": 0.5255017280578613, "incorrect_loss_per_token": 0.9506956338882446, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9506956338882446, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": false, "logits_per_token": -0.9506956338882446, "logits_per_char": -0.23767390847206116, "num_chars": 4}, {"sum_logits": -0.5255017280578613, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": true, "logits_per_token": -0.5255017280578613, "logits_per_char": -0.17516724268595377, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 866, "native_id": 2218, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7064025402069092, "incorrect_loss_raw": 0.7271754741668701, "correct_loss_per_char": 0.1766006350517273, "incorrect_loss_per_char": 0.24239182472229004, "correct_loss_per_token": 0.7064025402069092, "incorrect_loss_per_token": 0.7271754741668701, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7064025402069092, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": true, "logits_per_token": -0.7064025402069092, "logits_per_char": -0.1766006350517273, "num_chars": 4}, {"sum_logits": -0.7271754741668701, "num_tokens": 1, "num_tokens_all": 947, "is_greedy": false, "logits_per_token": -0.7271754741668701, "logits_per_char": -0.24239182472229004, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 867, "native_id": 679, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8496949672698975, "incorrect_loss_raw": 0.7085238695144653, "correct_loss_per_char": 0.2832316557566325, "incorrect_loss_per_char": 0.17713096737861633, "correct_loss_per_token": 0.8496949672698975, "incorrect_loss_per_token": 0.7085238695144653, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7085238695144653, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": true, "logits_per_token": -0.7085238695144653, "logits_per_char": -0.17713096737861633, "num_chars": 4}, {"sum_logits": -0.8496949672698975, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -0.8496949672698975, "logits_per_char": -0.2832316557566325, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 868, "native_id": 2353, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5536755323410034, "incorrect_loss_raw": 0.8983246088027954, "correct_loss_per_char": 0.18455851078033447, "incorrect_loss_per_char": 0.22458115220069885, "correct_loss_per_token": 0.5536755323410034, "incorrect_loss_per_token": 0.8983246088027954, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8983246088027954, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": false, "logits_per_token": -0.8983246088027954, "logits_per_char": -0.22458115220069885, "num_chars": 4}, {"sum_logits": -0.5536755323410034, "num_tokens": 1, "num_tokens_all": 904, "is_greedy": true, "logits_per_token": -0.5536755323410034, "logits_per_char": -0.18455851078033447, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 869, "native_id": 939, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7880991697311401, "incorrect_loss_raw": 0.7293816804885864, "correct_loss_per_char": 0.19702479243278503, "incorrect_loss_per_char": 0.2431272268295288, "correct_loss_per_token": 0.7880991697311401, "incorrect_loss_per_token": 0.7293816804885864, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7880991697311401, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": false, "logits_per_token": -0.7880991697311401, "logits_per_char": -0.19702479243278503, "num_chars": 4}, {"sum_logits": -0.7293816804885864, "num_tokens": 1, "num_tokens_all": 989, "is_greedy": true, "logits_per_token": -0.7293816804885864, "logits_per_char": -0.2431272268295288, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 870, "native_id": 1734, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1141175031661987, "incorrect_loss_raw": 0.452249139547348, "correct_loss_per_char": 0.2785293757915497, "incorrect_loss_per_char": 0.15074971318244934, "correct_loss_per_token": 1.1141175031661987, "incorrect_loss_per_token": 0.452249139547348, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1141175031661987, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": false, "logits_per_token": -1.1141175031661987, "logits_per_char": -0.2785293757915497, "num_chars": 4}, {"sum_logits": -0.452249139547348, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": true, "logits_per_token": -0.452249139547348, "logits_per_char": -0.15074971318244934, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 871, "native_id": 701, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6296292543411255, "incorrect_loss_raw": 0.24780862033367157, "correct_loss_per_char": 0.40740731358528137, "incorrect_loss_per_char": 0.08260287344455719, "correct_loss_per_token": 1.6296292543411255, "incorrect_loss_per_token": 0.24780862033367157, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6296292543411255, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": false, "logits_per_token": -1.6296292543411255, "logits_per_char": -0.40740731358528137, "num_chars": 4}, {"sum_logits": -0.24780862033367157, "num_tokens": 1, "num_tokens_all": 957, "is_greedy": true, "logits_per_token": -0.24780862033367157, "logits_per_char": -0.08260287344455719, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 872, "native_id": 1771, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9582551121711731, "incorrect_loss_raw": 0.5505804419517517, "correct_loss_per_char": 0.23956377804279327, "incorrect_loss_per_char": 0.18352681398391724, "correct_loss_per_token": 0.9582551121711731, "incorrect_loss_per_token": 0.5505804419517517, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9582551121711731, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": false, "logits_per_token": -0.9582551121711731, "logits_per_char": -0.23956377804279327, "num_chars": 4}, {"sum_logits": -0.5505804419517517, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": true, "logits_per_token": -0.5505804419517517, "logits_per_char": -0.18352681398391724, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 873, "native_id": 2518, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.38800978660583496, "incorrect_loss_raw": 1.2348712682724, "correct_loss_per_char": 0.12933659553527832, "incorrect_loss_per_char": 0.3087178170681, "correct_loss_per_token": 0.38800978660583496, "incorrect_loss_per_token": 1.2348712682724, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2348712682724, "num_tokens": 1, "num_tokens_all": 853, "is_greedy": false, "logits_per_token": -1.2348712682724, "logits_per_char": -0.3087178170681, "num_chars": 4}, {"sum_logits": -0.38800978660583496, "num_tokens": 1, "num_tokens_all": 853, "is_greedy": true, "logits_per_token": -0.38800978660583496, "logits_per_char": -0.12933659553527832, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 874, "native_id": 572, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5331132411956787, "incorrect_loss_raw": 0.987023115158081, "correct_loss_per_char": 0.13327831029891968, "incorrect_loss_per_char": 0.32900770505269367, "correct_loss_per_token": 0.5331132411956787, "incorrect_loss_per_token": 0.987023115158081, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5331132411956787, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": true, "logits_per_token": -0.5331132411956787, "logits_per_char": -0.13327831029891968, "num_chars": 4}, {"sum_logits": -0.987023115158081, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": false, "logits_per_token": -0.987023115158081, "logits_per_char": -0.32900770505269367, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 875, "native_id": 1553, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.7532273530960083, "incorrect_loss_raw": 0.7128866910934448, "correct_loss_per_char": 0.2510757843653361, "incorrect_loss_per_char": 0.1782216727733612, "correct_loss_per_token": 0.7532273530960083, "incorrect_loss_per_token": 0.7128866910934448, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7128866910934448, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": true, "logits_per_token": -0.7128866910934448, "logits_per_char": -0.1782216727733612, "num_chars": 4}, {"sum_logits": -0.7532273530960083, "num_tokens": 1, "num_tokens_all": 949, "is_greedy": false, "logits_per_token": -0.7532273530960083, "logits_per_char": -0.2510757843653361, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 876, "native_id": 2051, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9672967195510864, "incorrect_loss_raw": 0.5566203594207764, "correct_loss_per_char": 0.2418241798877716, "incorrect_loss_per_char": 0.18554011980692545, "correct_loss_per_token": 0.9672967195510864, "incorrect_loss_per_token": 0.5566203594207764, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9672967195510864, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": false, "logits_per_token": -0.9672967195510864, "logits_per_char": -0.2418241798877716, "num_chars": 4}, {"sum_logits": -0.5566203594207764, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": true, "logits_per_token": -0.5566203594207764, "logits_per_char": -0.18554011980692545, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 877, "native_id": 3162, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2634333372116089, "incorrect_loss_raw": 0.38342955708503723, "correct_loss_per_char": 0.3158583343029022, "incorrect_loss_per_char": 0.12780985236167908, "correct_loss_per_token": 1.2634333372116089, "incorrect_loss_per_token": 0.38342955708503723, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2634333372116089, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": false, "logits_per_token": -1.2634333372116089, "logits_per_char": -0.3158583343029022, "num_chars": 4}, {"sum_logits": -0.38342955708503723, "num_tokens": 1, "num_tokens_all": 1039, "is_greedy": true, "logits_per_token": -0.38342955708503723, "logits_per_char": -0.12780985236167908, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 878, "native_id": 2358, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4808119237422943, "incorrect_loss_raw": 1.0131062269210815, "correct_loss_per_char": 0.12020298093557358, "incorrect_loss_per_char": 0.33770207564036053, "correct_loss_per_token": 0.4808119237422943, "incorrect_loss_per_token": 1.0131062269210815, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4808119237422943, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": true, "logits_per_token": -0.4808119237422943, "logits_per_char": -0.12020298093557358, "num_chars": 4}, {"sum_logits": -1.0131062269210815, "num_tokens": 1, "num_tokens_all": 973, "is_greedy": false, "logits_per_token": -1.0131062269210815, "logits_per_char": -0.33770207564036053, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 879, "native_id": 1579, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6274493932723999, "incorrect_loss_raw": 0.8125737905502319, "correct_loss_per_char": 0.20914979775746664, "incorrect_loss_per_char": 0.20314344763755798, "correct_loss_per_token": 0.6274493932723999, "incorrect_loss_per_token": 0.8125737905502319, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8125737905502319, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": false, "logits_per_token": -0.8125737905502319, "logits_per_char": -0.20314344763755798, "num_chars": 4}, {"sum_logits": -0.6274493932723999, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": true, "logits_per_token": -0.6274493932723999, "logits_per_char": -0.20914979775746664, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 880, "native_id": 3184, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3693500757217407, "incorrect_loss_raw": 0.3367208242416382, "correct_loss_per_char": 0.3423375189304352, "incorrect_loss_per_char": 0.11224027474721272, "correct_loss_per_token": 1.3693500757217407, "incorrect_loss_per_token": 0.3367208242416382, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3693500757217407, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": false, "logits_per_token": -1.3693500757217407, "logits_per_char": -0.3423375189304352, "num_chars": 4}, {"sum_logits": -0.3367208242416382, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": true, "logits_per_token": -0.3367208242416382, "logits_per_char": -0.11224027474721272, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 881, "native_id": 2507, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.48894941806793213, "incorrect_loss_raw": 1.0267534255981445, "correct_loss_per_char": 0.16298313935597739, "incorrect_loss_per_char": 0.25668835639953613, "correct_loss_per_token": 0.48894941806793213, "incorrect_loss_per_token": 1.0267534255981445, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0267534255981445, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": false, "logits_per_token": -1.0267534255981445, "logits_per_char": -0.25668835639953613, "num_chars": 4}, {"sum_logits": -0.48894941806793213, "num_tokens": 1, "num_tokens_all": 919, "is_greedy": true, "logits_per_token": -0.48894941806793213, "logits_per_char": -0.16298313935597739, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 882, "native_id": 1134, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.28713181614875793, "incorrect_loss_raw": 1.4933384656906128, "correct_loss_per_char": 0.09571060538291931, "incorrect_loss_per_char": 0.3733346164226532, "correct_loss_per_token": 0.28713181614875793, "incorrect_loss_per_token": 1.4933384656906128, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4933384656906128, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": false, "logits_per_token": -1.4933384656906128, "logits_per_char": -0.3733346164226532, "num_chars": 4}, {"sum_logits": -0.28713181614875793, "num_tokens": 1, "num_tokens_all": 873, "is_greedy": true, "logits_per_token": -0.28713181614875793, "logits_per_char": -0.09571060538291931, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 883, "native_id": 2696, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5165660381317139, "incorrect_loss_raw": 1.038438081741333, "correct_loss_per_char": 0.12914150953292847, "incorrect_loss_per_char": 0.346146027247111, "correct_loss_per_token": 0.5165660381317139, "incorrect_loss_per_token": 1.038438081741333, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5165660381317139, "num_tokens": 1, "num_tokens_all": 847, "is_greedy": true, "logits_per_token": -0.5165660381317139, "logits_per_char": -0.12914150953292847, "num_chars": 4}, {"sum_logits": -1.038438081741333, "num_tokens": 1, "num_tokens_all": 847, "is_greedy": false, "logits_per_token": -1.038438081741333, "logits_per_char": -0.346146027247111, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 884, "native_id": 585, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.29016605019569397, "incorrect_loss_raw": 1.6362900733947754, "correct_loss_per_char": 0.096722016731898, "incorrect_loss_per_char": 0.40907251834869385, "correct_loss_per_token": 0.29016605019569397, "incorrect_loss_per_token": 1.6362900733947754, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6362900733947754, "num_tokens": 1, "num_tokens_all": 843, "is_greedy": false, "logits_per_token": -1.6362900733947754, "logits_per_char": -0.40907251834869385, "num_chars": 4}, {"sum_logits": -0.29016605019569397, "num_tokens": 1, "num_tokens_all": 843, "is_greedy": true, "logits_per_token": -0.29016605019569397, "logits_per_char": -0.096722016731898, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 885, "native_id": 1465, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7528775334358215, "incorrect_loss_raw": 0.7229771018028259, "correct_loss_per_char": 0.18821938335895538, "incorrect_loss_per_char": 0.24099236726760864, "correct_loss_per_token": 0.7528775334358215, "incorrect_loss_per_token": 0.7229771018028259, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7528775334358215, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": false, "logits_per_token": -0.7528775334358215, "logits_per_char": -0.18821938335895538, "num_chars": 4}, {"sum_logits": -0.7229771018028259, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": true, "logits_per_token": -0.7229771018028259, "logits_per_char": -0.24099236726760864, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 886, "native_id": 538, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8039714097976685, "incorrect_loss_raw": 0.6631571054458618, "correct_loss_per_char": 0.20099285244941711, "incorrect_loss_per_char": 0.22105236848195395, "correct_loss_per_token": 0.8039714097976685, "incorrect_loss_per_token": 0.6631571054458618, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8039714097976685, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": false, "logits_per_token": -0.8039714097976685, "logits_per_char": -0.20099285244941711, "num_chars": 4}, {"sum_logits": -0.6631571054458618, "num_tokens": 1, "num_tokens_all": 958, "is_greedy": true, "logits_per_token": -0.6631571054458618, "logits_per_char": -0.22105236848195395, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 887, "native_id": 1069, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1253979206085205, "incorrect_loss_raw": 0.47450947761535645, "correct_loss_per_char": 0.2813494801521301, "incorrect_loss_per_char": 0.1581698258717855, "correct_loss_per_token": 1.1253979206085205, "incorrect_loss_per_token": 0.47450947761535645, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1253979206085205, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": false, "logits_per_token": -1.1253979206085205, "logits_per_char": -0.2813494801521301, "num_chars": 4}, {"sum_logits": -0.47450947761535645, "num_tokens": 1, "num_tokens_all": 891, "is_greedy": true, "logits_per_token": -0.47450947761535645, "logits_per_char": -0.1581698258717855, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 888, "native_id": 1275, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2527267336845398, "incorrect_loss_raw": 1.61751127243042, "correct_loss_per_char": 0.08424224456151326, "incorrect_loss_per_char": 0.404377818107605, "correct_loss_per_token": 0.2527267336845398, "incorrect_loss_per_token": 1.61751127243042, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.61751127243042, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": false, "logits_per_token": -1.61751127243042, "logits_per_char": -0.404377818107605, "num_chars": 4}, {"sum_logits": -0.2527267336845398, "num_tokens": 1, "num_tokens_all": 965, "is_greedy": true, "logits_per_token": -0.2527267336845398, "logits_per_char": -0.08424224456151326, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 889, "native_id": 2734, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5268382430076599, "incorrect_loss_raw": 1.082780122756958, "correct_loss_per_char": 0.13170956075191498, "incorrect_loss_per_char": 0.36092670758565265, "correct_loss_per_token": 0.5268382430076599, "incorrect_loss_per_token": 1.082780122756958, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5268382430076599, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": true, "logits_per_token": -0.5268382430076599, "logits_per_char": -0.13170956075191498, "num_chars": 4}, {"sum_logits": -1.082780122756958, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": false, "logits_per_token": -1.082780122756958, "logits_per_char": -0.36092670758565265, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 890, "native_id": 1209, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9321122169494629, "incorrect_loss_raw": 0.5441949963569641, "correct_loss_per_char": 0.23302805423736572, "incorrect_loss_per_char": 0.18139833211898804, "correct_loss_per_token": 0.9321122169494629, "incorrect_loss_per_token": 0.5441949963569641, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9321122169494629, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": false, "logits_per_token": -0.9321122169494629, "logits_per_char": -0.23302805423736572, "num_chars": 4}, {"sum_logits": -0.5441949963569641, "num_tokens": 1, "num_tokens_all": 871, "is_greedy": true, "logits_per_token": -0.5441949963569641, "logits_per_char": -0.18139833211898804, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 891, "native_id": 2634, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6198669672012329, "incorrect_loss_raw": 0.8633217811584473, "correct_loss_per_char": 0.20662232240041098, "incorrect_loss_per_char": 0.21583044528961182, "correct_loss_per_token": 0.6198669672012329, "incorrect_loss_per_token": 0.8633217811584473, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8633217811584473, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": false, "logits_per_token": -0.8633217811584473, "logits_per_char": -0.21583044528961182, "num_chars": 4}, {"sum_logits": -0.6198669672012329, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": true, "logits_per_token": -0.6198669672012329, "logits_per_char": -0.20662232240041098, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 892, "native_id": 2939, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4930126667022705, "incorrect_loss_raw": 0.9860174059867859, "correct_loss_per_char": 0.12325316667556763, "incorrect_loss_per_char": 0.32867246866226196, "correct_loss_per_token": 0.4930126667022705, "incorrect_loss_per_token": 0.9860174059867859, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4930126667022705, "num_tokens": 1, "num_tokens_all": 892, "is_greedy": true, "logits_per_token": -0.4930126667022705, "logits_per_char": -0.12325316667556763, "num_chars": 4}, {"sum_logits": -0.9860174059867859, "num_tokens": 1, "num_tokens_all": 892, "is_greedy": false, "logits_per_token": -0.9860174059867859, "logits_per_char": -0.32867246866226196, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 893, "native_id": 1865, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.721294105052948, "incorrect_loss_raw": 0.7091646194458008, "correct_loss_per_char": 0.180323526263237, "incorrect_loss_per_char": 0.2363882064819336, "correct_loss_per_token": 0.721294105052948, "incorrect_loss_per_token": 0.7091646194458008, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.721294105052948, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": false, "logits_per_token": -0.721294105052948, "logits_per_char": -0.180323526263237, "num_chars": 4}, {"sum_logits": -0.7091646194458008, "num_tokens": 1, "num_tokens_all": 924, "is_greedy": true, "logits_per_token": -0.7091646194458008, "logits_per_char": -0.2363882064819336, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 894, "native_id": 239, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8027806282043457, "incorrect_loss_raw": 0.6393088102340698, "correct_loss_per_char": 0.2675935427347819, "incorrect_loss_per_char": 0.15982720255851746, "correct_loss_per_token": 0.8027806282043457, "incorrect_loss_per_token": 0.6393088102340698, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6393088102340698, "num_tokens": 1, "num_tokens_all": 894, "is_greedy": true, "logits_per_token": -0.6393088102340698, "logits_per_char": -0.15982720255851746, "num_chars": 4}, {"sum_logits": -0.8027806282043457, "num_tokens": 1, "num_tokens_all": 894, "is_greedy": false, "logits_per_token": -0.8027806282043457, "logits_per_char": -0.2675935427347819, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 895, "native_id": 2931, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8716795444488525, "incorrect_loss_raw": 0.5962620973587036, "correct_loss_per_char": 0.21791988611221313, "incorrect_loss_per_char": 0.1987540324529012, "correct_loss_per_token": 0.8716795444488525, "incorrect_loss_per_token": 0.5962620973587036, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8716795444488525, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": false, "logits_per_token": -0.8716795444488525, "logits_per_char": -0.21791988611221313, "num_chars": 4}, {"sum_logits": -0.5962620973587036, "num_tokens": 1, "num_tokens_all": 1118, "is_greedy": true, "logits_per_token": -0.5962620973587036, "logits_per_char": -0.1987540324529012, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 896, "native_id": 1718, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.27487534284591675, "incorrect_loss_raw": 1.5109903812408447, "correct_loss_per_char": 0.09162511428197224, "incorrect_loss_per_char": 0.3777475953102112, "correct_loss_per_token": 0.27487534284591675, "incorrect_loss_per_token": 1.5109903812408447, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5109903812408447, "num_tokens": 1, "num_tokens_all": 863, "is_greedy": false, "logits_per_token": -1.5109903812408447, "logits_per_char": -0.3777475953102112, "num_chars": 4}, {"sum_logits": -0.27487534284591675, "num_tokens": 1, "num_tokens_all": 863, "is_greedy": true, "logits_per_token": -0.27487534284591675, "logits_per_char": -0.09162511428197224, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 897, "native_id": 1510, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7856438159942627, "incorrect_loss_raw": 0.6382879614830017, "correct_loss_per_char": 0.19641095399856567, "incorrect_loss_per_char": 0.21276265382766724, "correct_loss_per_token": 0.7856438159942627, "incorrect_loss_per_token": 0.6382879614830017, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7856438159942627, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -0.7856438159942627, "logits_per_char": -0.19641095399856567, "num_chars": 4}, {"sum_logits": -0.6382879614830017, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": true, "logits_per_token": -0.6382879614830017, "logits_per_char": -0.21276265382766724, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 898, "native_id": 203, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3971262276172638, "incorrect_loss_raw": 1.2735164165496826, "correct_loss_per_char": 0.1323754092057546, "incorrect_loss_per_char": 0.31837910413742065, "correct_loss_per_token": 0.3971262276172638, "incorrect_loss_per_token": 1.2735164165496826, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2735164165496826, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": false, "logits_per_token": -1.2735164165496826, "logits_per_char": -0.31837910413742065, "num_chars": 4}, {"sum_logits": -0.3971262276172638, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": true, "logits_per_token": -0.3971262276172638, "logits_per_char": -0.1323754092057546, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 899, "native_id": 2926, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8164231181144714, "incorrect_loss_raw": 0.6388527750968933, "correct_loss_per_char": 0.20410577952861786, "incorrect_loss_per_char": 0.21295092503229776, "correct_loss_per_token": 0.8164231181144714, "incorrect_loss_per_token": 0.6388527750968933, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8164231181144714, "num_tokens": 1, "num_tokens_all": 885, "is_greedy": false, "logits_per_token": -0.8164231181144714, "logits_per_char": -0.20410577952861786, "num_chars": 4}, {"sum_logits": -0.6388527750968933, "num_tokens": 1, "num_tokens_all": 885, "is_greedy": true, "logits_per_token": -0.6388527750968933, "logits_per_char": -0.21295092503229776, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 900, "native_id": 2824, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5653676986694336, "incorrect_loss_raw": 1.0586620569229126, "correct_loss_per_char": 0.1413419246673584, "incorrect_loss_per_char": 0.3528873523076375, "correct_loss_per_token": 0.5653676986694336, "incorrect_loss_per_token": 1.0586620569229126, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5653676986694336, "num_tokens": 1, "num_tokens_all": 1047, "is_greedy": true, "logits_per_token": -0.5653676986694336, "logits_per_char": -0.1413419246673584, "num_chars": 4}, {"sum_logits": -1.0586620569229126, "num_tokens": 1, "num_tokens_all": 1047, "is_greedy": false, "logits_per_token": -1.0586620569229126, "logits_per_char": -0.3528873523076375, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 901, "native_id": 2076, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4474095106124878, "incorrect_loss_raw": 1.121290922164917, "correct_loss_per_char": 0.11185237765312195, "incorrect_loss_per_char": 0.373763640721639, "correct_loss_per_token": 0.4474095106124878, "incorrect_loss_per_token": 1.121290922164917, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4474095106124878, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": true, "logits_per_token": -0.4474095106124878, "logits_per_char": -0.11185237765312195, "num_chars": 4}, {"sum_logits": -1.121290922164917, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -1.121290922164917, "logits_per_char": -0.373763640721639, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 902, "native_id": 2944, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.793250322341919, "incorrect_loss_raw": 0.6782826781272888, "correct_loss_per_char": 0.19831258058547974, "incorrect_loss_per_char": 0.2260942260424296, "correct_loss_per_token": 0.793250322341919, "incorrect_loss_per_token": 0.6782826781272888, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.793250322341919, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": false, "logits_per_token": -0.793250322341919, "logits_per_char": -0.19831258058547974, "num_chars": 4}, {"sum_logits": -0.6782826781272888, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": true, "logits_per_token": -0.6782826781272888, "logits_per_char": -0.2260942260424296, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 903, "native_id": 2745, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.797189474105835, "incorrect_loss_raw": 0.6385846138000488, "correct_loss_per_char": 0.19929736852645874, "incorrect_loss_per_char": 0.2128615379333496, "correct_loss_per_token": 0.797189474105835, "incorrect_loss_per_token": 0.6385846138000488, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.797189474105835, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": false, "logits_per_token": -0.797189474105835, "logits_per_char": -0.19929736852645874, "num_chars": 4}, {"sum_logits": -0.6385846138000488, "num_tokens": 1, "num_tokens_all": 982, "is_greedy": true, "logits_per_token": -0.6385846138000488, "logits_per_char": -0.2128615379333496, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 904, "native_id": 1255, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6831145882606506, "incorrect_loss_raw": 0.7923794984817505, "correct_loss_per_char": 0.17077864706516266, "incorrect_loss_per_char": 0.2641264994939168, "correct_loss_per_token": 0.6831145882606506, "incorrect_loss_per_token": 0.7923794984817505, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6831145882606506, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": true, "logits_per_token": -0.6831145882606506, "logits_per_char": -0.17077864706516266, "num_chars": 4}, {"sum_logits": -0.7923794984817505, "num_tokens": 1, "num_tokens_all": 961, "is_greedy": false, "logits_per_token": -0.7923794984817505, "logits_per_char": -0.2641264994939168, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 905, "native_id": 776, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.38951200246810913, "incorrect_loss_raw": 1.20650053024292, "correct_loss_per_char": 0.12983733415603638, "incorrect_loss_per_char": 0.30162513256073, "correct_loss_per_token": 0.38951200246810913, "incorrect_loss_per_token": 1.20650053024292, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.20650053024292, "num_tokens": 1, "num_tokens_all": 866, "is_greedy": false, "logits_per_token": -1.20650053024292, "logits_per_char": -0.30162513256073, "num_chars": 4}, {"sum_logits": -0.38951200246810913, "num_tokens": 1, "num_tokens_all": 866, "is_greedy": true, "logits_per_token": -0.38951200246810913, "logits_per_char": -0.12983733415603638, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 906, "native_id": 2392, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5164769887924194, "incorrect_loss_raw": 0.27138960361480713, "correct_loss_per_char": 0.37911924719810486, "incorrect_loss_per_char": 0.09046320120493571, "correct_loss_per_token": 1.5164769887924194, "incorrect_loss_per_token": 0.27138960361480713, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5164769887924194, "num_tokens": 1, "num_tokens_all": 860, "is_greedy": false, "logits_per_token": -1.5164769887924194, "logits_per_char": -0.37911924719810486, "num_chars": 4}, {"sum_logits": -0.27138960361480713, "num_tokens": 1, "num_tokens_all": 860, "is_greedy": true, "logits_per_token": -0.27138960361480713, "logits_per_char": -0.09046320120493571, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 907, "native_id": 1588, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6908802390098572, "incorrect_loss_raw": 0.7768010497093201, "correct_loss_per_char": 0.23029341300328574, "incorrect_loss_per_char": 0.19420026242733002, "correct_loss_per_token": 0.6908802390098572, "incorrect_loss_per_token": 0.7768010497093201, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7768010497093201, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": false, "logits_per_token": -0.7768010497093201, "logits_per_char": -0.19420026242733002, "num_chars": 4}, {"sum_logits": -0.6908802390098572, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": true, "logits_per_token": -0.6908802390098572, "logits_per_char": -0.23029341300328574, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 908, "native_id": 1156, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9970809817314148, "incorrect_loss_raw": 0.5097202062606812, "correct_loss_per_char": 0.2492702454328537, "incorrect_loss_per_char": 0.16990673542022705, "correct_loss_per_token": 0.9970809817314148, "incorrect_loss_per_token": 0.5097202062606812, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9970809817314148, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": false, "logits_per_token": -0.9970809817314148, "logits_per_char": -0.2492702454328537, "num_chars": 4}, {"sum_logits": -0.5097202062606812, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": true, "logits_per_token": -0.5097202062606812, "logits_per_char": -0.16990673542022705, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 909, "native_id": 1295, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6759793162345886, "incorrect_loss_raw": 0.8188062906265259, "correct_loss_per_char": 0.16899482905864716, "incorrect_loss_per_char": 0.272935430208842, "correct_loss_per_token": 0.6759793162345886, "incorrect_loss_per_token": 0.8188062906265259, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6759793162345886, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": true, "logits_per_token": -0.6759793162345886, "logits_per_char": -0.16899482905864716, "num_chars": 4}, {"sum_logits": -0.8188062906265259, "num_tokens": 1, "num_tokens_all": 867, "is_greedy": false, "logits_per_token": -0.8188062906265259, "logits_per_char": -0.272935430208842, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 910, "native_id": 2298, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6575973629951477, "incorrect_loss_raw": 0.8957104086875916, "correct_loss_per_char": 0.16439934074878693, "incorrect_loss_per_char": 0.2985701362291972, "correct_loss_per_token": 0.6575973629951477, "incorrect_loss_per_token": 0.8957104086875916, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6575973629951477, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": true, "logits_per_token": -0.6575973629951477, "logits_per_char": -0.16439934074878693, "num_chars": 4}, {"sum_logits": -0.8957104086875916, "num_tokens": 1, "num_tokens_all": 915, "is_greedy": false, "logits_per_token": -0.8957104086875916, "logits_per_char": -0.2985701362291972, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 911, "native_id": 1574, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2307404279708862, "incorrect_loss_raw": 0.38462546467781067, "correct_loss_per_char": 0.4102468093236287, "incorrect_loss_per_char": 0.09615636616945267, "correct_loss_per_token": 1.2307404279708862, "incorrect_loss_per_token": 0.38462546467781067, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.38462546467781067, "num_tokens": 1, "num_tokens_all": 893, "is_greedy": true, "logits_per_token": -0.38462546467781067, "logits_per_char": -0.09615636616945267, "num_chars": 4}, {"sum_logits": -1.2307404279708862, "num_tokens": 1, "num_tokens_all": 893, "is_greedy": false, "logits_per_token": -1.2307404279708862, "logits_per_char": -0.4102468093236287, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 912, "native_id": 1702, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1683920621871948, "incorrect_loss_raw": 0.41201552748680115, "correct_loss_per_char": 0.2920980155467987, "incorrect_loss_per_char": 0.13733850916226706, "correct_loss_per_token": 1.1683920621871948, "incorrect_loss_per_token": 0.41201552748680115, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1683920621871948, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": false, "logits_per_token": -1.1683920621871948, "logits_per_char": -0.2920980155467987, "num_chars": 4}, {"sum_logits": -0.41201552748680115, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": true, "logits_per_token": -0.41201552748680115, "logits_per_char": -0.13733850916226706, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 913, "native_id": 3048, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8122555017471313, "incorrect_loss_raw": 0.6314083933830261, "correct_loss_per_char": 0.20306387543678284, "incorrect_loss_per_char": 0.2104694644610087, "correct_loss_per_token": 0.8122555017471313, "incorrect_loss_per_token": 0.6314083933830261, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8122555017471313, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": false, "logits_per_token": -0.8122555017471313, "logits_per_char": -0.20306387543678284, "num_chars": 4}, {"sum_logits": -0.6314083933830261, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": true, "logits_per_token": -0.6314083933830261, "logits_per_char": -0.2104694644610087, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 914, "native_id": 2535, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24035310745239258, "incorrect_loss_raw": 1.7366421222686768, "correct_loss_per_char": 0.08011770248413086, "incorrect_loss_per_char": 0.4341605305671692, "correct_loss_per_token": 0.24035310745239258, "incorrect_loss_per_token": 1.7366421222686768, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7366421222686768, "num_tokens": 1, "num_tokens_all": 980, "is_greedy": false, "logits_per_token": -1.7366421222686768, "logits_per_char": -0.4341605305671692, "num_chars": 4}, {"sum_logits": -0.24035310745239258, "num_tokens": 1, "num_tokens_all": 980, "is_greedy": true, "logits_per_token": -0.24035310745239258, "logits_per_char": -0.08011770248413086, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 915, "native_id": 2998, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.6645442247390747, "incorrect_loss_raw": 0.8271894454956055, "correct_loss_per_char": 0.22151474157969156, "incorrect_loss_per_char": 0.20679736137390137, "correct_loss_per_token": 0.6645442247390747, "incorrect_loss_per_token": 0.8271894454956055, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8271894454956055, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": false, "logits_per_token": -0.8271894454956055, "logits_per_char": -0.20679736137390137, "num_chars": 4}, {"sum_logits": -0.6645442247390747, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": true, "logits_per_token": -0.6645442247390747, "logits_per_char": -0.22151474157969156, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 916, "native_id": 230, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9310269951820374, "incorrect_loss_raw": 0.5705496072769165, "correct_loss_per_char": 0.23275674879550934, "incorrect_loss_per_char": 0.19018320242563883, "correct_loss_per_token": 0.9310269951820374, "incorrect_loss_per_token": 0.5705496072769165, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9310269951820374, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": false, "logits_per_token": -0.9310269951820374, "logits_per_char": -0.23275674879550934, "num_chars": 4}, {"sum_logits": -0.5705496072769165, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": true, "logits_per_token": -0.5705496072769165, "logits_per_char": -0.19018320242563883, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 917, "native_id": 2813, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2513110637664795, "incorrect_loss_raw": 1.7433586120605469, "correct_loss_per_char": 0.08377035458882649, "incorrect_loss_per_char": 0.4358396530151367, "correct_loss_per_token": 0.2513110637664795, "incorrect_loss_per_token": 1.7433586120605469, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7433586120605469, "num_tokens": 1, "num_tokens_all": 862, "is_greedy": false, "logits_per_token": -1.7433586120605469, "logits_per_char": -0.4358396530151367, "num_chars": 4}, {"sum_logits": -0.2513110637664795, "num_tokens": 1, "num_tokens_all": 862, "is_greedy": true, "logits_per_token": -0.2513110637664795, "logits_per_char": -0.08377035458882649, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 918, "native_id": 1052, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.22265782952308655, "incorrect_loss_raw": 1.9375683069229126, "correct_loss_per_char": 0.07421927650769551, "incorrect_loss_per_char": 0.48439207673072815, "correct_loss_per_token": 0.22265782952308655, "incorrect_loss_per_token": 1.9375683069229126, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.9375683069229126, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": false, "logits_per_token": -1.9375683069229126, "logits_per_char": -0.48439207673072815, "num_chars": 4}, {"sum_logits": -0.22265782952308655, "num_tokens": 1, "num_tokens_all": 959, "is_greedy": true, "logits_per_token": -0.22265782952308655, "logits_per_char": -0.07421927650769551, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 919, "native_id": 798, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6624115705490112, "incorrect_loss_raw": 0.861635148525238, "correct_loss_per_char": 0.1656028926372528, "incorrect_loss_per_char": 0.28721171617507935, "correct_loss_per_token": 0.6624115705490112, "incorrect_loss_per_token": 0.861635148525238, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6624115705490112, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": true, "logits_per_token": -0.6624115705490112, "logits_per_char": -0.1656028926372528, "num_chars": 4}, {"sum_logits": -0.861635148525238, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": false, "logits_per_token": -0.861635148525238, "logits_per_char": -0.28721171617507935, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 920, "native_id": 1291, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8250247240066528, "incorrect_loss_raw": 0.6174672842025757, "correct_loss_per_char": 0.2062561810016632, "incorrect_loss_per_char": 0.20582242806752524, "correct_loss_per_token": 0.8250247240066528, "incorrect_loss_per_token": 0.6174672842025757, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8250247240066528, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": false, "logits_per_token": -0.8250247240066528, "logits_per_char": -0.2062561810016632, "num_chars": 4}, {"sum_logits": -0.6174672842025757, "num_tokens": 1, "num_tokens_all": 955, "is_greedy": true, "logits_per_token": -0.6174672842025757, "logits_per_char": -0.20582242806752524, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 921, "native_id": 388, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.6247549057006836, "incorrect_loss_raw": 0.2649347484111786, "correct_loss_per_char": 0.4061887264251709, "incorrect_loss_per_char": 0.0883115828037262, "correct_loss_per_token": 1.6247549057006836, "incorrect_loss_per_token": 0.2649347484111786, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6247549057006836, "num_tokens": 1, "num_tokens_all": 855, "is_greedy": false, "logits_per_token": -1.6247549057006836, "logits_per_char": -0.4061887264251709, "num_chars": 4}, {"sum_logits": -0.2649347484111786, "num_tokens": 1, "num_tokens_all": 855, "is_greedy": true, "logits_per_token": -0.2649347484111786, "logits_per_char": -0.0883115828037262, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 922, "native_id": 1650, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3385191559791565, "incorrect_loss_raw": 1.3811907768249512, "correct_loss_per_char": 0.11283971865971883, "incorrect_loss_per_char": 0.3452976942062378, "correct_loss_per_token": 0.3385191559791565, "incorrect_loss_per_token": 1.3811907768249512, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3811907768249512, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": false, "logits_per_token": -1.3811907768249512, "logits_per_char": -0.3452976942062378, "num_chars": 4}, {"sum_logits": -0.3385191559791565, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": true, "logits_per_token": -0.3385191559791565, "logits_per_char": -0.11283971865971883, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 923, "native_id": 1495, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5536925792694092, "incorrect_loss_raw": 1.0329444408416748, "correct_loss_per_char": 0.1384231448173523, "incorrect_loss_per_char": 0.3443148136138916, "correct_loss_per_token": 0.5536925792694092, "incorrect_loss_per_token": 1.0329444408416748, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5536925792694092, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": true, "logits_per_token": -0.5536925792694092, "logits_per_char": -0.1384231448173523, "num_chars": 4}, {"sum_logits": -1.0329444408416748, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": false, "logits_per_token": -1.0329444408416748, "logits_per_char": -0.3443148136138916, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 924, "native_id": 1493, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6163997650146484, "incorrect_loss_raw": 0.883701503276825, "correct_loss_per_char": 0.1540999412536621, "incorrect_loss_per_char": 0.29456716775894165, "correct_loss_per_token": 0.6163997650146484, "incorrect_loss_per_token": 0.883701503276825, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6163997650146484, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": true, "logits_per_token": -0.6163997650146484, "logits_per_char": -0.1540999412536621, "num_chars": 4}, {"sum_logits": -0.883701503276825, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": false, "logits_per_token": -0.883701503276825, "logits_per_char": -0.29456716775894165, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 925, "native_id": 1749, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9641603231430054, "incorrect_loss_raw": 0.5284323692321777, "correct_loss_per_char": 0.32138677438100177, "incorrect_loss_per_char": 0.13210809230804443, "correct_loss_per_token": 0.9641603231430054, "incorrect_loss_per_token": 0.5284323692321777, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5284323692321777, "num_tokens": 1, "num_tokens_all": 842, "is_greedy": true, "logits_per_token": -0.5284323692321777, "logits_per_char": -0.13210809230804443, "num_chars": 4}, {"sum_logits": -0.9641603231430054, "num_tokens": 1, "num_tokens_all": 842, "is_greedy": false, "logits_per_token": -0.9641603231430054, "logits_per_char": -0.32138677438100177, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 926, "native_id": 1214, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9857839345932007, "incorrect_loss_raw": 0.5055574178695679, "correct_loss_per_char": 0.24644598364830017, "incorrect_loss_per_char": 0.16851913928985596, "correct_loss_per_token": 0.9857839345932007, "incorrect_loss_per_token": 0.5055574178695679, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9857839345932007, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": false, "logits_per_token": -0.9857839345932007, "logits_per_char": -0.24644598364830017, "num_chars": 4}, {"sum_logits": -0.5055574178695679, "num_tokens": 1, "num_tokens_all": 908, "is_greedy": true, "logits_per_token": -0.5055574178695679, "logits_per_char": -0.16851913928985596, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 927, "native_id": 1592, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8489692211151123, "incorrect_loss_raw": 0.6224784255027771, "correct_loss_per_char": 0.2829897403717041, "incorrect_loss_per_char": 0.15561960637569427, "correct_loss_per_token": 0.8489692211151123, "incorrect_loss_per_token": 0.6224784255027771, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6224784255027771, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": true, "logits_per_token": -0.6224784255027771, "logits_per_char": -0.15561960637569427, "num_chars": 4}, {"sum_logits": -0.8489692211151123, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": false, "logits_per_token": -0.8489692211151123, "logits_per_char": -0.2829897403717041, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 928, "native_id": 2799, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.574215829372406, "incorrect_loss_raw": 0.9083628058433533, "correct_loss_per_char": 0.19140527645746866, "incorrect_loss_per_char": 0.22709070146083832, "correct_loss_per_token": 0.574215829372406, "incorrect_loss_per_token": 0.9083628058433533, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9083628058433533, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": false, "logits_per_token": -0.9083628058433533, "logits_per_char": -0.22709070146083832, "num_chars": 4}, {"sum_logits": -0.574215829372406, "num_tokens": 1, "num_tokens_all": 868, "is_greedy": true, "logits_per_token": -0.574215829372406, "logits_per_char": -0.19140527645746866, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 929, "native_id": 1154, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.498825192451477, "incorrect_loss_raw": 0.29127392172813416, "correct_loss_per_char": 0.37470629811286926, "incorrect_loss_per_char": 0.09709130724271138, "correct_loss_per_token": 1.498825192451477, "incorrect_loss_per_token": 0.29127392172813416, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.498825192451477, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.498825192451477, "logits_per_char": -0.37470629811286926, "num_chars": 4}, {"sum_logits": -0.29127392172813416, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": true, "logits_per_token": -0.29127392172813416, "logits_per_char": -0.09709130724271138, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 930, "native_id": 2351, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8661980032920837, "incorrect_loss_raw": 0.5857257843017578, "correct_loss_per_char": 0.21654950082302094, "incorrect_loss_per_char": 0.19524192810058594, "correct_loss_per_token": 0.8661980032920837, "incorrect_loss_per_token": 0.5857257843017578, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8661980032920837, "num_tokens": 1, "num_tokens_all": 999, "is_greedy": false, "logits_per_token": -0.8661980032920837, "logits_per_char": -0.21654950082302094, "num_chars": 4}, {"sum_logits": -0.5857257843017578, "num_tokens": 1, "num_tokens_all": 999, "is_greedy": true, "logits_per_token": -0.5857257843017578, "logits_per_char": -0.19524192810058594, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 931, "native_id": 694, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.42468926310539246, "incorrect_loss_raw": 1.1716810464859009, "correct_loss_per_char": 0.10617231577634811, "incorrect_loss_per_char": 0.3905603488286336, "correct_loss_per_token": 0.42468926310539246, "incorrect_loss_per_token": 1.1716810464859009, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.42468926310539246, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": true, "logits_per_token": -0.42468926310539246, "logits_per_char": -0.10617231577634811, "num_chars": 4}, {"sum_logits": -1.1716810464859009, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": false, "logits_per_token": -1.1716810464859009, "logits_per_char": -0.3905603488286336, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 932, "native_id": 3183, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8589590787887573, "incorrect_loss_raw": 0.6352590918540955, "correct_loss_per_char": 0.21473976969718933, "incorrect_loss_per_char": 0.21175303061803183, "correct_loss_per_token": 0.8589590787887573, "incorrect_loss_per_token": 0.6352590918540955, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8589590787887573, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": false, "logits_per_token": -0.8589590787887573, "logits_per_char": -0.21473976969718933, "num_chars": 4}, {"sum_logits": -0.6352590918540955, "num_tokens": 1, "num_tokens_all": 917, "is_greedy": true, "logits_per_token": -0.6352590918540955, "logits_per_char": -0.21175303061803183, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 933, "native_id": 2327, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4997207522392273, "incorrect_loss_raw": 1.0163629055023193, "correct_loss_per_char": 0.16657358407974243, "incorrect_loss_per_char": 0.25409072637557983, "correct_loss_per_token": 0.4997207522392273, "incorrect_loss_per_token": 1.0163629055023193, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0163629055023193, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": false, "logits_per_token": -1.0163629055023193, "logits_per_char": -0.25409072637557983, "num_chars": 4}, {"sum_logits": -0.4997207522392273, "num_tokens": 1, "num_tokens_all": 881, "is_greedy": true, "logits_per_token": -0.4997207522392273, "logits_per_char": -0.16657358407974243, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 934, "native_id": 1470, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8779122829437256, "incorrect_loss_raw": 0.6112648844718933, "correct_loss_per_char": 0.2926374276479085, "incorrect_loss_per_char": 0.15281622111797333, "correct_loss_per_token": 0.8779122829437256, "incorrect_loss_per_token": 0.6112648844718933, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6112648844718933, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": true, "logits_per_token": -0.6112648844718933, "logits_per_char": -0.15281622111797333, "num_chars": 4}, {"sum_logits": -0.8779122829437256, "num_tokens": 1, "num_tokens_all": 934, "is_greedy": false, "logits_per_token": -0.8779122829437256, "logits_per_char": -0.2926374276479085, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 935, "native_id": 822, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5978901982307434, "incorrect_loss_raw": 0.8460478782653809, "correct_loss_per_char": 0.19929673274358115, "incorrect_loss_per_char": 0.21151196956634521, "correct_loss_per_token": 0.5978901982307434, "incorrect_loss_per_token": 0.8460478782653809, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8460478782653809, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": false, "logits_per_token": -0.8460478782653809, "logits_per_char": -0.21151196956634521, "num_chars": 4}, {"sum_logits": -0.5978901982307434, "num_tokens": 1, "num_tokens_all": 969, "is_greedy": true, "logits_per_token": -0.5978901982307434, "logits_per_char": -0.19929673274358115, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 936, "native_id": 3095, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.26410529017448425, "incorrect_loss_raw": 1.5568393468856812, "correct_loss_per_char": 0.08803509672482808, "incorrect_loss_per_char": 0.3892098367214203, "correct_loss_per_token": 0.26410529017448425, "incorrect_loss_per_token": 1.5568393468856812, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5568393468856812, "num_tokens": 1, "num_tokens_all": 918, "is_greedy": false, "logits_per_token": -1.5568393468856812, "logits_per_char": -0.3892098367214203, "num_chars": 4}, {"sum_logits": -0.26410529017448425, "num_tokens": 1, "num_tokens_all": 918, "is_greedy": true, "logits_per_token": -0.26410529017448425, "logits_per_char": -0.08803509672482808, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 937, "native_id": 3243, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.829628586769104, "incorrect_loss_raw": 0.6426610946655273, "correct_loss_per_char": 0.207407146692276, "incorrect_loss_per_char": 0.21422036488850912, "correct_loss_per_token": 0.829628586769104, "incorrect_loss_per_token": 0.6426610946655273, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.829628586769104, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": false, "logits_per_token": -0.829628586769104, "logits_per_char": -0.207407146692276, "num_chars": 4}, {"sum_logits": -0.6426610946655273, "num_tokens": 1, "num_tokens_all": 990, "is_greedy": true, "logits_per_token": -0.6426610946655273, "logits_per_char": -0.21422036488850912, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 938, "native_id": 254, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7862908244132996, "incorrect_loss_raw": 0.6649875044822693, "correct_loss_per_char": 0.1965727061033249, "incorrect_loss_per_char": 0.22166250149408975, "correct_loss_per_token": 0.7862908244132996, "incorrect_loss_per_token": 0.6649875044822693, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7862908244132996, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": false, "logits_per_token": -0.7862908244132996, "logits_per_char": -0.1965727061033249, "num_chars": 4}, {"sum_logits": -0.6649875044822693, "num_tokens": 1, "num_tokens_all": 987, "is_greedy": true, "logits_per_token": -0.6649875044822693, "logits_per_char": -0.22166250149408975, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 939, "native_id": 1544, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8064147233963013, "incorrect_loss_raw": 0.7599949240684509, "correct_loss_per_char": 0.20160368084907532, "incorrect_loss_per_char": 0.2533316413561503, "correct_loss_per_token": 0.8064147233963013, "incorrect_loss_per_token": 0.7599949240684509, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8064147233963013, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": false, "logits_per_token": -0.8064147233963013, "logits_per_char": -0.20160368084907532, "num_chars": 4}, {"sum_logits": -0.7599949240684509, "num_tokens": 1, "num_tokens_all": 1091, "is_greedy": true, "logits_per_token": -0.7599949240684509, "logits_per_char": -0.2533316413561503, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 940, "native_id": 2997, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1956216096878052, "incorrect_loss_raw": 0.4041808843612671, "correct_loss_per_char": 0.2989054024219513, "incorrect_loss_per_char": 0.1347269614537557, "correct_loss_per_token": 1.1956216096878052, "incorrect_loss_per_token": 0.4041808843612671, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1956216096878052, "num_tokens": 1, "num_tokens_all": 888, "is_greedy": false, "logits_per_token": -1.1956216096878052, "logits_per_char": -0.2989054024219513, "num_chars": 4}, {"sum_logits": -0.4041808843612671, "num_tokens": 1, "num_tokens_all": 888, "is_greedy": true, "logits_per_token": -0.4041808843612671, "logits_per_char": -0.1347269614537557, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 941, "native_id": 2337, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4662168025970459, "incorrect_loss_raw": 1.1464850902557373, "correct_loss_per_char": 0.11655420064926147, "incorrect_loss_per_char": 0.3821616967519124, "correct_loss_per_token": 0.4662168025970459, "incorrect_loss_per_token": 1.1464850902557373, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4662168025970459, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": true, "logits_per_token": -0.4662168025970459, "logits_per_char": -0.11655420064926147, "num_chars": 4}, {"sum_logits": -1.1464850902557373, "num_tokens": 1, "num_tokens_all": 942, "is_greedy": false, "logits_per_token": -1.1464850902557373, "logits_per_char": -0.3821616967519124, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 942, "native_id": 543, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.47038042545318604, "incorrect_loss_raw": 1.0311845541000366, "correct_loss_per_char": 0.156793475151062, "incorrect_loss_per_char": 0.25779613852500916, "correct_loss_per_token": 0.47038042545318604, "incorrect_loss_per_token": 1.0311845541000366, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0311845541000366, "num_tokens": 1, "num_tokens_all": 839, "is_greedy": false, "logits_per_token": -1.0311845541000366, "logits_per_char": -0.25779613852500916, "num_chars": 4}, {"sum_logits": -0.47038042545318604, "num_tokens": 1, "num_tokens_all": 839, "is_greedy": true, "logits_per_token": -0.47038042545318604, "logits_per_char": -0.156793475151062, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 943, "native_id": 970, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.887488603591919, "incorrect_loss_raw": 0.5668916702270508, "correct_loss_per_char": 0.22187215089797974, "incorrect_loss_per_char": 0.1889638900756836, "correct_loss_per_token": 0.887488603591919, "incorrect_loss_per_token": 0.5668916702270508, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.887488603591919, "num_tokens": 1, "num_tokens_all": 1227, "is_greedy": false, "logits_per_token": -0.887488603591919, "logits_per_char": -0.22187215089797974, "num_chars": 4}, {"sum_logits": -0.5668916702270508, "num_tokens": 1, "num_tokens_all": 1227, "is_greedy": true, "logits_per_token": -0.5668916702270508, "logits_per_char": -0.1889638900756836, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 944, "native_id": 1538, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4872362017631531, "incorrect_loss_raw": 1.0817840099334717, "correct_loss_per_char": 0.12180905044078827, "incorrect_loss_per_char": 0.3605946699778239, "correct_loss_per_token": 0.4872362017631531, "incorrect_loss_per_token": 1.0817840099334717, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4872362017631531, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": true, "logits_per_token": -0.4872362017631531, "logits_per_char": -0.12180905044078827, "num_chars": 4}, {"sum_logits": -1.0817840099334717, "num_tokens": 1, "num_tokens_all": 936, "is_greedy": false, "logits_per_token": -1.0817840099334717, "logits_per_char": -0.3605946699778239, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 945, "native_id": 3051, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.23201937973499298, "incorrect_loss_raw": 1.7581210136413574, "correct_loss_per_char": 0.07733979324499766, "incorrect_loss_per_char": 0.43953025341033936, "correct_loss_per_token": 0.23201937973499298, "incorrect_loss_per_token": 1.7581210136413574, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7581210136413574, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": false, "logits_per_token": -1.7581210136413574, "logits_per_char": -0.43953025341033936, "num_chars": 4}, {"sum_logits": -0.23201937973499298, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": true, "logits_per_token": -0.23201937973499298, "logits_per_char": -0.07733979324499766, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 946, "native_id": 2948, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2441046237945557, "incorrect_loss_raw": 0.36683371663093567, "correct_loss_per_char": 0.3110261559486389, "incorrect_loss_per_char": 0.12227790554364522, "correct_loss_per_token": 1.2441046237945557, "incorrect_loss_per_token": 0.36683371663093567, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2441046237945557, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": false, "logits_per_token": -1.2441046237945557, "logits_per_char": -0.3110261559486389, "num_chars": 4}, {"sum_logits": -0.36683371663093567, "num_tokens": 1, "num_tokens_all": 895, "is_greedy": true, "logits_per_token": -0.36683371663093567, "logits_per_char": -0.12227790554364522, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 947, "native_id": 1683, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5143203735351562, "incorrect_loss_raw": 0.9899283647537231, "correct_loss_per_char": 0.17144012451171875, "incorrect_loss_per_char": 0.2474820911884308, "correct_loss_per_token": 0.5143203735351562, "incorrect_loss_per_token": 0.9899283647537231, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9899283647537231, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": false, "logits_per_token": -0.9899283647537231, "logits_per_char": -0.2474820911884308, "num_chars": 4}, {"sum_logits": -0.5143203735351562, "num_tokens": 1, "num_tokens_all": 921, "is_greedy": true, "logits_per_token": -0.5143203735351562, "logits_per_char": -0.17144012451171875, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 948, "native_id": 1040, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8687623143196106, "incorrect_loss_raw": 0.6218448281288147, "correct_loss_per_char": 0.28958743810653687, "incorrect_loss_per_char": 0.15546120703220367, "correct_loss_per_token": 0.8687623143196106, "incorrect_loss_per_token": 0.6218448281288147, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6218448281288147, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": true, "logits_per_token": -0.6218448281288147, "logits_per_char": -0.15546120703220367, "num_chars": 4}, {"sum_logits": -0.8687623143196106, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": false, "logits_per_token": -0.8687623143196106, "logits_per_char": -0.28958743810653687, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 949, "native_id": 914, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7865099310874939, "incorrect_loss_raw": 0.6476924419403076, "correct_loss_per_char": 0.19662748277187347, "incorrect_loss_per_char": 0.2158974806467692, "correct_loss_per_token": 0.7865099310874939, "incorrect_loss_per_token": 0.6476924419403076, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7865099310874939, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -0.7865099310874939, "logits_per_char": -0.19662748277187347, "num_chars": 4}, {"sum_logits": -0.6476924419403076, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": true, "logits_per_token": -0.6476924419403076, "logits_per_char": -0.2158974806467692, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 950, "native_id": 2897, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4366755485534668, "incorrect_loss_raw": 1.2392544746398926, "correct_loss_per_char": 0.1091688871383667, "incorrect_loss_per_char": 0.4130848248799642, "correct_loss_per_token": 0.4366755485534668, "incorrect_loss_per_token": 1.2392544746398926, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4366755485534668, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": true, "logits_per_token": -0.4366755485534668, "logits_per_char": -0.1091688871383667, "num_chars": 4}, {"sum_logits": -1.2392544746398926, "num_tokens": 1, "num_tokens_all": 963, "is_greedy": false, "logits_per_token": -1.2392544746398926, "logits_per_char": -0.4130848248799642, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 951, "native_id": 2274, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0597997903823853, "incorrect_loss_raw": 0.4960218667984009, "correct_loss_per_char": 0.2649499475955963, "incorrect_loss_per_char": 0.16534062226613364, "correct_loss_per_token": 1.0597997903823853, "incorrect_loss_per_token": 0.4960218667984009, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0597997903823853, "num_tokens": 1, "num_tokens_all": 902, "is_greedy": false, "logits_per_token": -1.0597997903823853, "logits_per_char": -0.2649499475955963, "num_chars": 4}, {"sum_logits": -0.4960218667984009, "num_tokens": 1, "num_tokens_all": 902, "is_greedy": true, "logits_per_token": -0.4960218667984009, "logits_per_char": -0.16534062226613364, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 952, "native_id": 1810, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6984332203865051, "incorrect_loss_raw": 0.7531092762947083, "correct_loss_per_char": 0.17460830509662628, "incorrect_loss_per_char": 0.2510364254315694, "correct_loss_per_token": 0.6984332203865051, "incorrect_loss_per_token": 0.7531092762947083, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.6984332203865051, "num_tokens": 1, "num_tokens_all": 892, "is_greedy": true, "logits_per_token": -0.6984332203865051, "logits_per_char": -0.17460830509662628, "num_chars": 4}, {"sum_logits": -0.7531092762947083, "num_tokens": 1, "num_tokens_all": 892, "is_greedy": false, "logits_per_token": -0.7531092762947083, "logits_per_char": -0.2510364254315694, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 953, "native_id": 1285, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8631171584129333, "incorrect_loss_raw": 0.633868932723999, "correct_loss_per_char": 0.2877057194709778, "incorrect_loss_per_char": 0.15846723318099976, "correct_loss_per_token": 0.8631171584129333, "incorrect_loss_per_token": 0.633868932723999, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.633868932723999, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": true, "logits_per_token": -0.633868932723999, "logits_per_char": -0.15846723318099976, "num_chars": 4}, {"sum_logits": -0.8631171584129333, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": false, "logits_per_token": -0.8631171584129333, "logits_per_char": -0.2877057194709778, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 954, "native_id": 3151, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8934148550033569, "incorrect_loss_raw": 0.5552643537521362, "correct_loss_per_char": 0.22335371375083923, "incorrect_loss_per_char": 0.18508811791737875, "correct_loss_per_token": 0.8934148550033569, "incorrect_loss_per_token": 0.5552643537521362, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8934148550033569, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": false, "logits_per_token": -0.8934148550033569, "logits_per_char": -0.22335371375083923, "num_chars": 4}, {"sum_logits": -0.5552643537521362, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": true, "logits_per_token": -0.5552643537521362, "logits_per_char": -0.18508811791737875, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 955, "native_id": 2402, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5288776755332947, "incorrect_loss_raw": 1.0089852809906006, "correct_loss_per_char": 0.17629255851109824, "incorrect_loss_per_char": 0.25224632024765015, "correct_loss_per_token": 0.5288776755332947, "incorrect_loss_per_token": 1.0089852809906006, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0089852809906006, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": false, "logits_per_token": -1.0089852809906006, "logits_per_char": -0.25224632024765015, "num_chars": 4}, {"sum_logits": -0.5288776755332947, "num_tokens": 1, "num_tokens_all": 884, "is_greedy": true, "logits_per_token": -0.5288776755332947, "logits_per_char": -0.17629255851109824, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 956, "native_id": 2954, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7593490481376648, "incorrect_loss_raw": 0.6782634854316711, "correct_loss_per_char": 0.1898372620344162, "incorrect_loss_per_char": 0.22608782847722372, "correct_loss_per_token": 0.7593490481376648, "incorrect_loss_per_token": 0.6782634854316711, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7593490481376648, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": false, "logits_per_token": -0.7593490481376648, "logits_per_char": -0.1898372620344162, "num_chars": 4}, {"sum_logits": -0.6782634854316711, "num_tokens": 1, "num_tokens_all": 906, "is_greedy": true, "logits_per_token": -0.6782634854316711, "logits_per_char": -0.22608782847722372, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 957, "native_id": 1027, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.644953191280365, "incorrect_loss_raw": 0.8215095400810242, "correct_loss_per_char": 0.214984397093455, "incorrect_loss_per_char": 0.20537738502025604, "correct_loss_per_token": 0.644953191280365, "incorrect_loss_per_token": 0.8215095400810242, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8215095400810242, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": false, "logits_per_token": -0.8215095400810242, "logits_per_char": -0.20537738502025604, "num_chars": 4}, {"sum_logits": -0.644953191280365, "num_tokens": 1, "num_tokens_all": 945, "is_greedy": true, "logits_per_token": -0.644953191280365, "logits_per_char": -0.214984397093455, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 958, "native_id": 2804, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.278555154800415, "incorrect_loss_raw": 0.44580137729644775, "correct_loss_per_char": 0.31963878870010376, "incorrect_loss_per_char": 0.14860045909881592, "correct_loss_per_token": 1.278555154800415, "incorrect_loss_per_token": 0.44580137729644775, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.278555154800415, "num_tokens": 1, "num_tokens_all": 834, "is_greedy": false, "logits_per_token": -1.278555154800415, "logits_per_char": -0.31963878870010376, "num_chars": 4}, {"sum_logits": -0.44580137729644775, "num_tokens": 1, "num_tokens_all": 834, "is_greedy": true, "logits_per_token": -0.44580137729644775, "logits_per_char": -0.14860045909881592, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 959, "native_id": 2674, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7821872234344482, "incorrect_loss_raw": 0.20607703924179077, "correct_loss_per_char": 0.44554680585861206, "incorrect_loss_per_char": 0.06869234641393025, "correct_loss_per_token": 1.7821872234344482, "incorrect_loss_per_token": 0.20607703924179077, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7821872234344482, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": false, "logits_per_token": -1.7821872234344482, "logits_per_char": -0.44554680585861206, "num_chars": 4}, {"sum_logits": -0.20607703924179077, "num_tokens": 1, "num_tokens_all": 983, "is_greedy": true, "logits_per_token": -0.20607703924179077, "logits_per_char": -0.06869234641393025, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 960, "native_id": 1841, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4857276678085327, "incorrect_loss_raw": 1.0300763845443726, "correct_loss_per_char": 0.16190922260284424, "incorrect_loss_per_char": 0.25751909613609314, "correct_loss_per_token": 0.4857276678085327, "incorrect_loss_per_token": 1.0300763845443726, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0300763845443726, "num_tokens": 1, "num_tokens_all": 894, "is_greedy": false, "logits_per_token": -1.0300763845443726, "logits_per_char": -0.25751909613609314, "num_chars": 4}, {"sum_logits": -0.4857276678085327, "num_tokens": 1, "num_tokens_all": 894, "is_greedy": true, "logits_per_token": -0.4857276678085327, "logits_per_char": -0.16190922260284424, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 961, "native_id": 2728, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9105150699615479, "incorrect_loss_raw": 0.5455635786056519, "correct_loss_per_char": 0.22762876749038696, "incorrect_loss_per_char": 0.18185452620188394, "correct_loss_per_token": 0.9105150699615479, "incorrect_loss_per_token": 0.5455635786056519, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9105150699615479, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": false, "logits_per_token": -0.9105150699615479, "logits_per_char": -0.22762876749038696, "num_chars": 4}, {"sum_logits": -0.5455635786056519, "num_tokens": 1, "num_tokens_all": 912, "is_greedy": true, "logits_per_token": -0.5455635786056519, "logits_per_char": -0.18185452620188394, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 962, "native_id": 3038, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6075268983840942, "incorrect_loss_raw": 0.8390117883682251, "correct_loss_per_char": 0.2025089661280314, "incorrect_loss_per_char": 0.20975294709205627, "correct_loss_per_token": 0.6075268983840942, "incorrect_loss_per_token": 0.8390117883682251, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8390117883682251, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": false, "logits_per_token": -0.8390117883682251, "logits_per_char": -0.20975294709205627, "num_chars": 4}, {"sum_logits": -0.6075268983840942, "num_tokens": 1, "num_tokens_all": 916, "is_greedy": true, "logits_per_token": -0.6075268983840942, "logits_per_char": -0.2025089661280314, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 963, "native_id": 2475, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5886611342430115, "incorrect_loss_raw": 0.9040527939796448, "correct_loss_per_char": 0.14716528356075287, "incorrect_loss_per_char": 0.3013509313265483, "correct_loss_per_token": 0.5886611342430115, "incorrect_loss_per_token": 0.9040527939796448, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5886611342430115, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": true, "logits_per_token": -0.5886611342430115, "logits_per_char": -0.14716528356075287, "num_chars": 4}, {"sum_logits": -0.9040527939796448, "num_tokens": 1, "num_tokens_all": 899, "is_greedy": false, "logits_per_token": -0.9040527939796448, "logits_per_char": -0.3013509313265483, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 964, "native_id": 372, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2367409467697144, "incorrect_loss_raw": 0.41186022758483887, "correct_loss_per_char": 0.3091852366924286, "incorrect_loss_per_char": 0.13728674252827963, "correct_loss_per_token": 1.2367409467697144, "incorrect_loss_per_token": 0.41186022758483887, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2367409467697144, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": false, "logits_per_token": -1.2367409467697144, "logits_per_char": -0.3091852366924286, "num_chars": 4}, {"sum_logits": -0.41186022758483887, "num_tokens": 1, "num_tokens_all": 896, "is_greedy": true, "logits_per_token": -0.41186022758483887, "logits_per_char": -0.13728674252827963, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 965, "native_id": 2902, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7853178977966309, "incorrect_loss_raw": 0.6801198720932007, "correct_loss_per_char": 0.19632947444915771, "incorrect_loss_per_char": 0.2267066240310669, "correct_loss_per_token": 0.7853178977966309, "incorrect_loss_per_token": 0.6801198720932007, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7853178977966309, "num_tokens": 1, "num_tokens_all": 872, "is_greedy": false, "logits_per_token": -0.7853178977966309, "logits_per_char": -0.19632947444915771, "num_chars": 4}, {"sum_logits": -0.6801198720932007, "num_tokens": 1, "num_tokens_all": 872, "is_greedy": true, "logits_per_token": -0.6801198720932007, "logits_per_char": -0.2267066240310669, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 966, "native_id": 2141, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.36700451374053955, "incorrect_loss_raw": 1.236238956451416, "correct_loss_per_char": 0.12233483791351318, "incorrect_loss_per_char": 0.309059739112854, "correct_loss_per_token": 0.36700451374053955, "incorrect_loss_per_token": 1.236238956451416, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.236238956451416, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": false, "logits_per_token": -1.236238956451416, "logits_per_char": -0.309059739112854, "num_chars": 4}, {"sum_logits": -0.36700451374053955, "num_tokens": 1, "num_tokens_all": 966, "is_greedy": true, "logits_per_token": -0.36700451374053955, "logits_per_char": -0.12233483791351318, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 967, "native_id": 2524, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8445529341697693, "incorrect_loss_raw": 0.6157605051994324, "correct_loss_per_char": 0.21113823354244232, "incorrect_loss_per_char": 0.20525350173314413, "correct_loss_per_token": 0.8445529341697693, "incorrect_loss_per_token": 0.6157605051994324, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8445529341697693, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": false, "logits_per_token": -0.8445529341697693, "logits_per_char": -0.21113823354244232, "num_chars": 4}, {"sum_logits": -0.6157605051994324, "num_tokens": 1, "num_tokens_all": 940, "is_greedy": true, "logits_per_token": -0.6157605051994324, "logits_per_char": -0.20525350173314413, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 968, "native_id": 2008, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1764178276062012, "incorrect_loss_raw": 0.41179752349853516, "correct_loss_per_char": 0.3921392758687337, "incorrect_loss_per_char": 0.10294938087463379, "correct_loss_per_token": 1.1764178276062012, "incorrect_loss_per_token": 0.41179752349853516, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.41179752349853516, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": true, "logits_per_token": -0.41179752349853516, "logits_per_char": -0.10294938087463379, "num_chars": 4}, {"sum_logits": -1.1764178276062012, "num_tokens": 1, "num_tokens_all": 905, "is_greedy": false, "logits_per_token": -1.1764178276062012, "logits_per_char": -0.3921392758687337, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 969, "native_id": 3122, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2741955518722534, "incorrect_loss_raw": 1.5751683712005615, "correct_loss_per_char": 0.09139851729075114, "incorrect_loss_per_char": 0.3937920928001404, "correct_loss_per_token": 0.2741955518722534, "incorrect_loss_per_token": 1.5751683712005615, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5751683712005615, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": false, "logits_per_token": -1.5751683712005615, "logits_per_char": -0.3937920928001404, "num_chars": 4}, {"sum_logits": -0.2741955518722534, "num_tokens": 1, "num_tokens_all": 1001, "is_greedy": true, "logits_per_token": -0.2741955518722534, "logits_per_char": -0.09139851729075114, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 970, "native_id": 237, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7276507616043091, "incorrect_loss_raw": 0.7384495139122009, "correct_loss_per_char": 0.18191269040107727, "incorrect_loss_per_char": 0.24614983797073364, "correct_loss_per_token": 0.7276507616043091, "incorrect_loss_per_token": 0.7384495139122009, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7276507616043091, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": true, "logits_per_token": -0.7276507616043091, "logits_per_char": -0.18191269040107727, "num_chars": 4}, {"sum_logits": -0.7384495139122009, "num_tokens": 1, "num_tokens_all": 953, "is_greedy": false, "logits_per_token": -0.7384495139122009, "logits_per_char": -0.24614983797073364, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 971, "native_id": 1232, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4921380281448364, "incorrect_loss_raw": 1.0143203735351562, "correct_loss_per_char": 0.1230345070362091, "incorrect_loss_per_char": 0.33810679117838544, "correct_loss_per_token": 0.4921380281448364, "incorrect_loss_per_token": 1.0143203735351562, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.4921380281448364, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": true, "logits_per_token": -0.4921380281448364, "logits_per_char": -0.1230345070362091, "num_chars": 4}, {"sum_logits": -1.0143203735351562, "num_tokens": 1, "num_tokens_all": 943, "is_greedy": false, "logits_per_token": -1.0143203735351562, "logits_per_char": -0.33810679117838544, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 972, "native_id": 867, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9338279962539673, "incorrect_loss_raw": 0.5412979125976562, "correct_loss_per_char": 0.31127599875132245, "incorrect_loss_per_char": 0.13532447814941406, "correct_loss_per_token": 0.9338279962539673, "incorrect_loss_per_token": 0.5412979125976562, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5412979125976562, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": true, "logits_per_token": -0.5412979125976562, "logits_per_char": -0.13532447814941406, "num_chars": 4}, {"sum_logits": -0.9338279962539673, "num_tokens": 1, "num_tokens_all": 960, "is_greedy": false, "logits_per_token": -0.9338279962539673, "logits_per_char": -0.31127599875132245, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 973, "native_id": 1552, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0700337886810303, "incorrect_loss_raw": 0.4538779854774475, "correct_loss_per_char": 0.26750844717025757, "incorrect_loss_per_char": 0.15129266182581583, "correct_loss_per_token": 1.0700337886810303, "incorrect_loss_per_token": 0.4538779854774475, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0700337886810303, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": false, "logits_per_token": -1.0700337886810303, "logits_per_char": -0.26750844717025757, "num_chars": 4}, {"sum_logits": -0.4538779854774475, "num_tokens": 1, "num_tokens_all": 991, "is_greedy": true, "logits_per_token": -0.4538779854774475, "logits_per_char": -0.15129266182581583, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 974, "native_id": 2336, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.37604647874832153, "incorrect_loss_raw": 1.2929019927978516, "correct_loss_per_char": 0.12534882624944052, "incorrect_loss_per_char": 0.3232254981994629, "correct_loss_per_token": 0.37604647874832153, "incorrect_loss_per_token": 1.2929019927978516, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2929019927978516, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": false, "logits_per_token": -1.2929019927978516, "logits_per_char": -0.3232254981994629, "num_chars": 4}, {"sum_logits": -0.37604647874832153, "num_tokens": 1, "num_tokens_all": 890, "is_greedy": true, "logits_per_token": -0.37604647874832153, "logits_per_char": -0.12534882624944052, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 975, "native_id": 1684, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9419234991073608, "incorrect_loss_raw": 0.5375062227249146, "correct_loss_per_char": 0.3139744997024536, "incorrect_loss_per_char": 0.13437655568122864, "correct_loss_per_token": 0.9419234991073608, "incorrect_loss_per_token": 0.5375062227249146, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5375062227249146, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": true, "logits_per_token": -0.5375062227249146, "logits_per_char": -0.13437655568122864, "num_chars": 4}, {"sum_logits": -0.9419234991073608, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": false, "logits_per_token": -0.9419234991073608, "logits_per_char": -0.3139744997024536, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 976, "native_id": 291, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.3042655289173126, "incorrect_loss_raw": 1.7980858087539673, "correct_loss_per_char": 0.10142184297243755, "incorrect_loss_per_char": 0.4495214521884918, "correct_loss_per_token": 0.3042655289173126, "incorrect_loss_per_token": 1.7980858087539673, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7980858087539673, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": false, "logits_per_token": -1.7980858087539673, "logits_per_char": -0.4495214521884918, "num_chars": 4}, {"sum_logits": -0.3042655289173126, "num_tokens": 1, "num_tokens_all": 878, "is_greedy": true, "logits_per_token": -0.3042655289173126, "logits_per_char": -0.10142184297243755, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 977, "native_id": 775, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5706617832183838, "incorrect_loss_raw": 0.9276736974716187, "correct_loss_per_char": 0.19022059440612793, "incorrect_loss_per_char": 0.23191842436790466, "correct_loss_per_token": 0.5706617832183838, "incorrect_loss_per_token": 0.9276736974716187, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9276736974716187, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": false, "logits_per_token": -0.9276736974716187, "logits_per_char": -0.23191842436790466, "num_chars": 4}, {"sum_logits": -0.5706617832183838, "num_tokens": 1, "num_tokens_all": 931, "is_greedy": true, "logits_per_token": -0.5706617832183838, "logits_per_char": -0.19022059440612793, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 978, "native_id": 625, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9898271560668945, "incorrect_loss_raw": 0.49548399448394775, "correct_loss_per_char": 0.24745678901672363, "incorrect_loss_per_char": 0.16516133149464926, "correct_loss_per_token": 0.9898271560668945, "incorrect_loss_per_token": 0.49548399448394775, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9898271560668945, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": false, "logits_per_token": -0.9898271560668945, "logits_per_char": -0.24745678901672363, "num_chars": 4}, {"sum_logits": -0.49548399448394775, "num_tokens": 1, "num_tokens_all": 1010, "is_greedy": true, "logits_per_token": -0.49548399448394775, "logits_per_char": -0.16516133149464926, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 979, "native_id": 2979, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0138825178146362, "incorrect_loss_raw": 0.5120047330856323, "correct_loss_per_char": 0.25347062945365906, "incorrect_loss_per_char": 0.17066824436187744, "correct_loss_per_token": 1.0138825178146362, "incorrect_loss_per_token": 0.5120047330856323, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0138825178146362, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": false, "logits_per_token": -1.0138825178146362, "logits_per_char": -0.25347062945365906, "num_chars": 4}, {"sum_logits": -0.5120047330856323, "num_tokens": 1, "num_tokens_all": 913, "is_greedy": true, "logits_per_token": -0.5120047330856323, "logits_per_char": -0.17066824436187744, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 980, "native_id": 2782, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.24875257909297943, "incorrect_loss_raw": 1.6157796382904053, "correct_loss_per_char": 0.08291752636432648, "incorrect_loss_per_char": 0.4039449095726013, "correct_loss_per_token": 0.24875257909297943, "incorrect_loss_per_token": 1.6157796382904053, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6157796382904053, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": false, "logits_per_token": -1.6157796382904053, "logits_per_char": -0.4039449095726013, "num_chars": 4}, {"sum_logits": -0.24875257909297943, "num_tokens": 1, "num_tokens_all": 937, "is_greedy": true, "logits_per_token": -0.24875257909297943, "logits_per_char": -0.08291752636432648, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 981, "native_id": 1193, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5483190417289734, "incorrect_loss_raw": 0.9686980247497559, "correct_loss_per_char": 0.13707976043224335, "incorrect_loss_per_char": 0.32289934158325195, "correct_loss_per_token": 0.5483190417289734, "incorrect_loss_per_token": 0.9686980247497559, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.5483190417289734, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": true, "logits_per_token": -0.5483190417289734, "logits_per_char": -0.13707976043224335, "num_chars": 4}, {"sum_logits": -0.9686980247497559, "num_tokens": 1, "num_tokens_all": 929, "is_greedy": false, "logits_per_token": -0.9686980247497559, "logits_per_char": -0.32289934158325195, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 982, "native_id": 740, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.6056745648384094, "incorrect_loss_raw": 0.976183295249939, "correct_loss_per_char": 0.20189152161280313, "incorrect_loss_per_char": 0.24404582381248474, "correct_loss_per_token": 0.6056745648384094, "incorrect_loss_per_token": 0.976183295249939, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.976183295249939, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": false, "logits_per_token": -0.976183295249939, "logits_per_char": -0.24404582381248474, "num_chars": 4}, {"sum_logits": -0.6056745648384094, "num_tokens": 1, "num_tokens_all": 1086, "is_greedy": true, "logits_per_token": -0.6056745648384094, "logits_per_char": -0.20189152161280313, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 983, "native_id": 2206, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9820523262023926, "incorrect_loss_raw": 0.5120149254798889, "correct_loss_per_char": 0.24551308155059814, "incorrect_loss_per_char": 0.17067164182662964, "correct_loss_per_token": 0.9820523262023926, "incorrect_loss_per_token": 0.5120149254798889, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9820523262023926, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": false, "logits_per_token": -0.9820523262023926, "logits_per_char": -0.24551308155059814, "num_chars": 4}, {"sum_logits": -0.5120149254798889, "num_tokens": 1, "num_tokens_all": 952, "is_greedy": true, "logits_per_token": -0.5120149254798889, "logits_per_char": -0.17067164182662964, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 984, "native_id": 1784, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.7529051303863525, "incorrect_loss_raw": 0.305902898311615, "correct_loss_per_char": 0.43822628259658813, "incorrect_loss_per_char": 0.10196763277053833, "correct_loss_per_token": 1.7529051303863525, "incorrect_loss_per_token": 0.305902898311615, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7529051303863525, "num_tokens": 1, "num_tokens_all": 843, "is_greedy": false, "logits_per_token": -1.7529051303863525, "logits_per_char": -0.43822628259658813, "num_chars": 4}, {"sum_logits": -0.305902898311615, "num_tokens": 1, "num_tokens_all": 843, "is_greedy": true, "logits_per_token": -0.305902898311615, "logits_per_char": -0.10196763277053833, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 985, "native_id": 1923, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4502415657043457, "incorrect_loss_raw": 0.30780160427093506, "correct_loss_per_char": 0.3625603914260864, "incorrect_loss_per_char": 0.10260053475697835, "correct_loss_per_token": 1.4502415657043457, "incorrect_loss_per_token": 0.30780160427093506, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4502415657043457, "num_tokens": 1, "num_tokens_all": 869, "is_greedy": false, "logits_per_token": -1.4502415657043457, "logits_per_char": -0.3625603914260864, "num_chars": 4}, {"sum_logits": -0.30780160427093506, "num_tokens": 1, "num_tokens_all": 869, "is_greedy": true, "logits_per_token": -0.30780160427093506, "logits_per_char": -0.10260053475697835, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 986, "native_id": 2869, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.49846717715263367, "incorrect_loss_raw": 1.0660364627838135, "correct_loss_per_char": 0.16615572571754456, "incorrect_loss_per_char": 0.26650911569595337, "correct_loss_per_token": 0.49846717715263367, "incorrect_loss_per_token": 1.0660364627838135, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0660364627838135, "num_tokens": 1, "num_tokens_all": 870, "is_greedy": false, "logits_per_token": -1.0660364627838135, "logits_per_char": -0.26650911569595337, "num_chars": 4}, {"sum_logits": -0.49846717715263367, "num_tokens": 1, "num_tokens_all": 870, "is_greedy": true, "logits_per_token": -0.49846717715263367, "logits_per_char": -0.16615572571754456, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 987, "native_id": 990, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.9749606847763062, "incorrect_loss_raw": 0.5546181201934814, "correct_loss_per_char": 0.24374017119407654, "incorrect_loss_per_char": 0.1848727067311605, "correct_loss_per_token": 0.9749606847763062, "incorrect_loss_per_token": 0.5546181201934814, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9749606847763062, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": false, "logits_per_token": -0.9749606847763062, "logits_per_char": -0.24374017119407654, "num_chars": 4}, {"sum_logits": -0.5546181201934814, "num_tokens": 1, "num_tokens_all": 914, "is_greedy": true, "logits_per_token": -0.5546181201934814, "logits_per_char": -0.1848727067311605, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 988, "native_id": 1955, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.4038757383823395, "incorrect_loss_raw": 1.1824599504470825, "correct_loss_per_char": 0.1346252461274465, "incorrect_loss_per_char": 0.29561498761177063, "correct_loss_per_token": 0.4038757383823395, "incorrect_loss_per_token": 1.1824599504470825, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1824599504470825, "num_tokens": 1, "num_tokens_all": 1027, "is_greedy": false, "logits_per_token": -1.1824599504470825, "logits_per_char": -0.29561498761177063, "num_chars": 4}, {"sum_logits": -0.4038757383823395, "num_tokens": 1, "num_tokens_all": 1027, "is_greedy": true, "logits_per_token": -0.4038757383823395, "logits_per_char": -0.1346252461274465, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 989, "native_id": 2437, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.7998232841491699, "incorrect_loss_raw": 0.6692807078361511, "correct_loss_per_char": 0.19995582103729248, "incorrect_loss_per_char": 0.22309356927871704, "correct_loss_per_token": 0.7998232841491699, "incorrect_loss_per_token": 0.6692807078361511, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.7998232841491699, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": false, "logits_per_token": -0.7998232841491699, "logits_per_char": -0.19995582103729248, "num_chars": 4}, {"sum_logits": -0.6692807078361511, "num_tokens": 1, "num_tokens_all": 903, "is_greedy": true, "logits_per_token": -0.6692807078361511, "logits_per_char": -0.22309356927871704, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 990, "native_id": 393, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.2529088258743286, "incorrect_loss_raw": 1.7415691614151, "correct_loss_per_char": 0.08430294195810954, "incorrect_loss_per_char": 0.435392290353775, "correct_loss_per_token": 0.2529088258743286, "incorrect_loss_per_token": 1.7415691614151, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.7415691614151, "num_tokens": 1, "num_tokens_all": 854, "is_greedy": false, "logits_per_token": -1.7415691614151, "logits_per_char": -0.435392290353775, "num_chars": 4}, {"sum_logits": -0.2529088258743286, "num_tokens": 1, "num_tokens_all": 854, "is_greedy": true, "logits_per_token": -0.2529088258743286, "logits_per_char": -0.08430294195810954, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 991, "native_id": 650, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.09376859664917, "incorrect_loss_raw": 0.5018212795257568, "correct_loss_per_char": 0.2734421491622925, "incorrect_loss_per_char": 0.16727375984191895, "correct_loss_per_token": 1.09376859664917, "incorrect_loss_per_token": 0.5018212795257568, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.09376859664917, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": false, "logits_per_token": -1.09376859664917, "logits_per_char": -0.2734421491622925, "num_chars": 4}, {"sum_logits": -0.5018212795257568, "num_tokens": 1, "num_tokens_all": 923, "is_greedy": true, "logits_per_token": -0.5018212795257568, "logits_per_char": -0.16727375984191895, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 992, "native_id": 3200, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.8166199326515198, "incorrect_loss_raw": 0.6651341319084167, "correct_loss_per_char": 0.20415498316287994, "incorrect_loss_per_char": 0.22171137730280557, "correct_loss_per_token": 0.8166199326515198, "incorrect_loss_per_token": 0.6651341319084167, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8166199326515198, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": false, "logits_per_token": -0.8166199326515198, "logits_per_char": -0.20415498316287994, "num_chars": 4}, {"sum_logits": -0.6651341319084167, "num_tokens": 1, "num_tokens_all": 928, "is_greedy": true, "logits_per_token": -0.6651341319084167, "logits_per_char": -0.22171137730280557, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 993, "native_id": 470, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.712085485458374, "incorrect_loss_raw": 0.7819900512695312, "correct_loss_per_char": 0.1780213713645935, "incorrect_loss_per_char": 0.26066335042317706, "correct_loss_per_token": 0.712085485458374, "incorrect_loss_per_token": 0.7819900512695312, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.712085485458374, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": true, "logits_per_token": -0.712085485458374, "logits_per_char": -0.1780213713645935, "num_chars": 4}, {"sum_logits": -0.7819900512695312, "num_tokens": 1, "num_tokens_all": 933, "is_greedy": false, "logits_per_token": -0.7819900512695312, "logits_per_char": -0.26066335042317706, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 994, "native_id": 399, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.0607950687408447, "incorrect_loss_raw": 0.4749823212623596, "correct_loss_per_char": 0.2651987671852112, "incorrect_loss_per_char": 0.15832744042078653, "correct_loss_per_token": 1.0607950687408447, "incorrect_loss_per_token": 0.4749823212623596, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0607950687408447, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": false, "logits_per_token": -1.0607950687408447, "logits_per_char": -0.2651987671852112, "num_chars": 4}, {"sum_logits": -0.4749823212623596, "num_tokens": 1, "num_tokens_all": 971, "is_greedy": true, "logits_per_token": -0.4749823212623596, "logits_per_char": -0.15832744042078653, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 995, "native_id": 600, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 0.8260307908058167, "incorrect_loss_raw": 0.6056718230247498, "correct_loss_per_char": 0.20650769770145416, "incorrect_loss_per_char": 0.2018906076749166, "correct_loss_per_token": 0.8260307908058167, "incorrect_loss_per_token": 0.6056718230247498, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.8260307908058167, "num_tokens": 1, "num_tokens_all": 1000, "is_greedy": false, "logits_per_token": -0.8260307908058167, "logits_per_char": -0.20650769770145416, "num_chars": 4}, {"sum_logits": -0.6056718230247498, "num_tokens": 1, "num_tokens_all": 1000, "is_greedy": true, "logits_per_token": -0.6056718230247498, "logits_per_char": -0.2018906076749166, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 996, "native_id": 531, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.345848798751831, "incorrect_loss_raw": 0.35413187742233276, "correct_loss_per_char": 0.44861626625061035, "incorrect_loss_per_char": 0.08853296935558319, "correct_loss_per_token": 1.345848798751831, "incorrect_loss_per_token": 0.35413187742233276, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.35413187742233276, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": true, "logits_per_token": -0.35413187742233276, "logits_per_char": -0.08853296935558319, "num_chars": 4}, {"sum_logits": -1.345848798751831, "num_tokens": 1, "num_tokens_all": 922, "is_greedy": false, "logits_per_token": -1.345848798751831, "logits_per_char": -0.44861626625061035, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 997, "native_id": 508, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.5571856498718262, "incorrect_loss_raw": 0.9003562927246094, "correct_loss_per_char": 0.1857285499572754, "incorrect_loss_per_char": 0.22508907318115234, "correct_loss_per_token": 0.5571856498718262, "incorrect_loss_per_token": 0.9003562927246094, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -0.9003562927246094, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": false, "logits_per_token": -0.9003562927246094, "logits_per_char": -0.22508907318115234, "num_chars": 4}, {"sum_logits": -0.5571856498718262, "num_tokens": 1, "num_tokens_all": 910, "is_greedy": true, "logits_per_token": -0.5571856498718262, "logits_per_char": -0.1857285499572754, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 998, "native_id": 1929, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.1978950500488281, "incorrect_loss_raw": 0.3994075059890747, "correct_loss_per_char": 0.29947376251220703, "incorrect_loss_per_char": 0.13313583532969156, "correct_loss_per_token": 1.1978950500488281, "incorrect_loss_per_token": 0.3994075059890747, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1978950500488281, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": false, "logits_per_token": -1.1978950500488281, "logits_per_char": -0.29947376251220703, "num_chars": 4}, {"sum_logits": -0.3994075059890747, "num_tokens": 1, "num_tokens_all": 1108, "is_greedy": true, "logits_per_token": -0.3994075059890747, "logits_per_char": -0.13313583532969156, "num_chars": 3}], "label": 0, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"} {"doc_id": 999, "native_id": 1517, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 0.33638128638267517, "incorrect_loss_raw": 1.368202567100525, "correct_loss_per_char": 0.11212709546089172, "incorrect_loss_per_char": 0.3420506417751312, "correct_loss_per_token": 0.33638128638267517, "incorrect_loss_per_token": 1.368202567100525, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.368202567100525, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": false, "logits_per_token": -1.368202567100525, "logits_per_char": -0.3420506417751312, "num_chars": 4}, {"sum_logits": -0.33638128638267517, "num_tokens": 1, "num_tokens_all": 927, "is_greedy": true, "logits_per_token": -0.33638128638267517, "logits_per_char": -0.11212709546089172, "num_chars": 3}], "label": 1, "task_hash": "116b9d7a3c43d4d92986e54a7cec0bd5", "model_hash": "03418cf8091a9882619950ffb07429a5"}