Text Generation
Transformers
Safetensors
openlm
LM-1b_1x-DCLMFasttext / evals /mmlu /task-006-mmlu_college_chemistry:mc-predictions.jsonl
princeton-nlp's picture
Upload folder using huggingface_hub
fc4aea2 verified
{"doc_id": 0, "native_id": 0, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4273712635040283, "incorrect_loss_raw": 1.3813818295796711, "correct_loss_per_char": 0.7136856317520142, "incorrect_loss_per_char": 0.6906909147898356, "correct_loss_per_token": 1.4273712635040283, "incorrect_loss_per_token": 1.3813818295796711, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3586517572402954, "num_tokens": 1, "num_tokens_all": 525, "is_greedy": false, "logits_per_token": -1.3586517572402954, "logits_per_char": -0.6793258786201477, "num_chars": 2}, {"sum_logits": -1.3118865489959717, "num_tokens": 1, "num_tokens_all": 525, "is_greedy": true, "logits_per_token": -1.3118865489959717, "logits_per_char": -0.6559432744979858, "num_chars": 2}, {"sum_logits": -1.4736071825027466, "num_tokens": 1, "num_tokens_all": 525, "is_greedy": false, "logits_per_token": -1.4736071825027466, "logits_per_char": -0.7368035912513733, "num_chars": 2}, {"sum_logits": -1.4273712635040283, "num_tokens": 1, "num_tokens_all": 525, "is_greedy": false, "logits_per_token": -1.4273712635040283, "logits_per_char": -0.7136856317520142, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 1, "native_id": 1, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.437366008758545, "incorrect_loss_raw": 1.3791935841242473, "correct_loss_per_char": 0.7186830043792725, "incorrect_loss_per_char": 0.6895967920621237, "correct_loss_per_token": 1.437366008758545, "incorrect_loss_per_token": 1.3791935841242473, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3459089994430542, "num_tokens": 1, "num_tokens_all": 533, "is_greedy": false, "logits_per_token": -1.3459089994430542, "logits_per_char": -0.6729544997215271, "num_chars": 2}, {"sum_logits": -1.2993086576461792, "num_tokens": 1, "num_tokens_all": 533, "is_greedy": true, "logits_per_token": -1.2993086576461792, "logits_per_char": -0.6496543288230896, "num_chars": 2}, {"sum_logits": -1.437366008758545, "num_tokens": 1, "num_tokens_all": 533, "is_greedy": false, "logits_per_token": -1.437366008758545, "logits_per_char": -0.7186830043792725, "num_chars": 2}, {"sum_logits": -1.4923630952835083, "num_tokens": 1, "num_tokens_all": 533, "is_greedy": false, "logits_per_token": -1.4923630952835083, "logits_per_char": -0.7461815476417542, "num_chars": 2}], "label": 2, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 2, "native_id": 2, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4558030366897583, "incorrect_loss_raw": 1.3727350234985352, "correct_loss_per_char": 0.7279015183448792, "incorrect_loss_per_char": 0.6863675117492676, "correct_loss_per_token": 1.4558030366897583, "incorrect_loss_per_token": 1.3727350234985352, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2979768514633179, "num_tokens": 1, "num_tokens_all": 503, "is_greedy": true, "logits_per_token": -1.2979768514633179, "logits_per_char": -0.6489884257316589, "num_chars": 2}, {"sum_logits": -1.4711848497390747, "num_tokens": 1, "num_tokens_all": 503, "is_greedy": false, "logits_per_token": -1.4711848497390747, "logits_per_char": -0.7355924248695374, "num_chars": 2}, {"sum_logits": -1.349043369293213, "num_tokens": 1, "num_tokens_all": 503, "is_greedy": false, "logits_per_token": -1.349043369293213, "logits_per_char": -0.6745216846466064, "num_chars": 2}, {"sum_logits": -1.4558030366897583, "num_tokens": 1, "num_tokens_all": 503, "is_greedy": false, "logits_per_token": -1.4558030366897583, "logits_per_char": -0.7279015183448792, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 3, "native_id": 3, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2523913383483887, "incorrect_loss_raw": 1.4451964696248372, "correct_loss_per_char": 0.6261956691741943, "incorrect_loss_per_char": 0.7225982348124186, "correct_loss_per_token": 1.2523913383483887, "incorrect_loss_per_token": 1.4451964696248372, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3382381200790405, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.3382381200790405, "logits_per_char": -0.6691190600395203, "num_chars": 2}, {"sum_logits": -1.2523913383483887, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": true, "logits_per_token": -1.2523913383483887, "logits_per_char": -0.6261956691741943, "num_chars": 2}, {"sum_logits": -1.5053908824920654, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.5053908824920654, "logits_per_char": -0.7526954412460327, "num_chars": 2}, {"sum_logits": -1.4919604063034058, "num_tokens": 1, "num_tokens_all": 576, "is_greedy": false, "logits_per_token": -1.4919604063034058, "logits_per_char": -0.7459802031517029, "num_chars": 2}], "label": 1, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 4, "native_id": 4, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4288660287857056, "incorrect_loss_raw": 1.3862011830012004, "correct_loss_per_char": 0.7144330143928528, "incorrect_loss_per_char": 0.6931005915006002, "correct_loss_per_token": 1.4288660287857056, "incorrect_loss_per_token": 1.3862011830012004, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2175573110580444, "num_tokens": 1, "num_tokens_all": 507, "is_greedy": true, "logits_per_token": -1.2175573110580444, "logits_per_char": -0.6087786555290222, "num_chars": 2}, {"sum_logits": -1.4288660287857056, "num_tokens": 1, "num_tokens_all": 507, "is_greedy": false, "logits_per_token": -1.4288660287857056, "logits_per_char": -0.7144330143928528, "num_chars": 2}, {"sum_logits": -1.4600290060043335, "num_tokens": 1, "num_tokens_all": 507, "is_greedy": false, "logits_per_token": -1.4600290060043335, "logits_per_char": -0.7300145030021667, "num_chars": 2}, {"sum_logits": -1.4810172319412231, "num_tokens": 1, "num_tokens_all": 507, "is_greedy": false, "logits_per_token": -1.4810172319412231, "logits_per_char": -0.7405086159706116, "num_chars": 2}], "label": 1, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 5, "native_id": 5, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.365483283996582, "incorrect_loss_raw": 1.4007941484451294, "correct_loss_per_char": 0.682741641998291, "incorrect_loss_per_char": 0.7003970742225647, "correct_loss_per_token": 1.365483283996582, "incorrect_loss_per_token": 1.4007941484451294, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3192683458328247, "num_tokens": 1, "num_tokens_all": 539, "is_greedy": true, "logits_per_token": -1.3192683458328247, "logits_per_char": -0.6596341729164124, "num_chars": 2}, {"sum_logits": -1.365483283996582, "num_tokens": 1, "num_tokens_all": 539, "is_greedy": false, "logits_per_token": -1.365483283996582, "logits_per_char": -0.682741641998291, "num_chars": 2}, {"sum_logits": -1.4618314504623413, "num_tokens": 1, "num_tokens_all": 539, "is_greedy": false, "logits_per_token": -1.4618314504623413, "logits_per_char": -0.7309157252311707, "num_chars": 2}, {"sum_logits": -1.4212826490402222, "num_tokens": 1, "num_tokens_all": 539, "is_greedy": false, "logits_per_token": -1.4212826490402222, "logits_per_char": -0.7106413245201111, "num_chars": 2}], "label": 1, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 6, "native_id": 6, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.257138729095459, "incorrect_loss_raw": 1.44273046652476, "correct_loss_per_char": 0.6285693645477295, "incorrect_loss_per_char": 0.72136523326238, "correct_loss_per_token": 1.257138729095459, "incorrect_loss_per_token": 1.44273046652476, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.257138729095459, "num_tokens": 1, "num_tokens_all": 516, "is_greedy": true, "logits_per_token": -1.257138729095459, "logits_per_char": -0.6285693645477295, "num_chars": 2}, {"sum_logits": -1.4029701948165894, "num_tokens": 1, "num_tokens_all": 516, "is_greedy": false, "logits_per_token": -1.4029701948165894, "logits_per_char": -0.7014850974082947, "num_chars": 2}, {"sum_logits": -1.4554483890533447, "num_tokens": 1, "num_tokens_all": 516, "is_greedy": false, "logits_per_token": -1.4554483890533447, "logits_per_char": -0.7277241945266724, "num_chars": 2}, {"sum_logits": -1.4697728157043457, "num_tokens": 1, "num_tokens_all": 516, "is_greedy": false, "logits_per_token": -1.4697728157043457, "logits_per_char": -0.7348864078521729, "num_chars": 2}], "label": 0, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 7, "native_id": 7, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4820061922073364, "incorrect_loss_raw": 1.3722089926401775, "correct_loss_per_char": 0.7410030961036682, "incorrect_loss_per_char": 0.6861044963200887, "correct_loss_per_token": 1.4820061922073364, "incorrect_loss_per_token": 1.3722089926401775, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2455782890319824, "num_tokens": 1, "num_tokens_all": 511, "is_greedy": true, "logits_per_token": -1.2455782890319824, "logits_per_char": -0.6227891445159912, "num_chars": 2}, {"sum_logits": -1.3190475702285767, "num_tokens": 1, "num_tokens_all": 511, "is_greedy": false, "logits_per_token": -1.3190475702285767, "logits_per_char": -0.6595237851142883, "num_chars": 2}, {"sum_logits": -1.5520011186599731, "num_tokens": 1, "num_tokens_all": 511, "is_greedy": false, "logits_per_token": -1.5520011186599731, "logits_per_char": -0.7760005593299866, "num_chars": 2}, {"sum_logits": -1.4820061922073364, "num_tokens": 1, "num_tokens_all": 511, "is_greedy": false, "logits_per_token": -1.4820061922073364, "logits_per_char": -0.7410030961036682, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 8, "native_id": 8, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.497285008430481, "incorrect_loss_raw": 1.3576287825902302, "correct_loss_per_char": 0.7486425042152405, "incorrect_loss_per_char": 0.6788143912951151, "correct_loss_per_token": 1.497285008430481, "incorrect_loss_per_token": 1.3576287825902302, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3967249393463135, "num_tokens": 1, "num_tokens_all": 542, "is_greedy": false, "logits_per_token": -1.3967249393463135, "logits_per_char": -0.6983624696731567, "num_chars": 2}, {"sum_logits": -1.334296464920044, "num_tokens": 1, "num_tokens_all": 542, "is_greedy": true, "logits_per_token": -1.334296464920044, "logits_per_char": -0.667148232460022, "num_chars": 2}, {"sum_logits": -1.497285008430481, "num_tokens": 1, "num_tokens_all": 542, "is_greedy": false, "logits_per_token": -1.497285008430481, "logits_per_char": -0.7486425042152405, "num_chars": 2}, {"sum_logits": -1.3418649435043335, "num_tokens": 1, "num_tokens_all": 542, "is_greedy": false, "logits_per_token": -1.3418649435043335, "logits_per_char": -0.6709324717521667, "num_chars": 2}], "label": 2, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 9, "native_id": 9, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5329241752624512, "incorrect_loss_raw": 1.3508535623550415, "correct_loss_per_char": 0.7664620876312256, "incorrect_loss_per_char": 0.6754267811775208, "correct_loss_per_token": 1.5329241752624512, "incorrect_loss_per_token": 1.3508535623550415, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.41851806640625, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": false, "logits_per_token": -1.41851806640625, "logits_per_char": -0.709259033203125, "num_chars": 2}, {"sum_logits": -1.2432588338851929, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": true, "logits_per_token": -1.2432588338851929, "logits_per_char": -0.6216294169425964, "num_chars": 2}, {"sum_logits": -1.3907837867736816, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": false, "logits_per_token": -1.3907837867736816, "logits_per_char": -0.6953918933868408, "num_chars": 2}, {"sum_logits": -1.5329241752624512, "num_tokens": 1, "num_tokens_all": 573, "is_greedy": false, "logits_per_token": -1.5329241752624512, "logits_per_char": -0.7664620876312256, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 10, "native_id": 10, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3578547239303589, "incorrect_loss_raw": 1.4005762736002605, "correct_loss_per_char": 0.6789273619651794, "incorrect_loss_per_char": 0.7002881368001302, "correct_loss_per_token": 1.3578547239303589, "incorrect_loss_per_token": 1.4005762736002605, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3937691450119019, "num_tokens": 1, "num_tokens_all": 549, "is_greedy": false, "logits_per_token": -1.3937691450119019, "logits_per_char": -0.6968845725059509, "num_chars": 2}, {"sum_logits": -1.3845940828323364, "num_tokens": 1, "num_tokens_all": 549, "is_greedy": false, "logits_per_token": -1.3845940828323364, "logits_per_char": -0.6922970414161682, "num_chars": 2}, {"sum_logits": -1.423365592956543, "num_tokens": 1, "num_tokens_all": 549, "is_greedy": false, "logits_per_token": -1.423365592956543, "logits_per_char": -0.7116827964782715, "num_chars": 2}, {"sum_logits": -1.3578547239303589, "num_tokens": 1, "num_tokens_all": 549, "is_greedy": true, "logits_per_token": -1.3578547239303589, "logits_per_char": -0.6789273619651794, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 11, "native_id": 11, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.308651328086853, "incorrect_loss_raw": 1.421695629755656, "correct_loss_per_char": 0.6543256640434265, "incorrect_loss_per_char": 0.710847814877828, "correct_loss_per_token": 1.308651328086853, "incorrect_loss_per_token": 1.421695629755656, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.487007975578308, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.487007975578308, "logits_per_char": -0.743503987789154, "num_chars": 2}, {"sum_logits": -1.308651328086853, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": true, "logits_per_token": -1.308651328086853, "logits_per_char": -0.6543256640434265, "num_chars": 2}, {"sum_logits": -1.4485231637954712, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.4485231637954712, "logits_per_char": -0.7242615818977356, "num_chars": 2}, {"sum_logits": -1.3295557498931885, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.3295557498931885, "logits_per_char": -0.6647778749465942, "num_chars": 2}], "label": 1, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 12, "native_id": 12, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2407162189483643, "incorrect_loss_raw": 1.4507322311401367, "correct_loss_per_char": 0.6203581094741821, "incorrect_loss_per_char": 0.7253661155700684, "correct_loss_per_token": 1.2407162189483643, "incorrect_loss_per_token": 1.4507322311401367, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2407162189483643, "num_tokens": 1, "num_tokens_all": 531, "is_greedy": true, "logits_per_token": -1.2407162189483643, "logits_per_char": -0.6203581094741821, "num_chars": 2}, {"sum_logits": -1.3365896940231323, "num_tokens": 1, "num_tokens_all": 531, "is_greedy": false, "logits_per_token": -1.3365896940231323, "logits_per_char": -0.6682948470115662, "num_chars": 2}, {"sum_logits": -1.5755386352539062, "num_tokens": 1, "num_tokens_all": 531, "is_greedy": false, "logits_per_token": -1.5755386352539062, "logits_per_char": -0.7877693176269531, "num_chars": 2}, {"sum_logits": -1.4400683641433716, "num_tokens": 1, "num_tokens_all": 531, "is_greedy": false, "logits_per_token": -1.4400683641433716, "logits_per_char": -0.7200341820716858, "num_chars": 2}], "label": 0, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 13, "native_id": 13, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4100759029388428, "incorrect_loss_raw": 1.3923656543095906, "correct_loss_per_char": 0.7050379514694214, "incorrect_loss_per_char": 0.6961828271547953, "correct_loss_per_token": 1.4100759029388428, "incorrect_loss_per_token": 1.3923656543095906, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4137934446334839, "num_tokens": 1, "num_tokens_all": 578, "is_greedy": false, "logits_per_token": -1.4137934446334839, "logits_per_char": -0.7068967223167419, "num_chars": 2}, {"sum_logits": -1.2298064231872559, "num_tokens": 1, "num_tokens_all": 578, "is_greedy": true, "logits_per_token": -1.2298064231872559, "logits_per_char": -0.6149032115936279, "num_chars": 2}, {"sum_logits": -1.5334970951080322, "num_tokens": 1, "num_tokens_all": 578, "is_greedy": false, "logits_per_token": -1.5334970951080322, "logits_per_char": -0.7667485475540161, "num_chars": 2}, {"sum_logits": -1.4100759029388428, "num_tokens": 1, "num_tokens_all": 578, "is_greedy": false, "logits_per_token": -1.4100759029388428, "logits_per_char": -0.7050379514694214, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 14, "native_id": 14, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3956503868103027, "incorrect_loss_raw": 1.3936831553777058, "correct_loss_per_char": 0.6978251934051514, "incorrect_loss_per_char": 0.6968415776888529, "correct_loss_per_token": 1.3956503868103027, "incorrect_loss_per_token": 1.3936831553777058, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4826292991638184, "num_tokens": 1, "num_tokens_all": 550, "is_greedy": false, "logits_per_token": -1.4826292991638184, "logits_per_char": -0.7413146495819092, "num_chars": 2}, {"sum_logits": -1.3048112392425537, "num_tokens": 1, "num_tokens_all": 550, "is_greedy": true, "logits_per_token": -1.3048112392425537, "logits_per_char": -0.6524056196212769, "num_chars": 2}, {"sum_logits": -1.3936089277267456, "num_tokens": 1, "num_tokens_all": 550, "is_greedy": false, "logits_per_token": -1.3936089277267456, "logits_per_char": -0.6968044638633728, "num_chars": 2}, {"sum_logits": -1.3956503868103027, "num_tokens": 1, "num_tokens_all": 550, "is_greedy": false, "logits_per_token": -1.3956503868103027, "logits_per_char": -0.6978251934051514, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 15, "native_id": 15, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3931095600128174, "incorrect_loss_raw": 1.3907535076141357, "correct_loss_per_char": 0.6965547800064087, "incorrect_loss_per_char": 0.6953767538070679, "correct_loss_per_token": 1.3931095600128174, "incorrect_loss_per_token": 1.3907535076141357, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4614883661270142, "num_tokens": 1, "num_tokens_all": 540, "is_greedy": false, "logits_per_token": -1.4614883661270142, "logits_per_char": -0.7307441830635071, "num_chars": 2}, {"sum_logits": -1.341437816619873, "num_tokens": 1, "num_tokens_all": 540, "is_greedy": true, "logits_per_token": -1.341437816619873, "logits_per_char": -0.6707189083099365, "num_chars": 2}, {"sum_logits": -1.3931095600128174, "num_tokens": 1, "num_tokens_all": 540, "is_greedy": false, "logits_per_token": -1.3931095600128174, "logits_per_char": -0.6965547800064087, "num_chars": 2}, {"sum_logits": -1.36933434009552, "num_tokens": 1, "num_tokens_all": 540, "is_greedy": false, "logits_per_token": -1.36933434009552, "logits_per_char": -0.68466717004776, "num_chars": 2}], "label": 2, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 16, "native_id": 16, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.417011022567749, "incorrect_loss_raw": 1.4157315492630005, "correct_loss_per_char": 0.7085055112838745, "incorrect_loss_per_char": 0.7078657746315002, "correct_loss_per_token": 1.417011022567749, "incorrect_loss_per_token": 1.4157315492630005, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3266209363937378, "num_tokens": 1, "num_tokens_all": 524, "is_greedy": true, "logits_per_token": -1.3266209363937378, "logits_per_char": -0.6633104681968689, "num_chars": 2}, {"sum_logits": -1.4632328748703003, "num_tokens": 1, "num_tokens_all": 524, "is_greedy": false, "logits_per_token": -1.4632328748703003, "logits_per_char": -0.7316164374351501, "num_chars": 2}, {"sum_logits": -1.4573408365249634, "num_tokens": 1, "num_tokens_all": 524, "is_greedy": false, "logits_per_token": -1.4573408365249634, "logits_per_char": -0.7286704182624817, "num_chars": 2}, {"sum_logits": -1.417011022567749, "num_tokens": 1, "num_tokens_all": 524, "is_greedy": false, "logits_per_token": -1.417011022567749, "logits_per_char": -0.7085055112838745, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 17, "native_id": 17, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4184579849243164, "incorrect_loss_raw": 1.3843876123428345, "correct_loss_per_char": 0.7092289924621582, "incorrect_loss_per_char": 0.6921938061714172, "correct_loss_per_token": 1.4184579849243164, "incorrect_loss_per_token": 1.3843876123428345, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4619026184082031, "num_tokens": 1, "num_tokens_all": 567, "is_greedy": false, "logits_per_token": -1.4619026184082031, "logits_per_char": -0.7309513092041016, "num_chars": 2}, {"sum_logits": -1.3129128217697144, "num_tokens": 1, "num_tokens_all": 567, "is_greedy": true, "logits_per_token": -1.3129128217697144, "logits_per_char": -0.6564564108848572, "num_chars": 2}, {"sum_logits": -1.378347396850586, "num_tokens": 1, "num_tokens_all": 567, "is_greedy": false, "logits_per_token": -1.378347396850586, "logits_per_char": -0.689173698425293, "num_chars": 2}, {"sum_logits": -1.4184579849243164, "num_tokens": 1, "num_tokens_all": 567, "is_greedy": false, "logits_per_token": -1.4184579849243164, "logits_per_char": -0.7092289924621582, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 18, "native_id": 18, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4487801790237427, "incorrect_loss_raw": 1.3802501360575359, "correct_loss_per_char": 0.7243900895118713, "incorrect_loss_per_char": 0.6901250680287679, "correct_loss_per_token": 1.4487801790237427, "incorrect_loss_per_token": 1.3802501360575359, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2541050910949707, "num_tokens": 1, "num_tokens_all": 537, "is_greedy": true, "logits_per_token": -1.2541050910949707, "logits_per_char": -0.6270525455474854, "num_chars": 2}, {"sum_logits": -1.4017630815505981, "num_tokens": 1, "num_tokens_all": 537, "is_greedy": false, "logits_per_token": -1.4017630815505981, "logits_per_char": -0.7008815407752991, "num_chars": 2}, {"sum_logits": -1.4487801790237427, "num_tokens": 1, "num_tokens_all": 537, "is_greedy": false, "logits_per_token": -1.4487801790237427, "logits_per_char": -0.7243900895118713, "num_chars": 2}, {"sum_logits": -1.4848822355270386, "num_tokens": 1, "num_tokens_all": 537, "is_greedy": false, "logits_per_token": -1.4848822355270386, "logits_per_char": -0.7424411177635193, "num_chars": 2}], "label": 2, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 19, "native_id": 19, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3665262460708618, "incorrect_loss_raw": 1.4041951497395833, "correct_loss_per_char": 0.6832631230354309, "incorrect_loss_per_char": 0.7020975748697916, "correct_loss_per_token": 1.3665262460708618, "incorrect_loss_per_token": 1.4041951497395833, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2773158550262451, "num_tokens": 1, "num_tokens_all": 511, "is_greedy": true, "logits_per_token": -1.2773158550262451, "logits_per_char": -0.6386579275131226, "num_chars": 2}, {"sum_logits": -1.3665262460708618, "num_tokens": 1, "num_tokens_all": 511, "is_greedy": false, "logits_per_token": -1.3665262460708618, "logits_per_char": -0.6832631230354309, "num_chars": 2}, {"sum_logits": -1.4556459188461304, "num_tokens": 1, "num_tokens_all": 511, "is_greedy": false, "logits_per_token": -1.4556459188461304, "logits_per_char": -0.7278229594230652, "num_chars": 2}, {"sum_logits": -1.4796236753463745, "num_tokens": 1, "num_tokens_all": 511, "is_greedy": false, "logits_per_token": -1.4796236753463745, "logits_per_char": -0.7398118376731873, "num_chars": 2}], "label": 1, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 20, "native_id": 20, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5053809881210327, "incorrect_loss_raw": 1.3586903810501099, "correct_loss_per_char": 0.7526904940605164, "incorrect_loss_per_char": 0.6793451905250549, "correct_loss_per_token": 1.5053809881210327, "incorrect_loss_per_token": 1.3586903810501099, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3058353662490845, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": true, "logits_per_token": -1.3058353662490845, "logits_per_char": -0.6529176831245422, "num_chars": 2}, {"sum_logits": -1.314477801322937, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": false, "logits_per_token": -1.314477801322937, "logits_per_char": -0.6572389006614685, "num_chars": 2}, {"sum_logits": -1.455757975578308, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": false, "logits_per_token": -1.455757975578308, "logits_per_char": -0.727878987789154, "num_chars": 2}, {"sum_logits": -1.5053809881210327, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": false, "logits_per_token": -1.5053809881210327, "logits_per_char": -0.7526904940605164, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 21, "native_id": 21, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5244207382202148, "incorrect_loss_raw": 1.3608237107594807, "correct_loss_per_char": 0.7622103691101074, "incorrect_loss_per_char": 0.6804118553797404, "correct_loss_per_token": 1.5244207382202148, "incorrect_loss_per_token": 1.3608237107594807, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.1988648176193237, "num_tokens": 1, "num_tokens_all": 538, "is_greedy": true, "logits_per_token": -1.1988648176193237, "logits_per_char": -0.5994324088096619, "num_chars": 2}, {"sum_logits": -1.3572299480438232, "num_tokens": 1, "num_tokens_all": 538, "is_greedy": false, "logits_per_token": -1.3572299480438232, "logits_per_char": -0.6786149740219116, "num_chars": 2}, {"sum_logits": -1.5263763666152954, "num_tokens": 1, "num_tokens_all": 538, "is_greedy": false, "logits_per_token": -1.5263763666152954, "logits_per_char": -0.7631881833076477, "num_chars": 2}, {"sum_logits": -1.5244207382202148, "num_tokens": 1, "num_tokens_all": 538, "is_greedy": false, "logits_per_token": -1.5244207382202148, "logits_per_char": -0.7622103691101074, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 22, "native_id": 22, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4534926414489746, "incorrect_loss_raw": 1.3736776908238728, "correct_loss_per_char": 0.7267463207244873, "incorrect_loss_per_char": 0.6868388454119364, "correct_loss_per_token": 1.4534926414489746, "incorrect_loss_per_token": 1.3736776908238728, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2991758584976196, "num_tokens": 1, "num_tokens_all": 557, "is_greedy": true, "logits_per_token": -1.2991758584976196, "logits_per_char": -0.6495879292488098, "num_chars": 2}, {"sum_logits": -1.32748281955719, "num_tokens": 1, "num_tokens_all": 557, "is_greedy": false, "logits_per_token": -1.32748281955719, "logits_per_char": -0.663741409778595, "num_chars": 2}, {"sum_logits": -1.494374394416809, "num_tokens": 1, "num_tokens_all": 557, "is_greedy": false, "logits_per_token": -1.494374394416809, "logits_per_char": -0.7471871972084045, "num_chars": 2}, {"sum_logits": -1.4534926414489746, "num_tokens": 1, "num_tokens_all": 557, "is_greedy": false, "logits_per_token": -1.4534926414489746, "logits_per_char": -0.7267463207244873, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 23, "native_id": 23, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.368753433227539, "incorrect_loss_raw": 1.4033985137939453, "correct_loss_per_char": 0.6843767166137695, "incorrect_loss_per_char": 0.7016992568969727, "correct_loss_per_token": 1.368753433227539, "incorrect_loss_per_token": 1.4033985137939453, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2850282192230225, "num_tokens": 1, "num_tokens_all": 518, "is_greedy": true, "logits_per_token": -1.2850282192230225, "logits_per_char": -0.6425141096115112, "num_chars": 2}, {"sum_logits": -1.368753433227539, "num_tokens": 1, "num_tokens_all": 518, "is_greedy": false, "logits_per_token": -1.368753433227539, "logits_per_char": -0.6843767166137695, "num_chars": 2}, {"sum_logits": -1.446237325668335, "num_tokens": 1, "num_tokens_all": 518, "is_greedy": false, "logits_per_token": -1.446237325668335, "logits_per_char": -0.7231186628341675, "num_chars": 2}, {"sum_logits": -1.4789299964904785, "num_tokens": 1, "num_tokens_all": 518, "is_greedy": false, "logits_per_token": -1.4789299964904785, "logits_per_char": -0.7394649982452393, "num_chars": 2}], "label": 1, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 24, "native_id": 24, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.433279037475586, "incorrect_loss_raw": 1.3833765188852947, "correct_loss_per_char": 0.716639518737793, "incorrect_loss_per_char": 0.6916882594426473, "correct_loss_per_token": 1.433279037475586, "incorrect_loss_per_token": 1.3833765188852947, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2686412334442139, "num_tokens": 1, "num_tokens_all": 506, "is_greedy": true, "logits_per_token": -1.2686412334442139, "logits_per_char": -0.6343206167221069, "num_chars": 2}, {"sum_logits": -1.4753609895706177, "num_tokens": 1, "num_tokens_all": 506, "is_greedy": false, "logits_per_token": -1.4753609895706177, "logits_per_char": -0.7376804947853088, "num_chars": 2}, {"sum_logits": -1.4061273336410522, "num_tokens": 1, "num_tokens_all": 506, "is_greedy": false, "logits_per_token": -1.4061273336410522, "logits_per_char": -0.7030636668205261, "num_chars": 2}, {"sum_logits": -1.433279037475586, "num_tokens": 1, "num_tokens_all": 506, "is_greedy": false, "logits_per_token": -1.433279037475586, "logits_per_char": -0.716639518737793, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 25, "native_id": 25, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.276816964149475, "incorrect_loss_raw": 1.4347671270370483, "correct_loss_per_char": 0.6384084820747375, "incorrect_loss_per_char": 0.7173835635185242, "correct_loss_per_token": 1.276816964149475, "incorrect_loss_per_token": 1.4347671270370483, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.276816964149475, "num_tokens": 1, "num_tokens_all": 508, "is_greedy": true, "logits_per_token": -1.276816964149475, "logits_per_char": -0.6384084820747375, "num_chars": 2}, {"sum_logits": -1.457187294960022, "num_tokens": 1, "num_tokens_all": 508, "is_greedy": false, "logits_per_token": -1.457187294960022, "logits_per_char": -0.728593647480011, "num_chars": 2}, {"sum_logits": -1.4410982131958008, "num_tokens": 1, "num_tokens_all": 508, "is_greedy": false, "logits_per_token": -1.4410982131958008, "logits_per_char": -0.7205491065979004, "num_chars": 2}, {"sum_logits": -1.4060158729553223, "num_tokens": 1, "num_tokens_all": 508, "is_greedy": false, "logits_per_token": -1.4060158729553223, "logits_per_char": -0.7030079364776611, "num_chars": 2}], "label": 0, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 26, "native_id": 26, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3693073987960815, "incorrect_loss_raw": 1.4222184022267659, "correct_loss_per_char": 0.6846536993980408, "incorrect_loss_per_char": 0.7111092011133829, "correct_loss_per_token": 1.3693073987960815, "incorrect_loss_per_token": 1.4222184022267659, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.257188081741333, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": true, "logits_per_token": -1.257188081741333, "logits_per_char": -0.6285940408706665, "num_chars": 2}, {"sum_logits": -1.4311485290527344, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.4311485290527344, "logits_per_char": -0.7155742645263672, "num_chars": 2}, {"sum_logits": -1.5783185958862305, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.5783185958862305, "logits_per_char": -0.7891592979431152, "num_chars": 2}, {"sum_logits": -1.3693073987960815, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.3693073987960815, "logits_per_char": -0.6846536993980408, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 27, "native_id": 27, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4579813480377197, "incorrect_loss_raw": 1.3750243186950684, "correct_loss_per_char": 0.7289906740188599, "incorrect_loss_per_char": 0.6875121593475342, "correct_loss_per_token": 1.4579813480377197, "incorrect_loss_per_token": 1.3750243186950684, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2430208921432495, "num_tokens": 1, "num_tokens_all": 563, "is_greedy": true, "logits_per_token": -1.2430208921432495, "logits_per_char": -0.6215104460716248, "num_chars": 2}, {"sum_logits": -1.3471403121948242, "num_tokens": 1, "num_tokens_all": 563, "is_greedy": false, "logits_per_token": -1.3471403121948242, "logits_per_char": -0.6735701560974121, "num_chars": 2}, {"sum_logits": -1.5349117517471313, "num_tokens": 1, "num_tokens_all": 563, "is_greedy": false, "logits_per_token": -1.5349117517471313, "logits_per_char": -0.7674558758735657, "num_chars": 2}, {"sum_logits": -1.4579813480377197, "num_tokens": 1, "num_tokens_all": 563, "is_greedy": false, "logits_per_token": -1.4579813480377197, "logits_per_char": -0.7289906740188599, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 28, "native_id": 28, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3646057844161987, "incorrect_loss_raw": 1.4194170236587524, "correct_loss_per_char": 0.6823028922080994, "incorrect_loss_per_char": 0.7097085118293762, "correct_loss_per_token": 1.3646057844161987, "incorrect_loss_per_token": 1.4194170236587524, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3153467178344727, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": true, "logits_per_token": -1.3153467178344727, "logits_per_char": -0.6576733589172363, "num_chars": 2}, {"sum_logits": -1.5062898397445679, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.5062898397445679, "logits_per_char": -0.7531449198722839, "num_chars": 2}, {"sum_logits": -1.4366145133972168, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.4366145133972168, "logits_per_char": -0.7183072566986084, "num_chars": 2}, {"sum_logits": -1.3646057844161987, "num_tokens": 1, "num_tokens_all": 577, "is_greedy": false, "logits_per_token": -1.3646057844161987, "logits_per_char": -0.6823028922080994, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 29, "native_id": 29, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.434610366821289, "incorrect_loss_raw": 1.3763182163238525, "correct_loss_per_char": 0.7173051834106445, "incorrect_loss_per_char": 0.6881591081619263, "correct_loss_per_token": 1.434610366821289, "incorrect_loss_per_token": 1.3763182163238525, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3624799251556396, "num_tokens": 1, "num_tokens_all": 541, "is_greedy": false, "logits_per_token": -1.3624799251556396, "logits_per_char": -0.6812399625778198, "num_chars": 2}, {"sum_logits": -1.3287981748580933, "num_tokens": 1, "num_tokens_all": 541, "is_greedy": true, "logits_per_token": -1.3287981748580933, "logits_per_char": -0.6643990874290466, "num_chars": 2}, {"sum_logits": -1.434610366821289, "num_tokens": 1, "num_tokens_all": 541, "is_greedy": false, "logits_per_token": -1.434610366821289, "logits_per_char": -0.7173051834106445, "num_chars": 2}, {"sum_logits": -1.4376765489578247, "num_tokens": 1, "num_tokens_all": 541, "is_greedy": false, "logits_per_token": -1.4376765489578247, "logits_per_char": -0.7188382744789124, "num_chars": 2}], "label": 2, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 30, "native_id": 30, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5227141380310059, "incorrect_loss_raw": 1.350684682528178, "correct_loss_per_char": 0.7613570690155029, "incorrect_loss_per_char": 0.675342341264089, "correct_loss_per_token": 1.5227141380310059, "incorrect_loss_per_token": 1.350684682528178, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3734734058380127, "num_tokens": 1, "num_tokens_all": 550, "is_greedy": false, "logits_per_token": -1.3734734058380127, "logits_per_char": -0.6867367029190063, "num_chars": 2}, {"sum_logits": -1.5227141380310059, "num_tokens": 1, "num_tokens_all": 550, "is_greedy": false, "logits_per_token": -1.5227141380310059, "logits_per_char": -0.7613570690155029, "num_chars": 2}, {"sum_logits": -1.4098007678985596, "num_tokens": 1, "num_tokens_all": 550, "is_greedy": false, "logits_per_token": -1.4098007678985596, "logits_per_char": -0.7049003839492798, "num_chars": 2}, {"sum_logits": -1.2687798738479614, "num_tokens": 1, "num_tokens_all": 550, "is_greedy": true, "logits_per_token": -1.2687798738479614, "logits_per_char": -0.6343899369239807, "num_chars": 2}], "label": 1, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 31, "native_id": 31, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4300304651260376, "incorrect_loss_raw": 1.3864490588506062, "correct_loss_per_char": 0.7150152325630188, "incorrect_loss_per_char": 0.6932245294253031, "correct_loss_per_token": 1.4300304651260376, "incorrect_loss_per_token": 1.3864490588506062, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4794567823410034, "num_tokens": 1, "num_tokens_all": 594, "is_greedy": false, "logits_per_token": -1.4794567823410034, "logits_per_char": -0.7397283911705017, "num_chars": 2}, {"sum_logits": -1.2039767503738403, "num_tokens": 1, "num_tokens_all": 594, "is_greedy": true, "logits_per_token": -1.2039767503738403, "logits_per_char": -0.6019883751869202, "num_chars": 2}, {"sum_logits": -1.4300304651260376, "num_tokens": 1, "num_tokens_all": 594, "is_greedy": false, "logits_per_token": -1.4300304651260376, "logits_per_char": -0.7150152325630188, "num_chars": 2}, {"sum_logits": -1.475913643836975, "num_tokens": 1, "num_tokens_all": 594, "is_greedy": false, "logits_per_token": -1.475913643836975, "logits_per_char": -0.7379568219184875, "num_chars": 2}], "label": 2, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 32, "native_id": 32, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.13642156124115, "incorrect_loss_raw": 1.494049112002055, "correct_loss_per_char": 0.568210780620575, "incorrect_loss_per_char": 0.7470245560010275, "correct_loss_per_token": 1.13642156124115, "incorrect_loss_per_token": 1.494049112002055, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4609670639038086, "num_tokens": 1, "num_tokens_all": 610, "is_greedy": false, "logits_per_token": -1.4609670639038086, "logits_per_char": -0.7304835319519043, "num_chars": 2}, {"sum_logits": -1.13642156124115, "num_tokens": 1, "num_tokens_all": 610, "is_greedy": true, "logits_per_token": -1.13642156124115, "logits_per_char": -0.568210780620575, "num_chars": 2}, {"sum_logits": -1.493557333946228, "num_tokens": 1, "num_tokens_all": 610, "is_greedy": false, "logits_per_token": -1.493557333946228, "logits_per_char": -0.746778666973114, "num_chars": 2}, {"sum_logits": -1.527622938156128, "num_tokens": 1, "num_tokens_all": 610, "is_greedy": false, "logits_per_token": -1.527622938156128, "logits_per_char": -0.763811469078064, "num_chars": 2}], "label": 1, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 33, "native_id": 33, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.421126365661621, "incorrect_loss_raw": 1.3835655053456624, "correct_loss_per_char": 0.7105631828308105, "incorrect_loss_per_char": 0.6917827526728312, "correct_loss_per_token": 1.421126365661621, "incorrect_loss_per_token": 1.3835655053456624, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3485201597213745, "num_tokens": 1, "num_tokens_all": 536, "is_greedy": true, "logits_per_token": -1.3485201597213745, "logits_per_char": -0.6742600798606873, "num_chars": 2}, {"sum_logits": -1.3679898977279663, "num_tokens": 1, "num_tokens_all": 536, "is_greedy": false, "logits_per_token": -1.3679898977279663, "logits_per_char": -0.6839949488639832, "num_chars": 2}, {"sum_logits": -1.4341864585876465, "num_tokens": 1, "num_tokens_all": 536, "is_greedy": false, "logits_per_token": -1.4341864585876465, "logits_per_char": -0.7170932292938232, "num_chars": 2}, {"sum_logits": -1.421126365661621, "num_tokens": 1, "num_tokens_all": 536, "is_greedy": false, "logits_per_token": -1.421126365661621, "logits_per_char": -0.7105631828308105, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 34, "native_id": 34, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3350754976272583, "incorrect_loss_raw": 1.413959542910258, "correct_loss_per_char": 0.6675377488136292, "incorrect_loss_per_char": 0.706979771455129, "correct_loss_per_token": 1.3350754976272583, "incorrect_loss_per_token": 1.413959542910258, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3350754976272583, "num_tokens": 1, "num_tokens_all": 551, "is_greedy": false, "logits_per_token": -1.3350754976272583, "logits_per_char": -0.6675377488136292, "num_chars": 2}, {"sum_logits": -1.3020989894866943, "num_tokens": 1, "num_tokens_all": 551, "is_greedy": true, "logits_per_token": -1.3020989894866943, "logits_per_char": -0.6510494947433472, "num_chars": 2}, {"sum_logits": -1.5409411191940308, "num_tokens": 1, "num_tokens_all": 551, "is_greedy": false, "logits_per_token": -1.5409411191940308, "logits_per_char": -0.7704705595970154, "num_chars": 2}, {"sum_logits": -1.3988385200500488, "num_tokens": 1, "num_tokens_all": 551, "is_greedy": false, "logits_per_token": -1.3988385200500488, "logits_per_char": -0.6994192600250244, "num_chars": 2}], "label": 0, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 35, "native_id": 35, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4103189706802368, "incorrect_loss_raw": 1.3858151038487752, "correct_loss_per_char": 0.7051594853401184, "incorrect_loss_per_char": 0.6929075519243876, "correct_loss_per_token": 1.4103189706802368, "incorrect_loss_per_token": 1.3858151038487752, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4300930500030518, "num_tokens": 1, "num_tokens_all": 530, "is_greedy": false, "logits_per_token": -1.4300930500030518, "logits_per_char": -0.7150465250015259, "num_chars": 2}, {"sum_logits": -1.3185878992080688, "num_tokens": 1, "num_tokens_all": 530, "is_greedy": true, "logits_per_token": -1.3185878992080688, "logits_per_char": -0.6592939496040344, "num_chars": 2}, {"sum_logits": -1.4103189706802368, "num_tokens": 1, "num_tokens_all": 530, "is_greedy": false, "logits_per_token": -1.4103189706802368, "logits_per_char": -0.7051594853401184, "num_chars": 2}, {"sum_logits": -1.408764362335205, "num_tokens": 1, "num_tokens_all": 530, "is_greedy": false, "logits_per_token": -1.408764362335205, "logits_per_char": -0.7043821811676025, "num_chars": 2}], "label": 2, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 36, "native_id": 36, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4034967422485352, "incorrect_loss_raw": 1.3923840125401814, "correct_loss_per_char": 0.7017483711242676, "incorrect_loss_per_char": 0.6961920062700907, "correct_loss_per_token": 1.4034967422485352, "incorrect_loss_per_token": 1.3923840125401814, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2744157314300537, "num_tokens": 1, "num_tokens_all": 515, "is_greedy": true, "logits_per_token": -1.2744157314300537, "logits_per_char": -0.6372078657150269, "num_chars": 2}, {"sum_logits": -1.382563591003418, "num_tokens": 1, "num_tokens_all": 515, "is_greedy": false, "logits_per_token": -1.382563591003418, "logits_per_char": -0.691281795501709, "num_chars": 2}, {"sum_logits": -1.4034967422485352, "num_tokens": 1, "num_tokens_all": 515, "is_greedy": false, "logits_per_token": -1.4034967422485352, "logits_per_char": -0.7017483711242676, "num_chars": 2}, {"sum_logits": -1.5201727151870728, "num_tokens": 1, "num_tokens_all": 515, "is_greedy": false, "logits_per_token": -1.5201727151870728, "logits_per_char": -0.7600863575935364, "num_chars": 2}], "label": 2, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 37, "native_id": 37, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3324389457702637, "incorrect_loss_raw": 1.411541263262431, "correct_loss_per_char": 0.6662194728851318, "incorrect_loss_per_char": 0.7057706316312155, "correct_loss_per_token": 1.3324389457702637, "incorrect_loss_per_token": 1.411541263262431, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3746269941329956, "num_tokens": 1, "num_tokens_all": 520, "is_greedy": false, "logits_per_token": -1.3746269941329956, "logits_per_char": -0.6873134970664978, "num_chars": 2}, {"sum_logits": -1.3324389457702637, "num_tokens": 1, "num_tokens_all": 520, "is_greedy": true, "logits_per_token": -1.3324389457702637, "logits_per_char": -0.6662194728851318, "num_chars": 2}, {"sum_logits": -1.4360973834991455, "num_tokens": 1, "num_tokens_all": 520, "is_greedy": false, "logits_per_token": -1.4360973834991455, "logits_per_char": -0.7180486917495728, "num_chars": 2}, {"sum_logits": -1.4238994121551514, "num_tokens": 1, "num_tokens_all": 520, "is_greedy": false, "logits_per_token": -1.4238994121551514, "logits_per_char": -0.7119497060775757, "num_chars": 2}], "label": 1, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 38, "native_id": 38, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4021967649459839, "incorrect_loss_raw": 1.3886798620224, "correct_loss_per_char": 0.7010983824729919, "incorrect_loss_per_char": 0.6943399310112, "correct_loss_per_token": 1.4021967649459839, "incorrect_loss_per_token": 1.3886798620224, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4021967649459839, "num_tokens": 1, "num_tokens_all": 540, "is_greedy": false, "logits_per_token": -1.4021967649459839, "logits_per_char": -0.7010983824729919, "num_chars": 2}, {"sum_logits": -1.3714239597320557, "num_tokens": 1, "num_tokens_all": 540, "is_greedy": false, "logits_per_token": -1.3714239597320557, "logits_per_char": -0.6857119798660278, "num_chars": 2}, {"sum_logits": -1.459372878074646, "num_tokens": 1, "num_tokens_all": 540, "is_greedy": false, "logits_per_token": -1.459372878074646, "logits_per_char": -0.729686439037323, "num_chars": 2}, {"sum_logits": -1.335242748260498, "num_tokens": 1, "num_tokens_all": 540, "is_greedy": true, "logits_per_token": -1.335242748260498, "logits_per_char": -0.667621374130249, "num_chars": 2}], "label": 0, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 39, "native_id": 39, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3790870904922485, "incorrect_loss_raw": 1.3985718091328938, "correct_loss_per_char": 0.6895435452461243, "incorrect_loss_per_char": 0.6992859045664469, "correct_loss_per_token": 1.3790870904922485, "incorrect_loss_per_token": 1.3985718091328938, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3790870904922485, "num_tokens": 1, "num_tokens_all": 499, "is_greedy": false, "logits_per_token": -1.3790870904922485, "logits_per_char": -0.6895435452461243, "num_chars": 2}, {"sum_logits": -1.307408332824707, "num_tokens": 1, "num_tokens_all": 499, "is_greedy": true, "logits_per_token": -1.307408332824707, "logits_per_char": -0.6537041664123535, "num_chars": 2}, {"sum_logits": -1.471867561340332, "num_tokens": 1, "num_tokens_all": 499, "is_greedy": false, "logits_per_token": -1.471867561340332, "logits_per_char": -0.735933780670166, "num_chars": 2}, {"sum_logits": -1.4164395332336426, "num_tokens": 1, "num_tokens_all": 499, "is_greedy": false, "logits_per_token": -1.4164395332336426, "logits_per_char": -0.7082197666168213, "num_chars": 2}], "label": 0, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 40, "native_id": 40, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3449894189834595, "incorrect_loss_raw": 1.4104835589726765, "correct_loss_per_char": 0.6724947094917297, "incorrect_loss_per_char": 0.7052417794863383, "correct_loss_per_token": 1.3449894189834595, "incorrect_loss_per_token": 1.4104835589726765, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3785043954849243, "num_tokens": 1, "num_tokens_all": 554, "is_greedy": false, "logits_per_token": -1.3785043954849243, "logits_per_char": -0.6892521977424622, "num_chars": 2}, {"sum_logits": -1.3449894189834595, "num_tokens": 1, "num_tokens_all": 554, "is_greedy": true, "logits_per_token": -1.3449894189834595, "logits_per_char": -0.6724947094917297, "num_chars": 2}, {"sum_logits": -1.3881852626800537, "num_tokens": 1, "num_tokens_all": 554, "is_greedy": false, "logits_per_token": -1.3881852626800537, "logits_per_char": -0.6940926313400269, "num_chars": 2}, {"sum_logits": -1.4647610187530518, "num_tokens": 1, "num_tokens_all": 554, "is_greedy": false, "logits_per_token": -1.4647610187530518, "logits_per_char": -0.7323805093765259, "num_chars": 2}], "label": 1, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 41, "native_id": 41, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4727174043655396, "incorrect_loss_raw": 1.3695072730382283, "correct_loss_per_char": 0.7363587021827698, "incorrect_loss_per_char": 0.6847536365191141, "correct_loss_per_token": 1.4727174043655396, "incorrect_loss_per_token": 1.3695072730382283, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2377917766571045, "num_tokens": 1, "num_tokens_all": 521, "is_greedy": true, "logits_per_token": -1.2377917766571045, "logits_per_char": -0.6188958883285522, "num_chars": 2}, {"sum_logits": -1.3929588794708252, "num_tokens": 1, "num_tokens_all": 521, "is_greedy": false, "logits_per_token": -1.3929588794708252, "logits_per_char": -0.6964794397354126, "num_chars": 2}, {"sum_logits": -1.4777711629867554, "num_tokens": 1, "num_tokens_all": 521, "is_greedy": false, "logits_per_token": -1.4777711629867554, "logits_per_char": -0.7388855814933777, "num_chars": 2}, {"sum_logits": -1.4727174043655396, "num_tokens": 1, "num_tokens_all": 521, "is_greedy": false, "logits_per_token": -1.4727174043655396, "logits_per_char": -0.7363587021827698, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 42, "native_id": 42, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3304744958877563, "incorrect_loss_raw": 1.4227250417073567, "correct_loss_per_char": 0.6652372479438782, "incorrect_loss_per_char": 0.7113625208536783, "correct_loss_per_token": 1.3304744958877563, "incorrect_loss_per_token": 1.4227250417073567, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2220523357391357, "num_tokens": 1, "num_tokens_all": 497, "is_greedy": true, "logits_per_token": -1.2220523357391357, "logits_per_char": -0.6110261678695679, "num_chars": 2}, {"sum_logits": -1.3304744958877563, "num_tokens": 1, "num_tokens_all": 497, "is_greedy": false, "logits_per_token": -1.3304744958877563, "logits_per_char": -0.6652372479438782, "num_chars": 2}, {"sum_logits": -1.552707314491272, "num_tokens": 1, "num_tokens_all": 497, "is_greedy": false, "logits_per_token": -1.552707314491272, "logits_per_char": -0.776353657245636, "num_chars": 2}, {"sum_logits": -1.4934154748916626, "num_tokens": 1, "num_tokens_all": 497, "is_greedy": false, "logits_per_token": -1.4934154748916626, "logits_per_char": -0.7467077374458313, "num_chars": 2}], "label": 1, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 43, "native_id": 43, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.506374478340149, "incorrect_loss_raw": 1.3562349875768025, "correct_loss_per_char": 0.7531872391700745, "incorrect_loss_per_char": 0.6781174937884012, "correct_loss_per_token": 1.506374478340149, "incorrect_loss_per_token": 1.3562349875768025, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.506374478340149, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.506374478340149, "logits_per_char": -0.7531872391700745, "num_chars": 2}, {"sum_logits": -1.294687032699585, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": true, "logits_per_token": -1.294687032699585, "logits_per_char": -0.6473435163497925, "num_chars": 2}, {"sum_logits": -1.4213275909423828, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.4213275909423828, "logits_per_char": -0.7106637954711914, "num_chars": 2}, {"sum_logits": -1.35269033908844, "num_tokens": 1, "num_tokens_all": 601, "is_greedy": false, "logits_per_token": -1.35269033908844, "logits_per_char": -0.67634516954422, "num_chars": 2}], "label": 0, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 44, "native_id": 44, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.471792459487915, "incorrect_loss_raw": 1.368195613225301, "correct_loss_per_char": 0.7358962297439575, "incorrect_loss_per_char": 0.6840978066126505, "correct_loss_per_token": 1.471792459487915, "incorrect_loss_per_token": 1.368195613225301, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2808380126953125, "num_tokens": 1, "num_tokens_all": 516, "is_greedy": true, "logits_per_token": -1.2808380126953125, "logits_per_char": -0.6404190063476562, "num_chars": 2}, {"sum_logits": -1.3745083808898926, "num_tokens": 1, "num_tokens_all": 516, "is_greedy": false, "logits_per_token": -1.3745083808898926, "logits_per_char": -0.6872541904449463, "num_chars": 2}, {"sum_logits": -1.4492404460906982, "num_tokens": 1, "num_tokens_all": 516, "is_greedy": false, "logits_per_token": -1.4492404460906982, "logits_per_char": -0.7246202230453491, "num_chars": 2}, {"sum_logits": -1.471792459487915, "num_tokens": 1, "num_tokens_all": 516, "is_greedy": false, "logits_per_token": -1.471792459487915, "logits_per_char": -0.7358962297439575, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 45, "native_id": 45, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3168413639068604, "incorrect_loss_raw": 1.417616367340088, "correct_loss_per_char": 0.6584206819534302, "incorrect_loss_per_char": 0.708808183670044, "correct_loss_per_token": 1.3168413639068604, "incorrect_loss_per_token": 1.417616367340088, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3168413639068604, "num_tokens": 1, "num_tokens_all": 535, "is_greedy": true, "logits_per_token": -1.3168413639068604, "logits_per_char": -0.6584206819534302, "num_chars": 2}, {"sum_logits": -1.4016494750976562, "num_tokens": 1, "num_tokens_all": 535, "is_greedy": false, "logits_per_token": -1.4016494750976562, "logits_per_char": -0.7008247375488281, "num_chars": 2}, {"sum_logits": -1.3736950159072876, "num_tokens": 1, "num_tokens_all": 535, "is_greedy": false, "logits_per_token": -1.3736950159072876, "logits_per_char": -0.6868475079536438, "num_chars": 2}, {"sum_logits": -1.4775046110153198, "num_tokens": 1, "num_tokens_all": 535, "is_greedy": false, "logits_per_token": -1.4775046110153198, "logits_per_char": -0.7387523055076599, "num_chars": 2}], "label": 0, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 46, "native_id": 46, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3576220273971558, "incorrect_loss_raw": 1.4059507449467976, "correct_loss_per_char": 0.6788110136985779, "incorrect_loss_per_char": 0.7029753724733988, "correct_loss_per_token": 1.3576220273971558, "incorrect_loss_per_token": 1.4059507449467976, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3576220273971558, "num_tokens": 1, "num_tokens_all": 520, "is_greedy": false, "logits_per_token": -1.3576220273971558, "logits_per_char": -0.6788110136985779, "num_chars": 2}, {"sum_logits": -1.3066929578781128, "num_tokens": 1, "num_tokens_all": 520, "is_greedy": true, "logits_per_token": -1.3066929578781128, "logits_per_char": -0.6533464789390564, "num_chars": 2}, {"sum_logits": -1.5241128206253052, "num_tokens": 1, "num_tokens_all": 520, "is_greedy": false, "logits_per_token": -1.5241128206253052, "logits_per_char": -0.7620564103126526, "num_chars": 2}, {"sum_logits": -1.387046456336975, "num_tokens": 1, "num_tokens_all": 520, "is_greedy": false, "logits_per_token": -1.387046456336975, "logits_per_char": -0.6935232281684875, "num_chars": 2}], "label": 0, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 47, "native_id": 47, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.380523443222046, "incorrect_loss_raw": 1.394987940788269, "correct_loss_per_char": 0.690261721611023, "incorrect_loss_per_char": 0.6974939703941345, "correct_loss_per_token": 1.380523443222046, "incorrect_loss_per_token": 1.394987940788269, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3278858661651611, "num_tokens": 1, "num_tokens_all": 519, "is_greedy": true, "logits_per_token": -1.3278858661651611, "logits_per_char": -0.6639429330825806, "num_chars": 2}, {"sum_logits": -1.380523443222046, "num_tokens": 1, "num_tokens_all": 519, "is_greedy": false, "logits_per_token": -1.380523443222046, "logits_per_char": -0.690261721611023, "num_chars": 2}, {"sum_logits": -1.4462082386016846, "num_tokens": 1, "num_tokens_all": 519, "is_greedy": false, "logits_per_token": -1.4462082386016846, "logits_per_char": -0.7231041193008423, "num_chars": 2}, {"sum_logits": -1.4108697175979614, "num_tokens": 1, "num_tokens_all": 519, "is_greedy": false, "logits_per_token": -1.4108697175979614, "logits_per_char": -0.7054348587989807, "num_chars": 2}], "label": 1, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 48, "native_id": 48, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.0519928932189941, "incorrect_loss_raw": 1.5530803600947063, "correct_loss_per_char": 0.5259964466094971, "incorrect_loss_per_char": 0.7765401800473531, "correct_loss_per_token": 1.0519928932189941, "incorrect_loss_per_token": 1.5530803600947063, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.0519928932189941, "num_tokens": 1, "num_tokens_all": 512, "is_greedy": true, "logits_per_token": -1.0519928932189941, "logits_per_char": -0.5259964466094971, "num_chars": 2}, {"sum_logits": -1.3269636631011963, "num_tokens": 1, "num_tokens_all": 512, "is_greedy": false, "logits_per_token": -1.3269636631011963, "logits_per_char": -0.6634818315505981, "num_chars": 2}, {"sum_logits": -1.6550053358078003, "num_tokens": 1, "num_tokens_all": 512, "is_greedy": false, "logits_per_token": -1.6550053358078003, "logits_per_char": -0.8275026679039001, "num_chars": 2}, {"sum_logits": -1.677272081375122, "num_tokens": 1, "num_tokens_all": 512, "is_greedy": false, "logits_per_token": -1.677272081375122, "logits_per_char": -0.838636040687561, "num_chars": 2}], "label": 0, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 49, "native_id": 49, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.287298560142517, "incorrect_loss_raw": 1.429794152577718, "correct_loss_per_char": 0.6436492800712585, "incorrect_loss_per_char": 0.714897076288859, "correct_loss_per_token": 1.287298560142517, "incorrect_loss_per_token": 1.429794152577718, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4959101676940918, "num_tokens": 1, "num_tokens_all": 553, "is_greedy": false, "logits_per_token": -1.4959101676940918, "logits_per_char": -0.7479550838470459, "num_chars": 2}, {"sum_logits": -1.3214094638824463, "num_tokens": 1, "num_tokens_all": 553, "is_greedy": false, "logits_per_token": -1.3214094638824463, "logits_per_char": -0.6607047319412231, "num_chars": 2}, {"sum_logits": -1.4720628261566162, "num_tokens": 1, "num_tokens_all": 553, "is_greedy": false, "logits_per_token": -1.4720628261566162, "logits_per_char": -0.7360314130783081, "num_chars": 2}, {"sum_logits": -1.287298560142517, "num_tokens": 1, "num_tokens_all": 553, "is_greedy": true, "logits_per_token": -1.287298560142517, "logits_per_char": -0.6436492800712585, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 50, "native_id": 50, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2820172309875488, "incorrect_loss_raw": 1.432099183400472, "correct_loss_per_char": 0.6410086154937744, "incorrect_loss_per_char": 0.716049591700236, "correct_loss_per_token": 1.2820172309875488, "incorrect_loss_per_token": 1.432099183400472, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3385740518569946, "num_tokens": 1, "num_tokens_all": 509, "is_greedy": false, "logits_per_token": -1.3385740518569946, "logits_per_char": -0.6692870259284973, "num_chars": 2}, {"sum_logits": -1.4249447584152222, "num_tokens": 1, "num_tokens_all": 509, "is_greedy": false, "logits_per_token": -1.4249447584152222, "logits_per_char": -0.7124723792076111, "num_chars": 2}, {"sum_logits": -1.2820172309875488, "num_tokens": 1, "num_tokens_all": 509, "is_greedy": true, "logits_per_token": -1.2820172309875488, "logits_per_char": -0.6410086154937744, "num_chars": 2}, {"sum_logits": -1.5327787399291992, "num_tokens": 1, "num_tokens_all": 509, "is_greedy": false, "logits_per_token": -1.5327787399291992, "logits_per_char": -0.7663893699645996, "num_chars": 2}], "label": 2, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 51, "native_id": 51, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5170576572418213, "incorrect_loss_raw": 1.3648773829142253, "correct_loss_per_char": 0.7585288286209106, "incorrect_loss_per_char": 0.6824386914571127, "correct_loss_per_token": 1.5170576572418213, "incorrect_loss_per_token": 1.3648773829142253, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2496663331985474, "num_tokens": 1, "num_tokens_all": 510, "is_greedy": true, "logits_per_token": -1.2496663331985474, "logits_per_char": -0.6248331665992737, "num_chars": 2}, {"sum_logits": -1.2774382829666138, "num_tokens": 1, "num_tokens_all": 510, "is_greedy": false, "logits_per_token": -1.2774382829666138, "logits_per_char": -0.6387191414833069, "num_chars": 2}, {"sum_logits": -1.5675275325775146, "num_tokens": 1, "num_tokens_all": 510, "is_greedy": false, "logits_per_token": -1.5675275325775146, "logits_per_char": -0.7837637662887573, "num_chars": 2}, {"sum_logits": -1.5170576572418213, "num_tokens": 1, "num_tokens_all": 510, "is_greedy": false, "logits_per_token": -1.5170576572418213, "logits_per_char": -0.7585288286209106, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 52, "native_id": 52, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.1508598327636719, "incorrect_loss_raw": 1.492849866549174, "correct_loss_per_char": 0.5754299163818359, "incorrect_loss_per_char": 0.746424933274587, "correct_loss_per_token": 1.1508598327636719, "incorrect_loss_per_token": 1.492849866549174, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6280674934387207, "num_tokens": 1, "num_tokens_all": 629, "is_greedy": false, "logits_per_token": -1.6280674934387207, "logits_per_char": -0.8140337467193604, "num_chars": 2}, {"sum_logits": -1.1508598327636719, "num_tokens": 1, "num_tokens_all": 629, "is_greedy": true, "logits_per_token": -1.1508598327636719, "logits_per_char": -0.5754299163818359, "num_chars": 2}, {"sum_logits": -1.3308075666427612, "num_tokens": 1, "num_tokens_all": 629, "is_greedy": false, "logits_per_token": -1.3308075666427612, "logits_per_char": -0.6654037833213806, "num_chars": 2}, {"sum_logits": -1.51967453956604, "num_tokens": 1, "num_tokens_all": 629, "is_greedy": false, "logits_per_token": -1.51967453956604, "logits_per_char": -0.75983726978302, "num_chars": 2}], "label": 1, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 53, "native_id": 53, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4073269367218018, "incorrect_loss_raw": 1.403599699338277, "correct_loss_per_char": 0.7036634683609009, "incorrect_loss_per_char": 0.7017998496691386, "correct_loss_per_token": 1.4073269367218018, "incorrect_loss_per_token": 1.403599699338277, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4073269367218018, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": false, "logits_per_token": -1.4073269367218018, "logits_per_char": -0.7036634683609009, "num_chars": 2}, {"sum_logits": -1.1596845388412476, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": true, "logits_per_token": -1.1596845388412476, "logits_per_char": -0.5798422694206238, "num_chars": 2}, {"sum_logits": -1.528008222579956, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": false, "logits_per_token": -1.528008222579956, "logits_per_char": -0.764004111289978, "num_chars": 2}, {"sum_logits": -1.523106336593628, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": false, "logits_per_token": -1.523106336593628, "logits_per_char": -0.761553168296814, "num_chars": 2}], "label": 0, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 54, "native_id": 54, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3908562660217285, "incorrect_loss_raw": 1.3944621880849202, "correct_loss_per_char": 0.6954281330108643, "incorrect_loss_per_char": 0.6972310940424601, "correct_loss_per_token": 1.3908562660217285, "incorrect_loss_per_token": 1.3944621880849202, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2715928554534912, "num_tokens": 1, "num_tokens_all": 530, "is_greedy": true, "logits_per_token": -1.2715928554534912, "logits_per_char": -0.6357964277267456, "num_chars": 2}, {"sum_logits": -1.4322168827056885, "num_tokens": 1, "num_tokens_all": 530, "is_greedy": false, "logits_per_token": -1.4322168827056885, "logits_per_char": -0.7161084413528442, "num_chars": 2}, {"sum_logits": -1.3908562660217285, "num_tokens": 1, "num_tokens_all": 530, "is_greedy": false, "logits_per_token": -1.3908562660217285, "logits_per_char": -0.6954281330108643, "num_chars": 2}, {"sum_logits": -1.479576826095581, "num_tokens": 1, "num_tokens_all": 530, "is_greedy": false, "logits_per_token": -1.479576826095581, "logits_per_char": -0.7397884130477905, "num_chars": 2}], "label": 2, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 55, "native_id": 55, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5110076665878296, "incorrect_loss_raw": 1.362574537595113, "correct_loss_per_char": 0.7555038332939148, "incorrect_loss_per_char": 0.6812872687975565, "correct_loss_per_token": 1.5110076665878296, "incorrect_loss_per_token": 1.362574537595113, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.314400315284729, "num_tokens": 1, "num_tokens_all": 533, "is_greedy": false, "logits_per_token": -1.314400315284729, "logits_per_char": -0.6572001576423645, "num_chars": 2}, {"sum_logits": -1.2264471054077148, "num_tokens": 1, "num_tokens_all": 533, "is_greedy": true, "logits_per_token": -1.2264471054077148, "logits_per_char": -0.6132235527038574, "num_chars": 2}, {"sum_logits": -1.5110076665878296, "num_tokens": 1, "num_tokens_all": 533, "is_greedy": false, "logits_per_token": -1.5110076665878296, "logits_per_char": -0.7555038332939148, "num_chars": 2}, {"sum_logits": -1.5468761920928955, "num_tokens": 1, "num_tokens_all": 533, "is_greedy": false, "logits_per_token": -1.5468761920928955, "logits_per_char": -0.7734380960464478, "num_chars": 2}], "label": 2, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 56, "native_id": 56, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5569744110107422, "incorrect_loss_raw": 1.3545157512029011, "correct_loss_per_char": 0.7784872055053711, "incorrect_loss_per_char": 0.6772578756014506, "correct_loss_per_token": 1.5569744110107422, "incorrect_loss_per_token": 1.3545157512029011, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2872059345245361, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.2872059345245361, "logits_per_char": -0.6436029672622681, "num_chars": 2}, {"sum_logits": -1.220651388168335, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": true, "logits_per_token": -1.220651388168335, "logits_per_char": -0.6103256940841675, "num_chars": 2}, {"sum_logits": -1.5569744110107422, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.5569744110107422, "logits_per_char": -0.7784872055053711, "num_chars": 2}, {"sum_logits": -1.5556899309158325, "num_tokens": 1, "num_tokens_all": 580, "is_greedy": false, "logits_per_token": -1.5556899309158325, "logits_per_char": -0.7778449654579163, "num_chars": 2}], "label": 2, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 57, "native_id": 57, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4638030529022217, "incorrect_loss_raw": 1.3686405817667644, "correct_loss_per_char": 0.7319015264511108, "incorrect_loss_per_char": 0.6843202908833822, "correct_loss_per_token": 1.4638030529022217, "incorrect_loss_per_token": 1.3686405817667644, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4154762029647827, "num_tokens": 1, "num_tokens_all": 546, "is_greedy": false, "logits_per_token": -1.4154762029647827, "logits_per_char": -0.7077381014823914, "num_chars": 2}, {"sum_logits": -1.3037381172180176, "num_tokens": 1, "num_tokens_all": 546, "is_greedy": true, "logits_per_token": -1.3037381172180176, "logits_per_char": -0.6518690586090088, "num_chars": 2}, {"sum_logits": -1.4638030529022217, "num_tokens": 1, "num_tokens_all": 546, "is_greedy": false, "logits_per_token": -1.4638030529022217, "logits_per_char": -0.7319015264511108, "num_chars": 2}, {"sum_logits": -1.3867074251174927, "num_tokens": 1, "num_tokens_all": 546, "is_greedy": false, "logits_per_token": -1.3867074251174927, "logits_per_char": -0.6933537125587463, "num_chars": 2}], "label": 2, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 58, "native_id": 58, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4915393590927124, "incorrect_loss_raw": 1.3599619468053181, "correct_loss_per_char": 0.7457696795463562, "incorrect_loss_per_char": 0.6799809734026591, "correct_loss_per_token": 1.4915393590927124, "incorrect_loss_per_token": 1.3599619468053181, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4915393590927124, "num_tokens": 1, "num_tokens_all": 535, "is_greedy": false, "logits_per_token": -1.4915393590927124, "logits_per_char": -0.7457696795463562, "num_chars": 2}, {"sum_logits": -1.3557509183883667, "num_tokens": 1, "num_tokens_all": 535, "is_greedy": false, "logits_per_token": -1.3557509183883667, "logits_per_char": -0.6778754591941833, "num_chars": 2}, {"sum_logits": -1.4059475660324097, "num_tokens": 1, "num_tokens_all": 535, "is_greedy": false, "logits_per_token": -1.4059475660324097, "logits_per_char": -0.7029737830162048, "num_chars": 2}, {"sum_logits": -1.3181873559951782, "num_tokens": 1, "num_tokens_all": 535, "is_greedy": true, "logits_per_token": -1.3181873559951782, "logits_per_char": -0.6590936779975891, "num_chars": 2}], "label": 0, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 59, "native_id": 59, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3715296983718872, "incorrect_loss_raw": 1.4067635933558147, "correct_loss_per_char": 0.6857648491859436, "incorrect_loss_per_char": 0.7033817966779073, "correct_loss_per_token": 1.3715296983718872, "incorrect_loss_per_token": 1.4067635933558147, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3715296983718872, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": false, "logits_per_token": -1.3715296983718872, "logits_per_char": -0.6857648491859436, "num_chars": 2}, {"sum_logits": -1.2611393928527832, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": true, "logits_per_token": -1.2611393928527832, "logits_per_char": -0.6305696964263916, "num_chars": 2}, {"sum_logits": -1.4057577848434448, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": false, "logits_per_token": -1.4057577848434448, "logits_per_char": -0.7028788924217224, "num_chars": 2}, {"sum_logits": -1.5533936023712158, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": false, "logits_per_token": -1.5533936023712158, "logits_per_char": -0.7766968011856079, "num_chars": 2}], "label": 0, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 60, "native_id": 60, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4737298488616943, "incorrect_loss_raw": 1.3660527467727661, "correct_loss_per_char": 0.7368649244308472, "incorrect_loss_per_char": 0.6830263733863831, "correct_loss_per_token": 1.4737298488616943, "incorrect_loss_per_token": 1.3660527467727661, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3409929275512695, "num_tokens": 1, "num_tokens_all": 520, "is_greedy": false, "logits_per_token": -1.3409929275512695, "logits_per_char": -0.6704964637756348, "num_chars": 2}, {"sum_logits": -1.4737298488616943, "num_tokens": 1, "num_tokens_all": 520, "is_greedy": false, "logits_per_token": -1.4737298488616943, "logits_per_char": -0.7368649244308472, "num_chars": 2}, {"sum_logits": -1.4260379076004028, "num_tokens": 1, "num_tokens_all": 520, "is_greedy": false, "logits_per_token": -1.4260379076004028, "logits_per_char": -0.7130189538002014, "num_chars": 2}, {"sum_logits": -1.331127405166626, "num_tokens": 1, "num_tokens_all": 520, "is_greedy": true, "logits_per_token": -1.331127405166626, "logits_per_char": -0.665563702583313, "num_chars": 2}], "label": 1, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 61, "native_id": 61, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.3691062927246094, "incorrect_loss_raw": 1.399014671643575, "correct_loss_per_char": 0.6845531463623047, "incorrect_loss_per_char": 0.6995073358217875, "correct_loss_per_token": 1.3691062927246094, "incorrect_loss_per_token": 1.399014671643575, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3983154296875, "num_tokens": 1, "num_tokens_all": 526, "is_greedy": false, "logits_per_token": -1.3983154296875, "logits_per_char": -0.69915771484375, "num_chars": 2}, {"sum_logits": -1.4171571731567383, "num_tokens": 1, "num_tokens_all": 526, "is_greedy": false, "logits_per_token": -1.4171571731567383, "logits_per_char": -0.7085785865783691, "num_chars": 2}, {"sum_logits": -1.3815714120864868, "num_tokens": 1, "num_tokens_all": 526, "is_greedy": false, "logits_per_token": -1.3815714120864868, "logits_per_char": -0.6907857060432434, "num_chars": 2}, {"sum_logits": -1.3691062927246094, "num_tokens": 1, "num_tokens_all": 526, "is_greedy": true, "logits_per_token": -1.3691062927246094, "logits_per_char": -0.6845531463623047, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 62, "native_id": 62, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.318956971168518, "incorrect_loss_raw": 1.420493761698405, "correct_loss_per_char": 0.659478485584259, "incorrect_loss_per_char": 0.7102468808492025, "correct_loss_per_token": 1.318956971168518, "incorrect_loss_per_token": 1.420493761698405, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3066668510437012, "num_tokens": 1, "num_tokens_all": 529, "is_greedy": true, "logits_per_token": -1.3066668510437012, "logits_per_char": -0.6533334255218506, "num_chars": 2}, {"sum_logits": -1.318956971168518, "num_tokens": 1, "num_tokens_all": 529, "is_greedy": false, "logits_per_token": -1.318956971168518, "logits_per_char": -0.659478485584259, "num_chars": 2}, {"sum_logits": -1.50217604637146, "num_tokens": 1, "num_tokens_all": 529, "is_greedy": false, "logits_per_token": -1.50217604637146, "logits_per_char": -0.75108802318573, "num_chars": 2}, {"sum_logits": -1.4526383876800537, "num_tokens": 1, "num_tokens_all": 529, "is_greedy": false, "logits_per_token": -1.4526383876800537, "logits_per_char": -0.7263191938400269, "num_chars": 2}], "label": 1, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 63, "native_id": 63, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4272772073745728, "incorrect_loss_raw": 1.4055387576421101, "correct_loss_per_char": 0.7136386036872864, "incorrect_loss_per_char": 0.7027693788210551, "correct_loss_per_token": 1.4272772073745728, "incorrect_loss_per_token": 1.4055387576421101, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2428138256072998, "num_tokens": 1, "num_tokens_all": 561, "is_greedy": true, "logits_per_token": -1.2428138256072998, "logits_per_char": -0.6214069128036499, "num_chars": 2}, {"sum_logits": -1.4457297325134277, "num_tokens": 1, "num_tokens_all": 561, "is_greedy": false, "logits_per_token": -1.4457297325134277, "logits_per_char": -0.7228648662567139, "num_chars": 2}, {"sum_logits": -1.528072714805603, "num_tokens": 1, "num_tokens_all": 561, "is_greedy": false, "logits_per_token": -1.528072714805603, "logits_per_char": -0.7640363574028015, "num_chars": 2}, {"sum_logits": -1.4272772073745728, "num_tokens": 1, "num_tokens_all": 561, "is_greedy": false, "logits_per_token": -1.4272772073745728, "logits_per_char": -0.7136386036872864, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 64, "native_id": 64, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4533129930496216, "incorrect_loss_raw": 1.3798767725626628, "correct_loss_per_char": 0.7266564965248108, "incorrect_loss_per_char": 0.6899383862813314, "correct_loss_per_token": 1.4533129930496216, "incorrect_loss_per_token": 1.3798767725626628, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2304736375808716, "num_tokens": 1, "num_tokens_all": 504, "is_greedy": true, "logits_per_token": -1.2304736375808716, "logits_per_char": -0.6152368187904358, "num_chars": 2}, {"sum_logits": -1.4936928749084473, "num_tokens": 1, "num_tokens_all": 504, "is_greedy": false, "logits_per_token": -1.4936928749084473, "logits_per_char": -0.7468464374542236, "num_chars": 2}, {"sum_logits": -1.4533129930496216, "num_tokens": 1, "num_tokens_all": 504, "is_greedy": false, "logits_per_token": -1.4533129930496216, "logits_per_char": -0.7266564965248108, "num_chars": 2}, {"sum_logits": -1.4154638051986694, "num_tokens": 1, "num_tokens_all": 504, "is_greedy": false, "logits_per_token": -1.4154638051986694, "logits_per_char": -0.7077319025993347, "num_chars": 2}], "label": 2, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 65, "native_id": 65, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2707340717315674, "incorrect_loss_raw": 1.433720588684082, "correct_loss_per_char": 0.6353670358657837, "incorrect_loss_per_char": 0.716860294342041, "correct_loss_per_token": 1.2707340717315674, "incorrect_loss_per_token": 1.433720588684082, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3962043523788452, "num_tokens": 1, "num_tokens_all": 558, "is_greedy": false, "logits_per_token": -1.3962043523788452, "logits_per_char": -0.6981021761894226, "num_chars": 2}, {"sum_logits": -1.4600977897644043, "num_tokens": 1, "num_tokens_all": 558, "is_greedy": false, "logits_per_token": -1.4600977897644043, "logits_per_char": -0.7300488948822021, "num_chars": 2}, {"sum_logits": -1.4448596239089966, "num_tokens": 1, "num_tokens_all": 558, "is_greedy": false, "logits_per_token": -1.4448596239089966, "logits_per_char": -0.7224298119544983, "num_chars": 2}, {"sum_logits": -1.2707340717315674, "num_tokens": 1, "num_tokens_all": 558, "is_greedy": true, "logits_per_token": -1.2707340717315674, "logits_per_char": -0.6353670358657837, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 66, "native_id": 66, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.543077826499939, "incorrect_loss_raw": 1.3552446365356445, "correct_loss_per_char": 0.7715389132499695, "incorrect_loss_per_char": 0.6776223182678223, "correct_loss_per_token": 1.543077826499939, "incorrect_loss_per_token": 1.3552446365356445, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.29590904712677, "num_tokens": 1, "num_tokens_all": 572, "is_greedy": false, "logits_per_token": -1.29590904712677, "logits_per_char": -0.647954523563385, "num_chars": 2}, {"sum_logits": -1.2142232656478882, "num_tokens": 1, "num_tokens_all": 572, "is_greedy": true, "logits_per_token": -1.2142232656478882, "logits_per_char": -0.6071116328239441, "num_chars": 2}, {"sum_logits": -1.5556015968322754, "num_tokens": 1, "num_tokens_all": 572, "is_greedy": false, "logits_per_token": -1.5556015968322754, "logits_per_char": -0.7778007984161377, "num_chars": 2}, {"sum_logits": -1.543077826499939, "num_tokens": 1, "num_tokens_all": 572, "is_greedy": false, "logits_per_token": -1.543077826499939, "logits_per_char": -0.7715389132499695, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 67, "native_id": 67, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4529650211334229, "incorrect_loss_raw": 1.371021827061971, "correct_loss_per_char": 0.7264825105667114, "incorrect_loss_per_char": 0.6855109135309855, "correct_loss_per_token": 1.4529650211334229, "incorrect_loss_per_token": 1.371021827061971, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3896784782409668, "num_tokens": 1, "num_tokens_all": 557, "is_greedy": false, "logits_per_token": -1.3896784782409668, "logits_per_char": -0.6948392391204834, "num_chars": 2}, {"sum_logits": -1.3137394189834595, "num_tokens": 1, "num_tokens_all": 557, "is_greedy": true, "logits_per_token": -1.3137394189834595, "logits_per_char": -0.6568697094917297, "num_chars": 2}, {"sum_logits": -1.4529650211334229, "num_tokens": 1, "num_tokens_all": 557, "is_greedy": false, "logits_per_token": -1.4529650211334229, "logits_per_char": -0.7264825105667114, "num_chars": 2}, {"sum_logits": -1.4096475839614868, "num_tokens": 1, "num_tokens_all": 557, "is_greedy": false, "logits_per_token": -1.4096475839614868, "logits_per_char": -0.7048237919807434, "num_chars": 2}], "label": 2, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 68, "native_id": 68, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5821281671524048, "incorrect_loss_raw": 1.3394180138905842, "correct_loss_per_char": 0.7910640835762024, "incorrect_loss_per_char": 0.6697090069452921, "correct_loss_per_token": 1.5821281671524048, "incorrect_loss_per_token": 1.3394180138905842, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2151813507080078, "num_tokens": 1, "num_tokens_all": 505, "is_greedy": true, "logits_per_token": -1.2151813507080078, "logits_per_char": -0.6075906753540039, "num_chars": 2}, {"sum_logits": -1.3750841617584229, "num_tokens": 1, "num_tokens_all": 505, "is_greedy": false, "logits_per_token": -1.3750841617584229, "logits_per_char": -0.6875420808792114, "num_chars": 2}, {"sum_logits": -1.4279885292053223, "num_tokens": 1, "num_tokens_all": 505, "is_greedy": false, "logits_per_token": -1.4279885292053223, "logits_per_char": -0.7139942646026611, "num_chars": 2}, {"sum_logits": -1.5821281671524048, "num_tokens": 1, "num_tokens_all": 505, "is_greedy": false, "logits_per_token": -1.5821281671524048, "logits_per_char": -0.7910640835762024, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 69, "native_id": 69, "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3948090076446533, "incorrect_loss_raw": 1.3899824619293213, "correct_loss_per_char": 0.6974045038223267, "incorrect_loss_per_char": 0.6949912309646606, "correct_loss_per_token": 1.3948090076446533, "incorrect_loss_per_token": 1.3899824619293213, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.376896858215332, "num_tokens": 1, "num_tokens_all": 519, "is_greedy": false, "logits_per_token": -1.376896858215332, "logits_per_char": -0.688448429107666, "num_chars": 2}, {"sum_logits": -1.4497443437576294, "num_tokens": 1, "num_tokens_all": 519, "is_greedy": false, "logits_per_token": -1.4497443437576294, "logits_per_char": -0.7248721718788147, "num_chars": 2}, {"sum_logits": -1.3433061838150024, "num_tokens": 1, "num_tokens_all": 519, "is_greedy": true, "logits_per_token": -1.3433061838150024, "logits_per_char": -0.6716530919075012, "num_chars": 2}, {"sum_logits": -1.3948090076446533, "num_tokens": 1, "num_tokens_all": 519, "is_greedy": false, "logits_per_token": -1.3948090076446533, "logits_per_char": -0.6974045038223267, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 70, "native_id": 70, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2875921726226807, "incorrect_loss_raw": 1.4397018353144329, "correct_loss_per_char": 0.6437960863113403, "incorrect_loss_per_char": 0.7198509176572164, "correct_loss_per_token": 1.2875921726226807, "incorrect_loss_per_token": 1.4397018353144329, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2880539894104004, "num_tokens": 1, "num_tokens_all": 503, "is_greedy": false, "logits_per_token": -1.2880539894104004, "logits_per_char": -0.6440269947052002, "num_chars": 2}, {"sum_logits": -1.4478480815887451, "num_tokens": 1, "num_tokens_all": 503, "is_greedy": false, "logits_per_token": -1.4478480815887451, "logits_per_char": -0.7239240407943726, "num_chars": 2}, {"sum_logits": -1.5832034349441528, "num_tokens": 1, "num_tokens_all": 503, "is_greedy": false, "logits_per_token": -1.5832034349441528, "logits_per_char": -0.7916017174720764, "num_chars": 2}, {"sum_logits": -1.2875921726226807, "num_tokens": 1, "num_tokens_all": 503, "is_greedy": true, "logits_per_token": -1.2875921726226807, "logits_per_char": -0.6437960863113403, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 71, "native_id": 71, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5019822120666504, "incorrect_loss_raw": 1.3608218828837078, "correct_loss_per_char": 0.7509911060333252, "incorrect_loss_per_char": 0.6804109414418539, "correct_loss_per_token": 1.5019822120666504, "incorrect_loss_per_token": 1.3608218828837078, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2928805351257324, "num_tokens": 1, "num_tokens_all": 519, "is_greedy": true, "logits_per_token": -1.2928805351257324, "logits_per_char": -0.6464402675628662, "num_chars": 2}, {"sum_logits": -1.303959846496582, "num_tokens": 1, "num_tokens_all": 519, "is_greedy": false, "logits_per_token": -1.303959846496582, "logits_per_char": -0.651979923248291, "num_chars": 2}, {"sum_logits": -1.4856252670288086, "num_tokens": 1, "num_tokens_all": 519, "is_greedy": false, "logits_per_token": -1.4856252670288086, "logits_per_char": -0.7428126335144043, "num_chars": 2}, {"sum_logits": -1.5019822120666504, "num_tokens": 1, "num_tokens_all": 519, "is_greedy": false, "logits_per_token": -1.5019822120666504, "logits_per_char": -0.7509911060333252, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 72, "native_id": 72, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.276318073272705, "incorrect_loss_raw": 1.4333057006200154, "correct_loss_per_char": 0.6381590366363525, "incorrect_loss_per_char": 0.7166528503100077, "correct_loss_per_token": 1.276318073272705, "incorrect_loss_per_token": 1.4333057006200154, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.276318073272705, "num_tokens": 1, "num_tokens_all": 497, "is_greedy": true, "logits_per_token": -1.276318073272705, "logits_per_char": -0.6381590366363525, "num_chars": 2}, {"sum_logits": -1.3884443044662476, "num_tokens": 1, "num_tokens_all": 497, "is_greedy": false, "logits_per_token": -1.3884443044662476, "logits_per_char": -0.6942221522331238, "num_chars": 2}, {"sum_logits": -1.4008036851882935, "num_tokens": 1, "num_tokens_all": 497, "is_greedy": false, "logits_per_token": -1.4008036851882935, "logits_per_char": -0.7004018425941467, "num_chars": 2}, {"sum_logits": -1.5106691122055054, "num_tokens": 1, "num_tokens_all": 497, "is_greedy": false, "logits_per_token": -1.5106691122055054, "logits_per_char": -0.7553345561027527, "num_chars": 2}], "label": 0, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 73, "native_id": 73, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3946712017059326, "incorrect_loss_raw": 1.3902374108632405, "correct_loss_per_char": 0.6973356008529663, "incorrect_loss_per_char": 0.6951187054316202, "correct_loss_per_token": 1.3946712017059326, "incorrect_loss_per_token": 1.3902374108632405, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3946712017059326, "num_tokens": 1, "num_tokens_all": 503, "is_greedy": false, "logits_per_token": -1.3946712017059326, "logits_per_char": -0.6973356008529663, "num_chars": 2}, {"sum_logits": -1.3612852096557617, "num_tokens": 1, "num_tokens_all": 503, "is_greedy": true, "logits_per_token": -1.3612852096557617, "logits_per_char": -0.6806426048278809, "num_chars": 2}, {"sum_logits": -1.3849157094955444, "num_tokens": 1, "num_tokens_all": 503, "is_greedy": false, "logits_per_token": -1.3849157094955444, "logits_per_char": -0.6924578547477722, "num_chars": 2}, {"sum_logits": -1.4245113134384155, "num_tokens": 1, "num_tokens_all": 503, "is_greedy": false, "logits_per_token": -1.4245113134384155, "logits_per_char": -0.7122556567192078, "num_chars": 2}], "label": 0, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 74, "native_id": 74, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4580485820770264, "incorrect_loss_raw": 1.3762538035710652, "correct_loss_per_char": 0.7290242910385132, "incorrect_loss_per_char": 0.6881269017855326, "correct_loss_per_token": 1.4580485820770264, "incorrect_loss_per_token": 1.3762538035710652, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3016444444656372, "num_tokens": 1, "num_tokens_all": 524, "is_greedy": true, "logits_per_token": -1.3016444444656372, "logits_per_char": -0.6508222222328186, "num_chars": 2}, {"sum_logits": -1.4580485820770264, "num_tokens": 1, "num_tokens_all": 524, "is_greedy": false, "logits_per_token": -1.4580485820770264, "logits_per_char": -0.7290242910385132, "num_chars": 2}, {"sum_logits": -1.5019562244415283, "num_tokens": 1, "num_tokens_all": 524, "is_greedy": false, "logits_per_token": -1.5019562244415283, "logits_per_char": -0.7509781122207642, "num_chars": 2}, {"sum_logits": -1.3251607418060303, "num_tokens": 1, "num_tokens_all": 524, "is_greedy": false, "logits_per_token": -1.3251607418060303, "logits_per_char": -0.6625803709030151, "num_chars": 2}], "label": 1, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 75, "native_id": 75, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5036613941192627, "incorrect_loss_raw": 1.3598244587580364, "correct_loss_per_char": 0.7518306970596313, "incorrect_loss_per_char": 0.6799122293790182, "correct_loss_per_token": 1.5036613941192627, "incorrect_loss_per_token": 1.3598244587580364, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.498046875, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": false, "logits_per_token": -1.498046875, "logits_per_char": -0.7490234375, "num_chars": 2}, {"sum_logits": -1.2545580863952637, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": true, "logits_per_token": -1.2545580863952637, "logits_per_char": -0.6272790431976318, "num_chars": 2}, {"sum_logits": -1.5036613941192627, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": false, "logits_per_token": -1.5036613941192627, "logits_per_char": -0.7518306970596313, "num_chars": 2}, {"sum_logits": -1.3268684148788452, "num_tokens": 1, "num_tokens_all": 571, "is_greedy": false, "logits_per_token": -1.3268684148788452, "logits_per_char": -0.6634342074394226, "num_chars": 2}], "label": 2, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 76, "native_id": 76, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5038377046585083, "incorrect_loss_raw": 1.3590619166692097, "correct_loss_per_char": 0.7519188523292542, "incorrect_loss_per_char": 0.6795309583346049, "correct_loss_per_token": 1.5038377046585083, "incorrect_loss_per_token": 1.3590619166692097, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5038377046585083, "num_tokens": 1, "num_tokens_all": 605, "is_greedy": false, "logits_per_token": -1.5038377046585083, "logits_per_char": -0.7519188523292542, "num_chars": 2}, {"sum_logits": -1.2497836351394653, "num_tokens": 1, "num_tokens_all": 605, "is_greedy": true, "logits_per_token": -1.2497836351394653, "logits_per_char": -0.6248918175697327, "num_chars": 2}, {"sum_logits": -1.3975552320480347, "num_tokens": 1, "num_tokens_all": 605, "is_greedy": false, "logits_per_token": -1.3975552320480347, "logits_per_char": -0.6987776160240173, "num_chars": 2}, {"sum_logits": -1.4298468828201294, "num_tokens": 1, "num_tokens_all": 605, "is_greedy": false, "logits_per_token": -1.4298468828201294, "logits_per_char": -0.7149234414100647, "num_chars": 2}], "label": 0, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 77, "native_id": 77, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.330811858177185, "incorrect_loss_raw": 1.4162448644638062, "correct_loss_per_char": 0.6654059290885925, "incorrect_loss_per_char": 0.7081224322319031, "correct_loss_per_token": 1.330811858177185, "incorrect_loss_per_token": 1.4162448644638062, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.330811858177185, "num_tokens": 1, "num_tokens_all": 524, "is_greedy": true, "logits_per_token": -1.330811858177185, "logits_per_char": -0.6654059290885925, "num_chars": 2}, {"sum_logits": -1.3739213943481445, "num_tokens": 1, "num_tokens_all": 524, "is_greedy": false, "logits_per_token": -1.3739213943481445, "logits_per_char": -0.6869606971740723, "num_chars": 2}, {"sum_logits": -1.525261402130127, "num_tokens": 1, "num_tokens_all": 524, "is_greedy": false, "logits_per_token": -1.525261402130127, "logits_per_char": -0.7626307010650635, "num_chars": 2}, {"sum_logits": -1.349551796913147, "num_tokens": 1, "num_tokens_all": 524, "is_greedy": false, "logits_per_token": -1.349551796913147, "logits_per_char": -0.6747758984565735, "num_chars": 2}], "label": 0, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 78, "native_id": 78, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4205999374389648, "incorrect_loss_raw": 1.3813758691151936, "correct_loss_per_char": 0.7102999687194824, "incorrect_loss_per_char": 0.6906879345575968, "correct_loss_per_token": 1.4205999374389648, "incorrect_loss_per_token": 1.3813758691151936, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4205999374389648, "num_tokens": 1, "num_tokens_all": 542, "is_greedy": false, "logits_per_token": -1.4205999374389648, "logits_per_char": -0.7102999687194824, "num_chars": 2}, {"sum_logits": -1.3538748025894165, "num_tokens": 1, "num_tokens_all": 542, "is_greedy": false, "logits_per_token": -1.3538748025894165, "logits_per_char": -0.6769374012947083, "num_chars": 2}, {"sum_logits": -1.4411121606826782, "num_tokens": 1, "num_tokens_all": 542, "is_greedy": false, "logits_per_token": -1.4411121606826782, "logits_per_char": -0.7205560803413391, "num_chars": 2}, {"sum_logits": -1.3491406440734863, "num_tokens": 1, "num_tokens_all": 542, "is_greedy": true, "logits_per_token": -1.3491406440734863, "logits_per_char": -0.6745703220367432, "num_chars": 2}], "label": 0, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 79, "native_id": 79, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.325425148010254, "incorrect_loss_raw": 1.4241989850997925, "correct_loss_per_char": 0.662712574005127, "incorrect_loss_per_char": 0.7120994925498962, "correct_loss_per_token": 1.325425148010254, "incorrect_loss_per_token": 1.4241989850997925, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2307177782058716, "num_tokens": 1, "num_tokens_all": 515, "is_greedy": true, "logits_per_token": -1.2307177782058716, "logits_per_char": -0.6153588891029358, "num_chars": 2}, {"sum_logits": -1.325425148010254, "num_tokens": 1, "num_tokens_all": 515, "is_greedy": false, "logits_per_token": -1.325425148010254, "logits_per_char": -0.662712574005127, "num_chars": 2}, {"sum_logits": -1.4939063787460327, "num_tokens": 1, "num_tokens_all": 515, "is_greedy": false, "logits_per_token": -1.4939063787460327, "logits_per_char": -0.7469531893730164, "num_chars": 2}, {"sum_logits": -1.5479727983474731, "num_tokens": 1, "num_tokens_all": 515, "is_greedy": false, "logits_per_token": -1.5479727983474731, "logits_per_char": -0.7739863991737366, "num_chars": 2}], "label": 1, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 80, "native_id": 80, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.2722461223602295, "incorrect_loss_raw": 1.4535185098648071, "correct_loss_per_char": 0.6361230611801147, "incorrect_loss_per_char": 0.7267592549324036, "correct_loss_per_token": 1.2722461223602295, "incorrect_loss_per_token": 1.4535185098648071, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2036452293395996, "num_tokens": 1, "num_tokens_all": 507, "is_greedy": true, "logits_per_token": -1.2036452293395996, "logits_per_char": -0.6018226146697998, "num_chars": 2}, {"sum_logits": -1.2722461223602295, "num_tokens": 1, "num_tokens_all": 507, "is_greedy": false, "logits_per_token": -1.2722461223602295, "logits_per_char": -0.6361230611801147, "num_chars": 2}, {"sum_logits": -1.5409828424453735, "num_tokens": 1, "num_tokens_all": 507, "is_greedy": false, "logits_per_token": -1.5409828424453735, "logits_per_char": -0.7704914212226868, "num_chars": 2}, {"sum_logits": -1.6159274578094482, "num_tokens": 1, "num_tokens_all": 507, "is_greedy": false, "logits_per_token": -1.6159274578094482, "logits_per_char": -0.8079637289047241, "num_chars": 2}], "label": 1, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 81, "native_id": 81, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3356714248657227, "incorrect_loss_raw": 1.4150586525599163, "correct_loss_per_char": 0.6678357124328613, "incorrect_loss_per_char": 0.7075293262799581, "correct_loss_per_token": 1.3356714248657227, "incorrect_loss_per_token": 1.4150586525599163, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.510462760925293, "num_tokens": 1, "num_tokens_all": 538, "is_greedy": false, "logits_per_token": -1.510462760925293, "logits_per_char": -0.7552313804626465, "num_chars": 2}, {"sum_logits": -1.3356714248657227, "num_tokens": 1, "num_tokens_all": 538, "is_greedy": false, "logits_per_token": -1.3356714248657227, "logits_per_char": -0.6678357124328613, "num_chars": 2}, {"sum_logits": -1.4515169858932495, "num_tokens": 1, "num_tokens_all": 538, "is_greedy": false, "logits_per_token": -1.4515169858932495, "logits_per_char": -0.7257584929466248, "num_chars": 2}, {"sum_logits": -1.283196210861206, "num_tokens": 1, "num_tokens_all": 538, "is_greedy": true, "logits_per_token": -1.283196210861206, "logits_per_char": -0.641598105430603, "num_chars": 2}], "label": 1, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 82, "native_id": 82, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.331871509552002, "incorrect_loss_raw": 1.4132047891616821, "correct_loss_per_char": 0.665935754776001, "incorrect_loss_per_char": 0.7066023945808411, "correct_loss_per_token": 1.331871509552002, "incorrect_loss_per_token": 1.4132047891616821, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3821799755096436, "num_tokens": 1, "num_tokens_all": 532, "is_greedy": false, "logits_per_token": -1.3821799755096436, "logits_per_char": -0.6910899877548218, "num_chars": 2}, {"sum_logits": -1.334655523300171, "num_tokens": 1, "num_tokens_all": 532, "is_greedy": false, "logits_per_token": -1.334655523300171, "logits_per_char": -0.6673277616500854, "num_chars": 2}, {"sum_logits": -1.522778868675232, "num_tokens": 1, "num_tokens_all": 532, "is_greedy": false, "logits_per_token": -1.522778868675232, "logits_per_char": -0.761389434337616, "num_chars": 2}, {"sum_logits": -1.331871509552002, "num_tokens": 1, "num_tokens_all": 532, "is_greedy": true, "logits_per_token": -1.331871509552002, "logits_per_char": -0.665935754776001, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 83, "native_id": 83, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3518255949020386, "incorrect_loss_raw": 1.4168519179026287, "correct_loss_per_char": 0.6759127974510193, "incorrect_loss_per_char": 0.7084259589513143, "correct_loss_per_token": 1.3518255949020386, "incorrect_loss_per_token": 1.4168519179026287, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2358843088150024, "num_tokens": 1, "num_tokens_all": 507, "is_greedy": true, "logits_per_token": -1.2358843088150024, "logits_per_char": -0.6179421544075012, "num_chars": 2}, {"sum_logits": -1.4104403257369995, "num_tokens": 1, "num_tokens_all": 507, "is_greedy": false, "logits_per_token": -1.4104403257369995, "logits_per_char": -0.7052201628684998, "num_chars": 2}, {"sum_logits": -1.6042311191558838, "num_tokens": 1, "num_tokens_all": 507, "is_greedy": false, "logits_per_token": -1.6042311191558838, "logits_per_char": -0.8021155595779419, "num_chars": 2}, {"sum_logits": -1.3518255949020386, "num_tokens": 1, "num_tokens_all": 507, "is_greedy": false, "logits_per_token": -1.3518255949020386, "logits_per_char": -0.6759127974510193, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 84, "native_id": 84, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.5574132204055786, "incorrect_loss_raw": 1.3815457423528035, "correct_loss_per_char": 0.7787066102027893, "incorrect_loss_per_char": 0.6907728711764017, "correct_loss_per_token": 1.5574132204055786, "incorrect_loss_per_token": 1.3815457423528035, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.182985544204712, "num_tokens": 1, "num_tokens_all": 531, "is_greedy": true, "logits_per_token": -1.182985544204712, "logits_per_char": -0.591492772102356, "num_chars": 2}, {"sum_logits": -1.4685453176498413, "num_tokens": 1, "num_tokens_all": 531, "is_greedy": false, "logits_per_token": -1.4685453176498413, "logits_per_char": -0.7342726588249207, "num_chars": 2}, {"sum_logits": -1.4931063652038574, "num_tokens": 1, "num_tokens_all": 531, "is_greedy": false, "logits_per_token": -1.4931063652038574, "logits_per_char": -0.7465531826019287, "num_chars": 2}, {"sum_logits": -1.5574132204055786, "num_tokens": 1, "num_tokens_all": 531, "is_greedy": false, "logits_per_token": -1.5574132204055786, "logits_per_char": -0.7787066102027893, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 85, "native_id": 85, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.3780919313430786, "incorrect_loss_raw": 1.3988394339879353, "correct_loss_per_char": 0.6890459656715393, "incorrect_loss_per_char": 0.6994197169939677, "correct_loss_per_token": 1.3780919313430786, "incorrect_loss_per_token": 1.3988394339879353, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.5207743644714355, "num_tokens": 1, "num_tokens_all": 644, "is_greedy": false, "logits_per_token": -1.5207743644714355, "logits_per_char": -0.7603871822357178, "num_chars": 2}, {"sum_logits": -1.3216608762741089, "num_tokens": 1, "num_tokens_all": 644, "is_greedy": true, "logits_per_token": -1.3216608762741089, "logits_per_char": -0.6608304381370544, "num_chars": 2}, {"sum_logits": -1.3540830612182617, "num_tokens": 1, "num_tokens_all": 644, "is_greedy": false, "logits_per_token": -1.3540830612182617, "logits_per_char": -0.6770415306091309, "num_chars": 2}, {"sum_logits": -1.3780919313430786, "num_tokens": 1, "num_tokens_all": 644, "is_greedy": false, "logits_per_token": -1.3780919313430786, "logits_per_char": -0.6890459656715393, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 86, "native_id": 86, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.432599663734436, "incorrect_loss_raw": 1.3880101839701335, "correct_loss_per_char": 0.716299831867218, "incorrect_loss_per_char": 0.6940050919850668, "correct_loss_per_token": 1.432599663734436, "incorrect_loss_per_token": 1.3880101839701335, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.6018180847167969, "num_tokens": 1, "num_tokens_all": 633, "is_greedy": false, "logits_per_token": -1.6018180847167969, "logits_per_char": -0.8009090423583984, "num_chars": 2}, {"sum_logits": -1.2644174098968506, "num_tokens": 1, "num_tokens_all": 633, "is_greedy": true, "logits_per_token": -1.2644174098968506, "logits_per_char": -0.6322087049484253, "num_chars": 2}, {"sum_logits": -1.297795057296753, "num_tokens": 1, "num_tokens_all": 633, "is_greedy": false, "logits_per_token": -1.297795057296753, "logits_per_char": -0.6488975286483765, "num_chars": 2}, {"sum_logits": -1.432599663734436, "num_tokens": 1, "num_tokens_all": 633, "is_greedy": false, "logits_per_token": -1.432599663734436, "logits_per_char": -0.716299831867218, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 87, "native_id": 87, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.462961196899414, "incorrect_loss_raw": 1.3720691998799641, "correct_loss_per_char": 0.731480598449707, "incorrect_loss_per_char": 0.6860345999399821, "correct_loss_per_token": 1.462961196899414, "incorrect_loss_per_token": 1.3720691998799641, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3026951551437378, "num_tokens": 1, "num_tokens_all": 510, "is_greedy": true, "logits_per_token": -1.3026951551437378, "logits_per_char": -0.6513475775718689, "num_chars": 2}, {"sum_logits": -1.3339627981185913, "num_tokens": 1, "num_tokens_all": 510, "is_greedy": false, "logits_per_token": -1.3339627981185913, "logits_per_char": -0.6669813990592957, "num_chars": 2}, {"sum_logits": -1.4795496463775635, "num_tokens": 1, "num_tokens_all": 510, "is_greedy": false, "logits_per_token": -1.4795496463775635, "logits_per_char": -0.7397748231887817, "num_chars": 2}, {"sum_logits": -1.462961196899414, "num_tokens": 1, "num_tokens_all": 510, "is_greedy": false, "logits_per_token": -1.462961196899414, "logits_per_char": -0.731480598449707, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 88, "native_id": 88, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4344950914382935, "incorrect_loss_raw": 1.3865909576416016, "correct_loss_per_char": 0.7172475457191467, "incorrect_loss_per_char": 0.6932954788208008, "correct_loss_per_token": 1.4344950914382935, "incorrect_loss_per_token": 1.3865909576416016, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3717373609542847, "num_tokens": 1, "num_tokens_all": 502, "is_greedy": false, "logits_per_token": -1.3717373609542847, "logits_per_char": -0.6858686804771423, "num_chars": 2}, {"sum_logits": -1.275681734085083, "num_tokens": 1, "num_tokens_all": 502, "is_greedy": true, "logits_per_token": -1.275681734085083, "logits_per_char": -0.6378408670425415, "num_chars": 2}, {"sum_logits": -1.512353777885437, "num_tokens": 1, "num_tokens_all": 502, "is_greedy": false, "logits_per_token": -1.512353777885437, "logits_per_char": -0.7561768889427185, "num_chars": 2}, {"sum_logits": -1.4344950914382935, "num_tokens": 1, "num_tokens_all": 502, "is_greedy": false, "logits_per_token": -1.4344950914382935, "logits_per_char": -0.7172475457191467, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 89, "native_id": 89, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": null, "correct_loss_raw": 1.2748063802719116, "incorrect_loss_raw": 1.431125521659851, "correct_loss_per_char": 0.6374031901359558, "incorrect_loss_per_char": 0.7155627608299255, "correct_loss_per_token": 1.2748063802719116, "incorrect_loss_per_token": 1.431125521659851, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4455146789550781, "num_tokens": 1, "num_tokens_all": 612, "is_greedy": false, "logits_per_token": -1.4455146789550781, "logits_per_char": -0.7227573394775391, "num_chars": 2}, {"sum_logits": -1.2748063802719116, "num_tokens": 1, "num_tokens_all": 612, "is_greedy": true, "logits_per_token": -1.2748063802719116, "logits_per_char": -0.6374031901359558, "num_chars": 2}, {"sum_logits": -1.4276052713394165, "num_tokens": 1, "num_tokens_all": 612, "is_greedy": false, "logits_per_token": -1.4276052713394165, "logits_per_char": -0.7138026356697083, "num_chars": 2}, {"sum_logits": -1.4202566146850586, "num_tokens": 1, "num_tokens_all": 612, "is_greedy": false, "logits_per_token": -1.4202566146850586, "logits_per_char": -0.7101283073425293, "num_chars": 2}], "label": 1, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 90, "native_id": 90, "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4029464721679688, "incorrect_loss_raw": 1.3862876892089844, "correct_loss_per_char": 0.7014732360839844, "incorrect_loss_per_char": 0.6931438446044922, "correct_loss_per_token": 1.4029464721679688, "incorrect_loss_per_token": 1.3862876892089844, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.4029464721679688, "num_tokens": 1, "num_tokens_all": 523, "is_greedy": false, "logits_per_token": -1.4029464721679688, "logits_per_char": -0.7014732360839844, "num_chars": 2}, {"sum_logits": -1.3714139461517334, "num_tokens": 1, "num_tokens_all": 523, "is_greedy": false, "logits_per_token": -1.3714139461517334, "logits_per_char": -0.6857069730758667, "num_chars": 2}, {"sum_logits": -1.426654577255249, "num_tokens": 1, "num_tokens_all": 523, "is_greedy": false, "logits_per_token": -1.426654577255249, "logits_per_char": -0.7133272886276245, "num_chars": 2}, {"sum_logits": -1.3607945442199707, "num_tokens": 1, "num_tokens_all": 523, "is_greedy": true, "logits_per_token": -1.3607945442199707, "logits_per_char": -0.6803972721099854, "num_chars": 2}], "label": 0, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 91, "native_id": 91, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4873454570770264, "incorrect_loss_raw": 1.3665603796641033, "correct_loss_per_char": 0.7436727285385132, "incorrect_loss_per_char": 0.6832801898320516, "correct_loss_per_token": 1.4873454570770264, "incorrect_loss_per_token": 1.3665603796641033, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.23740553855896, "num_tokens": 1, "num_tokens_all": 522, "is_greedy": true, "logits_per_token": -1.23740553855896, "logits_per_char": -0.61870276927948, "num_chars": 2}, {"sum_logits": -1.3941600322723389, "num_tokens": 1, "num_tokens_all": 522, "is_greedy": false, "logits_per_token": -1.3941600322723389, "logits_per_char": -0.6970800161361694, "num_chars": 2}, {"sum_logits": -1.4681155681610107, "num_tokens": 1, "num_tokens_all": 522, "is_greedy": false, "logits_per_token": -1.4681155681610107, "logits_per_char": -0.7340577840805054, "num_chars": 2}, {"sum_logits": -1.4873454570770264, "num_tokens": 1, "num_tokens_all": 522, "is_greedy": false, "logits_per_token": -1.4873454570770264, "logits_per_char": -0.7436727285385132, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 92, "native_id": 92, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.426455020904541, "incorrect_loss_raw": 1.380639672279358, "correct_loss_per_char": 0.7132275104522705, "incorrect_loss_per_char": 0.690319836139679, "correct_loss_per_token": 1.426455020904541, "incorrect_loss_per_token": 1.380639672279358, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3184866905212402, "num_tokens": 1, "num_tokens_all": 539, "is_greedy": true, "logits_per_token": -1.3184866905212402, "logits_per_char": -0.6592433452606201, "num_chars": 2}, {"sum_logits": -1.3621889352798462, "num_tokens": 1, "num_tokens_all": 539, "is_greedy": false, "logits_per_token": -1.3621889352798462, "logits_per_char": -0.6810944676399231, "num_chars": 2}, {"sum_logits": -1.4612433910369873, "num_tokens": 1, "num_tokens_all": 539, "is_greedy": false, "logits_per_token": -1.4612433910369873, "logits_per_char": -0.7306216955184937, "num_chars": 2}, {"sum_logits": -1.426455020904541, "num_tokens": 1, "num_tokens_all": 539, "is_greedy": false, "logits_per_token": -1.426455020904541, "logits_per_char": -0.7132275104522705, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 93, "native_id": 93, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.385583519935608, "incorrect_loss_raw": 1.4039785861968994, "correct_loss_per_char": 0.692791759967804, "incorrect_loss_per_char": 0.7019892930984497, "correct_loss_per_token": 1.385583519935608, "incorrect_loss_per_token": 1.4039785861968994, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.385583519935608, "num_tokens": 1, "num_tokens_all": 540, "is_greedy": false, "logits_per_token": -1.385583519935608, "logits_per_char": -0.692791759967804, "num_chars": 2}, {"sum_logits": -1.2272394895553589, "num_tokens": 1, "num_tokens_all": 540, "is_greedy": true, "logits_per_token": -1.2272394895553589, "logits_per_char": -0.6136197447776794, "num_chars": 2}, {"sum_logits": -1.5863919258117676, "num_tokens": 1, "num_tokens_all": 540, "is_greedy": false, "logits_per_token": -1.5863919258117676, "logits_per_char": -0.7931959629058838, "num_chars": 2}, {"sum_logits": -1.3983043432235718, "num_tokens": 1, "num_tokens_all": 540, "is_greedy": false, "logits_per_token": -1.3983043432235718, "logits_per_char": -0.6991521716117859, "num_chars": 2}], "label": 0, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 94, "native_id": 94, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4960910081863403, "incorrect_loss_raw": 1.366624116897583, "correct_loss_per_char": 0.7480455040931702, "incorrect_loss_per_char": 0.6833120584487915, "correct_loss_per_token": 1.4960910081863403, "incorrect_loss_per_token": 1.366624116897583, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2266918420791626, "num_tokens": 1, "num_tokens_all": 544, "is_greedy": true, "logits_per_token": -1.2266918420791626, "logits_per_char": -0.6133459210395813, "num_chars": 2}, {"sum_logits": -1.3254870176315308, "num_tokens": 1, "num_tokens_all": 544, "is_greedy": false, "logits_per_token": -1.3254870176315308, "logits_per_char": -0.6627435088157654, "num_chars": 2}, {"sum_logits": -1.5476934909820557, "num_tokens": 1, "num_tokens_all": 544, "is_greedy": false, "logits_per_token": -1.5476934909820557, "logits_per_char": -0.7738467454910278, "num_chars": 2}, {"sum_logits": -1.4960910081863403, "num_tokens": 1, "num_tokens_all": 544, "is_greedy": false, "logits_per_token": -1.4960910081863403, "logits_per_char": -0.7480455040931702, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 95, "native_id": 95, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.572027325630188, "incorrect_loss_raw": 1.3448315064112346, "correct_loss_per_char": 0.786013662815094, "incorrect_loss_per_char": 0.6724157532056173, "correct_loss_per_token": 1.572027325630188, "incorrect_loss_per_token": 1.3448315064112346, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3814778327941895, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": false, "logits_per_token": -1.3814778327941895, "logits_per_char": -0.6907389163970947, "num_chars": 2}, {"sum_logits": -1.3011460304260254, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": true, "logits_per_token": -1.3011460304260254, "logits_per_char": -0.6505730152130127, "num_chars": 2}, {"sum_logits": -1.3518706560134888, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": false, "logits_per_token": -1.3518706560134888, "logits_per_char": -0.6759353280067444, "num_chars": 2}, {"sum_logits": -1.572027325630188, "num_tokens": 1, "num_tokens_all": 494, "is_greedy": false, "logits_per_token": -1.572027325630188, "logits_per_char": -0.786013662815094, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 96, "native_id": 96, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4087361097335815, "incorrect_loss_raw": 1.3906326293945312, "correct_loss_per_char": 0.7043680548667908, "incorrect_loss_per_char": 0.6953163146972656, "correct_loss_per_token": 1.4087361097335815, "incorrect_loss_per_token": 1.3906326293945312, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.3395439386367798, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": true, "logits_per_token": -1.3395439386367798, "logits_per_char": -0.6697719693183899, "num_chars": 2}, {"sum_logits": -1.366660714149475, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": false, "logits_per_token": -1.366660714149475, "logits_per_char": -0.6833303570747375, "num_chars": 2}, {"sum_logits": -1.4087361097335815, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": false, "logits_per_token": -1.4087361097335815, "logits_per_char": -0.7043680548667908, "num_chars": 2}, {"sum_logits": -1.4656932353973389, "num_tokens": 1, "num_tokens_all": 500, "is_greedy": false, "logits_per_token": -1.4656932353973389, "logits_per_char": -0.7328466176986694, "num_chars": 2}], "label": 2, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 97, "native_id": 97, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4601268768310547, "incorrect_loss_raw": 1.3745864232381184, "correct_loss_per_char": 0.7300634384155273, "incorrect_loss_per_char": 0.6872932116190592, "correct_loss_per_token": 1.4601268768310547, "incorrect_loss_per_token": 1.3745864232381184, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.2271047830581665, "num_tokens": 1, "num_tokens_all": 510, "is_greedy": true, "logits_per_token": -1.2271047830581665, "logits_per_char": -0.6135523915290833, "num_chars": 2}, {"sum_logits": -1.4191279411315918, "num_tokens": 1, "num_tokens_all": 510, "is_greedy": false, "logits_per_token": -1.4191279411315918, "logits_per_char": -0.7095639705657959, "num_chars": 2}, {"sum_logits": -1.4601268768310547, "num_tokens": 1, "num_tokens_all": 510, "is_greedy": false, "logits_per_token": -1.4601268768310547, "logits_per_char": -0.7300634384155273, "num_chars": 2}, {"sum_logits": -1.4775265455245972, "num_tokens": 1, "num_tokens_all": 510, "is_greedy": false, "logits_per_token": -1.4775265455245972, "logits_per_char": -0.7387632727622986, "num_chars": 2}], "label": 2, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 98, "native_id": 98, "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": null, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.330603003501892, "incorrect_loss_raw": 1.4509843190511067, "correct_loss_per_char": 0.665301501750946, "incorrect_loss_per_char": 0.7254921595255533, "correct_loss_per_token": 1.330603003501892, "incorrect_loss_per_token": 1.4509843190511067, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.073272705078125, "num_tokens": 1, "num_tokens_all": 515, "is_greedy": true, "logits_per_token": -1.073272705078125, "logits_per_char": -0.5366363525390625, "num_chars": 2}, {"sum_logits": -1.330603003501892, "num_tokens": 1, "num_tokens_all": 515, "is_greedy": false, "logits_per_token": -1.330603003501892, "logits_per_char": -0.665301501750946, "num_chars": 2}, {"sum_logits": -1.5445598363876343, "num_tokens": 1, "num_tokens_all": 515, "is_greedy": false, "logits_per_token": -1.5445598363876343, "logits_per_char": -0.7722799181938171, "num_chars": 2}, {"sum_logits": -1.735120415687561, "num_tokens": 1, "num_tokens_all": 515, "is_greedy": false, "logits_per_token": -1.735120415687561, "logits_per_char": -0.8675602078437805, "num_chars": 2}], "label": 1, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}
{"doc_id": 99, "native_id": 99, "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": null, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": null, "correct_loss_raw": 1.4171552658081055, "incorrect_loss_raw": 1.3888380924860637, "correct_loss_per_char": 0.7085776329040527, "incorrect_loss_per_char": 0.6944190462430319, "correct_loss_per_token": 1.4171552658081055, "incorrect_loss_per_token": 1.3888380924860637, "correct_loss_uncond": null, "incorrect_loss_uncond": null}, "model_output": [{"sum_logits": -1.422950267791748, "num_tokens": 1, "num_tokens_all": 506, "is_greedy": false, "logits_per_token": -1.422950267791748, "logits_per_char": -0.711475133895874, "num_chars": 2}, {"sum_logits": -1.3001561164855957, "num_tokens": 1, "num_tokens_all": 506, "is_greedy": true, "logits_per_token": -1.3001561164855957, "logits_per_char": -0.6500780582427979, "num_chars": 2}, {"sum_logits": -1.4434078931808472, "num_tokens": 1, "num_tokens_all": 506, "is_greedy": false, "logits_per_token": -1.4434078931808472, "logits_per_char": -0.7217039465904236, "num_chars": 2}, {"sum_logits": -1.4171552658081055, "num_tokens": 1, "num_tokens_all": 506, "is_greedy": false, "logits_per_token": -1.4171552658081055, "logits_per_char": -0.7085776329040527, "num_chars": 2}], "label": 3, "task_hash": "1980c88e607a6dea06d45f27c60e3365", "model_hash": "03418cf8091a9882619950ffb07429a5"}